Removed old haddock reader code. Add dependency on haddock-library.

This also removes the dependency on alex and happy.
author: John MacFarlane <jgm@berkeley.edu> 2014-06-18 11:33:09 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2014-06-18 11:33:09 -0700
commit: ab390a10ec3bc42c71d8746152acbf3ee7b1595b (patch)
tree: f8700679986bf08874192f3c44c9a7fa3d5e4fdb /src/Text/Pandoc
parent: b371e83d7362c6ffc6e4dd98b7288ddd7f23f46a (diff)
download: pandoc-ab390a10ec3bc42c71d8746152acbf3ee7b1595b.tar.gz
3 files changed, 21 insertions, 360 deletions
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 0e74406ef..65d8de98f 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -3,7 +3,8 @@
    Copyright   : Copyright (C) 2013 David Lazar
    License     : GNU GPL, version 2 or above
 
-   Maintainer  : David Lazar <lazar6@illinois.edu>
+   Maintainer  : David Lazar <lazar6@illinois.edu>,
+                 John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
 
 Conversion of Haddock markup to 'Pandoc' document.
@@ -12,22 +13,31 @@ module Text.Pandoc.Readers.Haddock
     ( readHaddock
     ) where
 
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Data.Monoid
+import Text.Pandoc.Definition
 import Text.Pandoc.Options
-import Text.Pandoc.Readers.Haddock.Lex
-import Text.Pandoc.Readers.Haddock.Parse
+import Documentation.Haddock.Parser (parseParas, Identifier)
+import Documentation.Haddock.Types
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
 readHaddock :: ReaderOptions -- ^ Reader options
             -> String        -- ^ String to parse
             -> Pandoc
-readHaddock _ s = Pandoc nullMeta blocks
-  where
-    blocks = case parseParas (tokenise s (0,0)) of
-        Left [] -> error "parse failure"
-        Left (tok:_) -> error $ "parse failure " ++ pos (tokenPos tok)
-          where pos (l, c) = "(line " ++ show l ++ ", column " ++ show c ++ ")"
-        Right x -> mergeLists (toList x)
+readHaddock _ = B.doc . docHToBlocks . parseParas
+
+docHToBlocks :: DocH mod Identifier -> Blocks
+docHToBlocks d =
+  case d of
+    DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
+    DocParagraph ils -> B.para $ docHToInlines ils
+
+docHToInlines :: DocH mod Identifier -> Inlines
+docHToInlines d =
+  case d of
+    DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2)
+    DocString s -> B.text s
 
 -- similar to 'docAppend' in Haddock.Doc
 mergeLists :: [Block] -> [Block]
diff --git a/src/Text/Pandoc/Readers/Haddock/Lex.x b/src/Text/Pandoc/Readers/Haddock/Lex.x
deleted file mode 100644
index 120e96ebf..000000000
--- a/src/Text/Pandoc/Readers/Haddock/Lex.x
+++ /dev/null
@@ -1,171 +0,0 @@
---
--- Haddock - A Haskell Documentation Tool
---
--- (c) Simon Marlow 2002
---
--- This file was modified and integrated into GHC by David Waern 2006.
--- Then moved back into Haddock by Isaac Dupree in 2009 :-)
--- Then copied into Pandoc by David Lazar in 2013 :-D
-
-{
-{-# LANGUAGE BangPatterns #-}   -- Generated by Alex
-{-# OPTIONS -Wwarn -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module Text.Pandoc.Readers.Haddock.Lex (
-    Token(..),
-    LToken,
-    tokenise,
-    tokenPos
- ) where
-
-import Data.Char
-import Numeric (readHex)
-}
-
-%wrapper "posn"
-
-$ws       = $white # \n
-$digit    = [0-9]
-$hexdigit = [0-9a-fA-F]
-$special  = [\"\@]
-$alphanum = [A-Za-z0-9]
-$ident    = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:]
-
-:-
-
--- beginning of a paragraph
-<0,para> {
-  $ws* \n               ;
-  $ws* \>               { begin birdtrack }
-  $ws* prop \> .* \n    { strtoken TokProperty `andBegin` property}
-  $ws* \>\>\>           { strtoken TokExamplePrompt `andBegin` exampleexpr }
-  $ws* [\*\-]           { token TokBullet `andBegin` string }
-  $ws* \[               { token TokDefStart `andBegin` def }
-  $ws* \( $digit+ \)    { token TokNumber `andBegin` string }
-  $ws* $digit+ \.       { token TokNumber `andBegin` string }
-  $ws*                  { begin string }
-}
-
--- beginning of a line
-<line> {
-  $ws* \>               { begin birdtrack }
-  $ws* \>\>\>           { strtoken TokExamplePrompt `andBegin` exampleexpr }
-
-  $ws* \n               { token TokPara `andBegin` para }
-  -- ^ Here, we really want to be able to say
-  -- $ws* (\n | <eof>)  { token TokPara `andBegin` para}
-  -- because otherwise a trailing line of whitespace will result in
-  -- a spurious TokString at the end of a docstring.  We don't have <eof>,
-  -- though (NOW I realise what it was for :-).  To get around this, we always
-  -- append \n to the end of a docstring.
-
-  ()                    { begin string }
-}
-
-<birdtrack> .*  \n?     { strtokenNL TokBirdTrack `andBegin` line }
-
-<property> ()           { token TokPara `andBegin` para }
-
-<example> {
-  $ws*  \n              { token TokPara `andBegin` para }
-  $ws* \>\>\>           { strtoken TokExamplePrompt `andBegin` exampleexpr }
-  ()                    { begin exampleresult }
-}
-
-<exampleexpr> .* \n     { strtokenNL TokExampleExpression `andBegin` example }
-
-<exampleresult> .* \n   { strtokenNL TokExampleResult `andBegin` example }
-
-<string,def> {
-  $special              { strtoken $ \s -> TokSpecial (head s) }
-  \<\< [^\>]* \>\>      { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
-  \< [^\>]* \>          { strtoken $ \s -> TokURL (init (tail s)) }
-  \# [^\#]* \#          { strtoken $ \s -> TokAName (init (tail s)) }
-  \/ [^\/]* \/          { strtoken $ \s -> TokEmphasis (init (tail s)) }
-  [\'\`] $ident+ [\'\`] { strtoken $ \s -> TokIdent (init (tail s)) }
-  \\ .                  { strtoken (TokString . tail) }
-  "&#" $digit+ \;       { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
-  "&#" [xX] $hexdigit+ \;
-        { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
-  -- allow special characters through if they don't fit one of the previous
-  -- patterns.
-  [\/\'\`\<\#\&\\]      { strtoken TokString }
-  [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
-  [^ $special \/ \< \# \n \'\` \& \\ \]]+    { strtoken TokString }
-}
-
-<def> {
-  \]                    { token TokDefEnd `andBegin` string }
-}
-
--- ']' doesn't have any special meaning outside of the [...] at the beginning
--- of a definition paragraph.
-<string> {
-  \]                    { strtoken TokString }
-}
-
-{
--- | A located token
-type LToken = (Token, AlexPosn)
-
-data Token
-  = TokPara
-  | TokNumber
-  | TokBullet
-  | TokDefStart
-  | TokDefEnd
-  | TokSpecial Char
-  | TokIdent String
-  | TokString String
-  | TokURL String
-  | TokPic String
-  | TokEmphasis String
-  | TokAName String
-  | TokBirdTrack String
-  | TokProperty String
-  | TokExamplePrompt String
-  | TokExampleExpression String
-  | TokExampleResult String
-  deriving Show
-
-tokenPos :: LToken -> (Int, Int)
-tokenPos t = let AlexPn _ line col = snd t in (line, col)
-
-type StartCode = Int
-type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> [LToken]
-
-tokenise :: String -> (Int, Int) -> [LToken]
-tokenise str (line, col) = go (posn,'\n',[],eofHack str) para
-  where posn = AlexPn 0 line col
-        go inp@(pos,_,_,str) sc =
-          case alexScan inp sc of
-                AlexEOF -> []
-                AlexError _ -> []
-                AlexSkip  inp' len     -> go inp' sc
-                AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc)
-
--- NB. we add a final \n to the string, (see comment in the beginning of line
--- production above).
-eofHack str = str++"\n"
-
-andBegin :: Action -> StartCode -> Action
-andBegin act new_sc = \pos str _ cont -> act pos str new_sc cont
-
-token :: Token -> Action
-token t = \pos _ sc cont -> (t, pos) : cont sc
-
-strtoken, strtokenNL :: (String -> Token) -> Action
-strtoken t = \pos str sc cont -> (t str, pos) : cont sc
-strtokenNL t = \pos str sc cont -> (t (filter (/= '\r') str), pos) : cont sc
--- ^ We only want LF line endings in our internal doc string format, so we
--- filter out all CRs.
-
-begin :: StartCode -> Action
-begin sc = \_ _ _ cont -> cont sc
-
-}
diff --git a/src/Text/Pandoc/Readers/Haddock/Parse.y b/src/Text/Pandoc/Readers/Haddock/Parse.y
deleted file mode 100644
index 9c2bbc8a9..000000000
--- a/src/Text/Pandoc/Readers/Haddock/Parse.y
+++ /dev/null
@@ -1,178 +0,0 @@
--- This code was copied from the 'haddock' package, modified, and integrated
--- into Pandoc by David Lazar.
-{
-{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module Text.Pandoc.Readers.Haddock.Parse (parseString, parseParas) where
-
-import Text.Pandoc.Readers.Haddock.Lex
-import Text.Pandoc.Builder
-import Text.Pandoc.Shared (trim, trimr)
-import Data.Generics (everywhere, mkT)
-import Data.Char  (isSpace)
-import Data.Maybe (fromMaybe)
-import Data.List  (stripPrefix, intersperse)
-import Data.Monoid (mempty, mconcat)
-}
-
-%expect 0
-
-%tokentype  { LToken }
-
-%token
-    '/'     { (TokSpecial '/',_) }
-    '@'     { (TokSpecial '@',_) }
-    '['     { (TokDefStart,_) }
-    ']'     { (TokDefEnd,_) }
-    DQUO    { (TokSpecial '\"',_) }
-    URL     { (TokURL $$,_) }
-    PIC     { (TokPic $$,_) }
-    ANAME   { (TokAName $$,_) }
-    '/../'  { (TokEmphasis $$,_) }
-    '-'     { (TokBullet,_) }
-    '(n)'   { (TokNumber,_) }
-    '>..'   { (TokBirdTrack $$,_) }
-    PROP    { (TokProperty $$,_) }
-    PROMPT  { (TokExamplePrompt $$,_) }
-    RESULT  { (TokExampleResult $$,_) }
-    EXP     { (TokExampleExpression $$,_) }
-    IDENT   { (TokIdent $$,_) }
-    PARA    { (TokPara,_) }
-    STRING  { (TokString $$,_) }
-
-%monad { Either [LToken] }
-
-%name parseParas doc
-%name parseString seq
-
-%%
-
-doc :: { Blocks }
-    : apara PARA doc    { $1 <> $3 }
-    | PARA doc          { $2 }
-    | apara             { $1 }
-    | {- empty -}       { mempty }
-
-apara :: { Blocks }
-    : ulpara            { bulletList [$1] }
-    | olpara            { orderedList [$1] }
-    | defpara           { definitionList [$1] }
-    | para              { $1 }
-
-ulpara :: { Blocks }
-    : '-' para          { $2 }
-
-olpara  :: { Blocks }
-    : '(n)' para        { $2 }
-
-defpara :: { (Inlines, [Blocks]) }
-    : '[' seq ']' seq   { (trimInlines $2, [plain $ trimInlines $4]) }
-
-para :: { Blocks }
-    : seq               { para' $1 }
-    | codepara          { codeBlockWith ([], ["haskell"], []) $1 }
-    | property          { $1 }
-    | examples          { $1 }
-
-codepara :: { String }
-    : '>..' codepara    { $1 ++ $2 }
-    | '>..'             { $1 }
-
-property :: { Blocks }
-    : PROP              { makeProperty $1 }
-
-examples :: { Blocks }
-    : example examples  { $1 <> $2 }
-    | example           { $1 }
-
-example :: { Blocks }
-    : PROMPT EXP result { makeExample $1 $2 (lines $3) }
-    | PROMPT EXP        { makeExample $1 $2 [] }
-
-result :: { String }
-    : RESULT result     { $1 ++ $2 }
-    | RESULT            { $1 }
-
-seq :: { Inlines }
-    : elem seq          { $1 <> $2 }
-    | elem              { $1 }
-
-elem :: { Inlines }
-    : elem1             { $1 }
-    | '@' seq1 '@'      { monospace $2 }
-
-seq1 :: { Inlines }
-    : PARA seq1         { linebreak <> $2 }
-    | elem1 seq1        { $1 <> $2 }
-    | elem1             { $1 }
-
-elem1 :: { Inlines }
-    : STRING            { text $1 }
-    | '/../'            { emph (str $1) }
-    | URL               { makeHyperlink $1 }
-    | PIC               { image $1 $1 mempty }
-    | ANAME             { mempty } -- TODO
-    | IDENT             { codeWith ([], ["haskell"], []) $1 }
-    | DQUO strings DQUO { codeWith ([], ["haskell"], []) $2 }
-
-strings :: { String }
-    : STRING            { $1 }
-    | STRING strings    { $1 ++ $2 }
-
-{
-happyError :: [LToken] -> Either [LToken] a
-happyError toks = Left toks
-
-para' :: Inlines -> Blocks
-para' = para . trimInlines
-
-monospace :: Inlines -> Inlines
-monospace = everywhere (mkT go)
-  where
-    go (Str s) = Code nullAttr s
-    go x = x
-
--- | Create a `Hyperlink` from given string.
---
--- A hyperlink consists of a URL and an optional label.  The label is separated
--- from the url by one or more whitespace characters.
-makeHyperlink :: String -> Inlines
-makeHyperlink input = case break isSpace $ trim input of
-  (url, "") -> link url url (str url)
-  (url, lb) -> link url url (trimInlines $ text lb)
-
-makeProperty :: String -> Blocks
-makeProperty s = case trim s of
-  'p':'r':'o':'p':'>':xs ->
-    codeBlockWith ([], ["property"], []) (dropWhile isSpace xs)
-  xs ->
-    error $ "makeProperty: invalid input " ++ show xs
-
--- | Create an 'Example', stripping superfluous characters as appropriate
-makeExample :: String -> String -> [String] -> Blocks
-makeExample prompt expression result =
-    para $ codeWith ([], ["haskell","expr"], []) (trim expression)
-        <> linebreak
-        <> (mconcat $ intersperse linebreak $ map coder result')
-  where
-    -- 1. drop trailing whitespace from the prompt, remember the prefix
-    prefix = takeWhile isSpace prompt
-
-    -- 2. drop, if possible, the exact same sequence of whitespace
-    -- characters from each result line
-    --
-    -- 3. interpret lines that only contain the string "<BLANKLINE>" as an
-    -- empty line
-    result' = map (substituteBlankLine . tryStripPrefix prefix) result
-      where
-        tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
-
-        substituteBlankLine "<BLANKLINE>" = ""
-        substituteBlankLine line          = line
-    coder = codeWith ([], ["result"], [])
-}
author	John MacFarlane <jgm@berkeley.edu>	2014-06-18 11:33:09 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2014-06-18 11:33:09 -0700
commit	ab390a10ec3bc42c71d8746152acbf3ee7b1595b (patch)
tree	f8700679986bf08874192f3c44c9a7fa3d5e4fdb /src/Text/Pandoc
parent	b371e83d7362c6ffc6e4dd98b7288ddd7f23f46a (diff)
download	pandoc-ab390a10ec3bc42c71d8746152acbf3ee7b1595b.tar.gz