diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 91 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 21 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 2 |
4 files changed, 61 insertions, 63 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 054d9eb72..f9a738e94 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -153,15 +153,15 @@ htmlScript = try (do rest <- manyTill anyChar (htmlEndTag "script") return (open ++ rest ++ "</script>")) -rawHtmlBlock = do - notFollowedBy (do {choice [htmlTag "/body", htmlTag "/html"]; return ' '}) +rawHtmlBlock = try (do + notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"]) body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition] sp <- (many space) state <- getState if stateParseRaw state then return (RawHtml (body ++ sp)) else - return Null + return Null) htmlComment = try (do string "<!--" @@ -284,12 +284,12 @@ preCodeBlock = try (do result <- manyTill anyChar (htmlEndTag "code") spaces htmlEndTag "pre" - return (CodeBlock (decodeEntities result))) + return (CodeBlock (stripTrailingNewlines (decodeEntities result)))) bareCodeBlock = try (do htmlTag "code" result <- manyTill anyChar (htmlEndTag "code") - return (CodeBlock (decodeEntities result))) + return (CodeBlock (stripTrailingNewlines (decodeEntities result)))) -- -- block quotes diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 3bf3dfd23..bd91c5014 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -39,37 +39,33 @@ normalizeDashes = gsub "([0-9])--([0-9])" "\\1-\\2" normalizePunctuation :: String -> String normalizePunctuation = normalizeDashes . normalizeQuotes --- | Returns command option (between []) if any, or empty string. -commandOpt = option "" (between (char '[') (char ']') (many1 (noneOf "]"))) - -- | Returns text between brackets and its matching pair. -bracketedText = try (do - char '{' - result <- many (choice [ try (do{ char '\\'; - b <- oneOf "{}"; - return (['\\', b])}), -- escaped bracket - count 1 (noneOf "{}"), - do {text <- bracketedText; return ("{" ++ text ++ "}")} ]) - char '}' - return (concat result)) +bracketedText openB closeB = try (do + char openB + result <- many (choice [ oneOfStrings [ ['\\', openB], ['\\', closeB] ], + count 1 (noneOf [openB, closeB]), + bracketedText openB closeB ]) + char closeB + return ([openB] ++ (concat result) ++ [closeB])) --- | Parses list of arguments of LaTeX command. -commandArgs = many bracketedText +-- | Returns an option or argument of a LaTeX command +optOrArg = choice [ (bracketedText '{' '}'), (bracketedText '[' ']') ] --- | Parses LaTeX command, returns (name, star, option, list of arguments). +-- | Returns list of options and arguments of a LaTeX command +commandArgs = many optOrArg + +-- | Parses LaTeX command, returns (name, star, list of options/arguments). command = try (do char '\\' name <- many1 alphaNum star <- option "" (string "*") -- some commands have starred versions - opt <- commandOpt args <- commandArgs - return (name, star, opt, args)) + return (name, star, args)) begin name = try (do string "\\begin{" string name char '}' - option "" commandOpt option [] commandArgs spaces return name) @@ -93,7 +89,6 @@ anyEnvironment = try (do name <- many alphaNum star <- option "" (string "*") -- some environments have starred variants char '}' - option "" commandOpt option [] commandArgs spaces contents <- manyTill block (end (name ++ star)) @@ -103,15 +98,14 @@ anyEnvironment = try (do -- parsing documents -- --- | Skip everything up through \begin{document} -skipLaTeXHeader = try (do - manyTill anyChar (begin "document") +-- | Process LaTeX preamble, extracting metadata +processLaTeXPreamble = do + manyTill (choice [bibliographic, comment, unknownCommand]) (try (string "\\begin{document}")) spaces - return "") -- | Parse LaTeX and return 'Pandoc'. parseLaTeX = do - option "" skipLaTeXHeader -- if parsing a fragment, this might not be present + option () processLaTeXPreamble -- preamble might not be present, if a fragment blocks <- parseBlocks spaces option "" (string "\\end{document}") -- if parsing a fragment, this might not be present @@ -121,7 +115,10 @@ parseLaTeX = do let keyBlocks = stateKeyBlocks state let noteBlocks = stateNoteBlocks state let blocks' = filter (/= Null) blocks - return (Pandoc (Meta [] [] "") (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks))) + let title' = stateTitle state + let authors' = stateAuthors state + let date' = stateDate state + return (Pandoc (Meta title' authors' date') (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks))) -- -- parsing blocks @@ -209,7 +206,7 @@ mathBlockWith start end = try (do list = bulletList <|> orderedList <?> "list" listItem = try (do - ("item", _, _, _) <- command + ("item", _, _) <- command spaces state <- getState let oldParserContext = stateParserContext state @@ -265,7 +262,7 @@ authors = try (do string "\\author{" authors <- manyTill anyChar (char '}') spaces - let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\" "\n" authors + let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\\\\\" "\n" authors updateState (\state -> state { stateAuthors = authors' }) return Null) @@ -283,15 +280,15 @@ date = try (do -- this forces items to be parsed in different blocks itemBlock = try (do - ("item", _, opt, _) <- command + ("item", _, args) <- command state <- getState if (stateParserContext state == ListItemState) then fail "item should be handled by list block" else - if null opt then + if null args then return Null else - return (Plain [Str opt])) + return (Plain [Str (stripFirstAndLast (head args))])) -- -- raw LaTeX @@ -312,15 +309,13 @@ rawLaTeXEnvironment = try (do star <- option "" (string "*") -- for starred variants let name' = name ++ star char '}' - opt <- option "" commandOpt args <- option [] commandArgs - let optStr = if (null opt) then "" else "[" ++ opt ++ "]" - let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args + let argStr = concat args contents <- manyTill (choice [(many1 (noneOf "\\")), (do{ (Para [TeX str]) <- rawLaTeXEnvironment; return str }), string "\\"]) (end name') spaces - return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ optStr ++ argStr ++ + return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++ (concat contents) ++ "\\end{" ++ name' ++ "}")])) unknownEnvironment = try (do @@ -335,17 +330,16 @@ unknownCommand = try (do notFollowedBy' (string "\\end{itemize}") notFollowedBy' (string "\\end{enumerate}") notFollowedBy' (string "\\end{document}") - (name, star, opt, args) <- command + (name, star, args) <- command spaces - let optStr = if null opt then "" else "[" ++ opt ++ "]" - let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args + let argStr = concat args state <- getState if (name == "item") && ((stateParserContext state) == ListItemState) then fail "should not be parsed as raw" else string "" if stateParseRaw state then - return (Plain [TeX ("\\" ++ name ++ star ++ optStr ++ argStr)]) + return (Plain [TeX ("\\" ++ name ++ star ++ argStr)]) else return (Plain [Str (joinWithSep " " args)])) @@ -554,13 +548,19 @@ link = try (do return (Link (normalizeSpaces label) ref)) image = try (do - ("includegraphics", _, _, (src:lst)) <- command - return (Image [Str "image"] (Src src ""))) + ("includegraphics", _, args) <- command + let args' = filter (\arg -> (take 1 arg) /= "[") args + let src = if null args' then + Src "" "" + else + Src (stripFirstAndLast (head args')) "" + return (Image [Str "image"] src)) footnote = try (do - ("footnote", _, _, (contents:[])) <- command + ("footnote", _, (contents:[])) <- command + let contents' = stripFirstAndLast contents let blocks = case runParser parseBlocks defaultParserState "footnote" contents of - Left err -> error $ "Input:\n" ++ show contents ++ + Left err -> error $ "Input:\n" ++ show contents' ++ "\nError:\n" ++ show err Right result -> result state <- getState @@ -574,12 +574,11 @@ footnote = try (do -- | Parse any LaTeX command and return it in a raw TeX inline element. rawLaTeXInline :: GenParser Char ParserState Inline rawLaTeXInline = try (do - (name, star, opt, args) <- command - let optStr = if (null opt) then "" else "[" ++ opt ++ "]" - let argStr = concatMap (\arg -> "{" ++ arg ++ "}") args + (name, star, args) <- command + let argStr = concat args state <- getState if ((name == "begin") || (name == "end") || (name == "item")) then fail "not an inline command" else string "" - return (TeX ("\\" ++ name ++ star ++ optStr ++ argStr))) + return (TeX ("\\" ++ name ++ star ++ argStr))) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 60ac40fd7..df2f43e87 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -136,8 +136,7 @@ header = choice [ setextHeader, atxHeader ] <?> "header" atxHeader = try (do lead <- many1 (char atxHChar) skipSpaces - txt <- many1 (do {notFollowedBy' atxClosing; inline}) - atxClosing + txt <- manyTill inline atxClosing return (Header (length lead) (normalizeSpaces txt))) atxClosing = try (do @@ -195,7 +194,7 @@ indentedBlock = try (do codeBlock = do result <- choice [indentedBlock, indentedLine] option "" blanklines - return (CodeBlock result) + return (CodeBlock (stripTrailingNewlines result)) -- -- note block @@ -286,9 +285,9 @@ orderedListStart = listLine start = try (do notFollowedBy' start notFollowedBy blankline - notFollowedBy' (try (do{ indentSpaces; - many (spaceChar); - choice [bulletListStart, orderedListStart]})) + notFollowedBy' (do{ indentSpaces; + many (spaceChar); + choice [bulletListStart, orderedListStart]}) line <- manyTill anyChar newline return (line ++ "\n")) @@ -311,7 +310,7 @@ listContinuation start = return ((concat result) ++ blanks)) listContinuationLine start = try (do - notFollowedBy blankline + notFollowedBy' blankline notFollowedBy' start option "" indentSpaces result <- manyTill anyChar newline @@ -404,10 +403,10 @@ special = choice [ link, referenceLink, rawHtmlInline, autoLink, escapedChar = escaped anyChar -ltSign = do +ltSign = try (do notFollowedBy' rawHtmlBlocks -- don't return < if it starts html char '<' - return (Str ['<']) + return (Str ['<'])) specialCharsMinusLt = filter (/= '<') specialChars @@ -487,7 +486,7 @@ endline = -- next line would allow block quotes without preceding blank line -- Markdown.pl does allow this, but there's a chance of a wrapped -- greater-than sign triggering a block quote by accident... --- notFollowedBy (try (do { choice [emailBlockQuoteStart, string ",----"]; return ' ' })) +-- notFollowedBy' (choice [emailBlockQuoteStart, string ",----"]) notFollowedBy blankline -- parse potential list starts at beginning of line differently if in a list: st <- getState @@ -560,7 +559,7 @@ referenceLinkSingle = -- a link like [this] autoLink = -- a link <like.this.com> try (do - notFollowedBy (do {anyHtmlBlockTag; return ' '}) + notFollowedBy' anyHtmlBlockTag src <- between (char autoLinkStart) (char autoLinkEnd) (many (noneOf (spaceChars ++ endLineChars ++ [autoLinkEnd]))) case (matchRegex emailAddress src) of diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 82e5ea303..69c7d9baa 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -292,7 +292,7 @@ indentedBlock variable = try (do codeBlock = try (do codeBlockStart result <- indentedBlock False -- the False means we want one tab stop indent on each line - return (CodeBlock result)) + return (CodeBlock (stripTrailingNewlines result))) -- -- raw html |