aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/LaTeX.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/LaTeX.hs')
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs91
1 files changed, 45 insertions, 46 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3bf3dfd23..bd91c5014 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -39,37 +39,33 @@ normalizeDashes = gsub "([0-9])--([0-9])" "\\1-\\2"
normalizePunctuation :: String -> String
normalizePunctuation = normalizeDashes . normalizeQuotes
--- | Returns command option (between []) if any, or empty string.
-commandOpt = option "" (between (char '[') (char ']') (many1 (noneOf "]")))
-
-- | Returns text between brackets and its matching pair.
-bracketedText = try (do
- char '{'
- result <- many (choice [ try (do{ char '\\';
- b <- oneOf "{}";
- return (['\\', b])}), -- escaped bracket
- count 1 (noneOf "{}"),
- do {text <- bracketedText; return ("{" ++ text ++ "}")} ])
- char '}'
- return (concat result))
+bracketedText openB closeB = try (do
+ char openB
+ result <- many (choice [ oneOfStrings [ ['\\', openB], ['\\', closeB] ],
+ count 1 (noneOf [openB, closeB]),
+ bracketedText openB closeB ])
+ char closeB
+ return ([openB] ++ (concat result) ++ [closeB]))
--- | Parses list of arguments of LaTeX command.
-commandArgs = many bracketedText
+-- | Returns an option or argument of a LaTeX command
+optOrArg = choice [ (bracketedText '{' '}'), (bracketedText '[' ']') ]
--- | Parses LaTeX command, returns (name, star, option, list of arguments).
+-- | Returns list of options and arguments of a LaTeX command
+commandArgs = many optOrArg
+
+-- | Parses LaTeX command, returns (name, star, list of options/arguments).
command = try (do
char '\\'
name <- many1 alphaNum
star <- option "" (string "*") -- some commands have starred versions
- opt <- commandOpt
args <- commandArgs
- return (name, star, opt, args))
+ return (name, star, args))
begin name = try (do
string "\\begin{"
string name
char '}'
- option "" commandOpt
option [] commandArgs
spaces
return name)
@@ -93,7 +89,6 @@ anyEnvironment = try (do
name <- many alphaNum
star <- option "" (string "*") -- some environments have starred variants
char '}'
- option "" commandOpt
option [] commandArgs
spaces
contents <- manyTill block (end (name ++ star))
@@ -103,15 +98,14 @@ anyEnvironment = try (do
-- parsing documents
--
--- | Skip everything up through \begin{document}
-skipLaTeXHeader = try (do
- manyTill anyChar (begin "document")
+-- | Process LaTeX preamble, extracting metadata
+processLaTeXPreamble = do
+ manyTill (choice [bibliographic, comment, unknownCommand]) (try (string "\\begin{document}"))
spaces
- return "")
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX = do
- option "" skipLaTeXHeader -- if parsing a fragment, this might not be present
+ option () processLaTeXPreamble -- preamble might not be present, if a fragment
blocks <- parseBlocks
spaces
option "" (string "\\end{document}") -- if parsing a fragment, this might not be present
@@ -121,7 +115,10 @@ parseLaTeX = do
let keyBlocks = stateKeyBlocks state
let noteBlocks = stateNoteBlocks state
let blocks' = filter (/= Null) blocks
- return (Pandoc (Meta [] [] "") (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
+ let title' = stateTitle state
+ let authors' = stateAuthors state
+ let date' = stateDate state
+ return (Pandoc (Meta title' authors' date') (blocks' ++ (reverse noteBlocks) ++ (reverse keyBlocks)))
--
-- parsing blocks
@@ -209,7 +206,7 @@ mathBlockWith start end = try (do
list = bulletList <|> orderedList <?> "list"
listItem = try (do
- ("item", _, _, _) <- command
+ ("item", _, _) <- command
spaces
state <- getState
let oldParserContext = stateParserContext state
@@ -265,7 +262,7 @@ authors = try (do
string "\\author{"
authors <- manyTill anyChar (char '}')
spaces
- let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\" "\n" authors
+ let authors' = map removeLeadingTrailingSpace $ lines $ gsub "\\\\\\\\" "\n" authors
updateState (\state -> state { stateAuthors = authors' })
return Null)
@@ -283,15 +280,15 @@ date = try (do
-- this forces items to be parsed in different blocks
itemBlock = try (do
- ("item", _, opt, _) <- command
+ ("item", _, args) <- command
state <- getState
if (stateParserContext state == ListItemState) then
fail "item should be handled by list block"
else
- if null opt then
+ if null args then
return Null
else
- return (Plain [Str opt]))
+ return (Plain [Str (stripFirstAndLast (head args))]))
--
-- raw LaTeX
@@ -312,15 +309,13 @@ rawLaTeXEnvironment = try (do
star <- option "" (string "*") -- for starred variants
let name' = name ++ star
char '}'
- opt <- option "" commandOpt
args <- option [] commandArgs
- let optStr = if (null opt) then "" else "[" ++ opt ++ "]"
- let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args
+ let argStr = concat args
contents <- manyTill (choice [(many1 (noneOf "\\")),
(do{ (Para [TeX str]) <- rawLaTeXEnvironment; return str }),
string "\\"]) (end name')
spaces
- return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ optStr ++ argStr ++
+ return (Para [TeX ("\\begin{" ++ name' ++ "}" ++ argStr ++
(concat contents) ++ "\\end{" ++ name' ++ "}")]))
unknownEnvironment = try (do
@@ -335,17 +330,16 @@ unknownCommand = try (do
notFollowedBy' (string "\\end{itemize}")
notFollowedBy' (string "\\end{enumerate}")
notFollowedBy' (string "\\end{document}")
- (name, star, opt, args) <- command
+ (name, star, args) <- command
spaces
- let optStr = if null opt then "" else "[" ++ opt ++ "]"
- let argStr = concatMap (\arg -> ("{" ++ arg ++ "}")) args
+ let argStr = concat args
state <- getState
if (name == "item") && ((stateParserContext state) == ListItemState) then
fail "should not be parsed as raw"
else
string ""
if stateParseRaw state then
- return (Plain [TeX ("\\" ++ name ++ star ++ optStr ++ argStr)])
+ return (Plain [TeX ("\\" ++ name ++ star ++ argStr)])
else
return (Plain [Str (joinWithSep " " args)]))
@@ -554,13 +548,19 @@ link = try (do
return (Link (normalizeSpaces label) ref))
image = try (do
- ("includegraphics", _, _, (src:lst)) <- command
- return (Image [Str "image"] (Src src "")))
+ ("includegraphics", _, args) <- command
+ let args' = filter (\arg -> (take 1 arg) /= "[") args
+ let src = if null args' then
+ Src "" ""
+ else
+ Src (stripFirstAndLast (head args')) ""
+ return (Image [Str "image"] src))
footnote = try (do
- ("footnote", _, _, (contents:[])) <- command
+ ("footnote", _, (contents:[])) <- command
+ let contents' = stripFirstAndLast contents
let blocks = case runParser parseBlocks defaultParserState "footnote" contents of
- Left err -> error $ "Input:\n" ++ show contents ++
+ Left err -> error $ "Input:\n" ++ show contents' ++
"\nError:\n" ++ show err
Right result -> result
state <- getState
@@ -574,12 +574,11 @@ footnote = try (do
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try (do
- (name, star, opt, args) <- command
- let optStr = if (null opt) then "" else "[" ++ opt ++ "]"
- let argStr = concatMap (\arg -> "{" ++ arg ++ "}") args
+ (name, star, args) <- command
+ let argStr = concat args
state <- getState
if ((name == "begin") || (name == "end") || (name == "item")) then
fail "not an inline command"
else
string ""
- return (TeX ("\\" ++ name ++ star ++ optStr ++ argStr)))
+ return (TeX ("\\" ++ name ++ star ++ argStr)))