diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/LaTeX.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 179 |
1 files changed, 123 insertions, 56 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 279f90318..351e1fef5 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,10 +33,9 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, handleIncludes ) where -import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional) import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Parsing +import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad @@ -64,7 +63,7 @@ parseLaTeX = do let date' = stateDate st return $ Pandoc (Meta title' authors' date') $ toList bs -type LP = GenParser Char ParserState +type LP = Parser [Char] ParserState anyControlSeq :: LP String anyControlSeq = do @@ -82,9 +81,16 @@ controlSeq name = try $ do case name of "" -> mzero [c] | not (isLetter c) -> string [c] - cs -> string cs <* optional sp + cs -> string cs <* notFollowedBy letter <* optional sp return name +dimenarg :: LP String +dimenarg = try $ do + ch <- option "" $ string "=" + num <- many1 digit + dim <- oneOfStrings ["pt","pc","in","bp","cm","mm","dd","cc","sp"] + return $ ch ++ num ++ dim + sp :: LP () sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline) @@ -112,18 +118,28 @@ comment = do newline return () +bgroup :: LP () +bgroup = () <$ char '{' + <|> () <$ controlSeq "bgroup" + <|> () <$ controlSeq "begingroup" + +egroup :: LP () +egroup = () <$ char '}' + <|> () <$ controlSeq "egroup" + <|> () <$ controlSeq "endgroup" + grouped :: Monoid a => LP a -> LP a -grouped parser = try $ char '{' *> (mconcat <$> manyTill parser (char '}')) +grouped parser = try $ bgroup *> (mconcat <$> manyTill parser egroup) braced :: LP String -braced = char '{' *> (concat <$> manyTill +braced = bgroup *> (concat <$> manyTill ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{')) <|> try (string "\\}") <|> try (string "\\{") <|> try (string "\\\\") <|> ((\x -> "{" ++ x ++ "}") <$> braced) <|> count 1 anyChar - ) (char '}')) + ) egroup) bracketed :: Monoid a => LP a -> LP a bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']')) @@ -181,7 +197,7 @@ inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) block :: LP Blocks block = (mempty <$ comment) - <|> (mempty <$ ((spaceChar <|> blankline) *> spaces)) + <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) <|> environment <|> mempty <$ macro -- TODO improve macros, make them work everywhere <|> blockCommand @@ -251,6 +267,7 @@ blockCommands = M.fromList $ , ("end", mzero) , ("item", skipopts *> loose_item) , ("documentclass", skipopts *> braced *> preamble) + , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) ] ++ map ignoreBlocks -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks @@ -281,7 +298,9 @@ authors :: LP () authors = try $ do char '{' let oneAuthor = mconcat <$> - many1 (notFollowedBy' (controlSeq "and") >> inline) + many1 (notFollowedBy' (controlSeq "and") >> + (inline <|> mempty <$ blockCommand)) + -- skip e.g. \vspace{10pt} auths <- sepBy oneAuthor (controlSeq "and") char '}' updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths }) @@ -304,16 +323,19 @@ inlineCommand = try $ do parseRaw <- stateParseRaw `fmap` getState star <- option "" (string "*") let name' = name ++ star + let rawargs = withRaw (skipopts *> option "" dimenarg + *> many braced) >>= applyMacros' . snd + let raw = if parseRaw + then (rawInline "latex" . (('\\':name') ++)) <$> rawargs + else mempty <$> rawargs case M.lookup name' inlineCommands of - Just p -> p + Just p -> p <|> raw Nothing -> case M.lookup name inlineCommands of - Just p -> p - Nothing - | parseRaw -> - (rawInline "latex" . (('\\':name') ++)) <$> - (withRaw (skipopts *> many braced) - >>= applyMacros' . snd) - | otherwise -> return mempty + Just p -> p <|> raw + Nothing -> raw + +unlessParseRaw :: LP () +unlessParseRaw = getState >>= guard . not . stateParseRaw isBlockCommand :: String -> Bool isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands @@ -333,8 +355,8 @@ inlineCommands = M.fromList $ , ("dots", lit "…") , ("mdots", lit "…") , ("sim", lit "~") - , ("label", inBrackets <$> tok) - , ("ref", inBrackets <$> tok) + , ("label", unlessParseRaw >> (inBrackets <$> tok)) + , ("ref", unlessParseRaw >> (inBrackets <$> tok)) , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) , ("ensuremath", mathInline $ braced) @@ -358,8 +380,6 @@ inlineCommands = M.fromList $ , ("scshape", smallcaps <$> inlines) , ("bfseries", strong <$> inlines) , ("/", pure mempty) -- italic correction - , ("cc", lit "ç") - , ("cC", lit "Ç") , ("aa", lit "å") , ("AA", lit "Å") , ("ss", lit "ß") @@ -374,11 +394,12 @@ inlineCommands = M.fromList $ , ("copyright", lit "©") , ("`", option (str "`") $ try $ tok >>= accent grave) , ("'", option (str "'") $ try $ tok >>= accent acute) - , ("^", option (str "^") $ try $ tok >>= accent hat) - , ("~", option (str "~") $ try $ tok >>= accent circ) + , ("^", option (str "^") $ try $ tok >>= accent circ) + , ("~", option (str "~") $ try $ tok >>= accent tilde) , ("\"", option (str "\"") $ try $ tok >>= accent umlaut) , (".", option (str ".") $ try $ tok >>= accent dot) , ("=", option (str "=") $ try $ tok >>= accent macron) + , ("c", option (str "c") $ try $ tok >>= accent cedilla) , ("i", lit "i") , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) , (",", pure mempty) @@ -502,33 +523,66 @@ acute 'E' = 'É' acute 'I' = 'Í' acute 'O' = 'Ó' acute 'U' = 'Ú' +acute 'Y' = 'Ý' acute 'a' = 'á' acute 'e' = 'é' acute 'i' = 'í' acute 'o' = 'ó' acute 'u' = 'ú' +acute 'y' = 'ý' +acute 'C' = 'Ć' +acute 'c' = 'ć' +acute 'L' = 'Ĺ' +acute 'l' = 'ĺ' +acute 'N' = 'Ń' +acute 'n' = 'ń' +acute 'R' = 'Ŕ' +acute 'r' = 'ŕ' +acute 'S' = 'Ś' +acute 's' = 'ś' +acute 'Z' = 'Ź' +acute 'z' = 'ź' acute c = c -hat :: Char -> Char -hat 'A' = 'Â' -hat 'E' = 'Ê' -hat 'I' = 'Î' -hat 'O' = 'Ô' -hat 'U' = 'Û' -hat 'a' = 'ã' -hat 'e' = 'ê' -hat 'i' = 'î' -hat 'o' = 'ô' -hat 'u' = 'û' -hat c = c - circ :: Char -> Char -circ 'A' = 'Ã' -circ 'O' = 'Õ' -circ 'o' = 'õ' -circ 'N' = 'Ñ' -circ 'n' = 'ñ' -circ c = c +circ 'A' = 'Â' +circ 'E' = 'Ê' +circ 'I' = 'Î' +circ 'O' = 'Ô' +circ 'U' = 'Û' +circ 'a' = 'â' +circ 'e' = 'ê' +circ 'i' = 'î' +circ 'o' = 'ô' +circ 'u' = 'û' +circ 'C' = 'Ĉ' +circ 'c' = 'ĉ' +circ 'G' = 'Ĝ' +circ 'g' = 'ĝ' +circ 'H' = 'Ĥ' +circ 'h' = 'ĥ' +circ 'J' = 'Ĵ' +circ 'j' = 'ĵ' +circ 'S' = 'Ŝ' +circ 's' = 'ŝ' +circ 'W' = 'Ŵ' +circ 'w' = 'ŵ' +circ 'Y' = 'Ŷ' +circ 'y' = 'ŷ' +circ c = c + +tilde :: Char -> Char +tilde 'A' = 'Ã' +tilde 'a' = 'ã' +tilde 'O' = 'Õ' +tilde 'o' = 'õ' +tilde 'I' = 'Ĩ' +tilde 'i' = 'ĩ' +tilde 'U' = 'Ũ' +tilde 'u' = 'ũ' +tilde 'N' = 'Ñ' +tilde 'n' = 'ñ' +tilde c = c umlaut :: Char -> Char umlaut 'A' = 'Ä' @@ -568,6 +622,13 @@ macron 'o' = 'ō' macron 'u' = 'ū' macron c = c +cedilla :: Char -> Char +cedilla 'c' = 'ç' +cedilla 'C' = 'Ç' +cedilla 's' = 'ş' +cedilla 'S' = 'Ş' +cedilla c = c + tok :: LP Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) @@ -646,15 +707,15 @@ verbatimEnv = do controlSeq "begin" name <- braced guard $ name == "verbatim" || name == "Verbatim" || - name == "lstlisting" + name == "lstlisting" || name == "minted" verbEnv name rest <- getInput return (r,rest) -rawLaTeXBlock :: GenParser Char ParserState String +rawLaTeXBlock :: Parser [Char] ParserState String rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand) -rawLaTeXInline :: GenParser Char ParserState Inline +rawLaTeXInline :: Parser [Char] ParserState Inline rawLaTeXInline = do (res, raw) <- withRaw inlineCommand if res == mempty @@ -678,7 +739,9 @@ environments = M.fromList verbEnv "code")) , ("verbatim", codeBlock <$> (verbEnv "verbatim")) , ("Verbatim", codeBlock <$> (verbEnv "Verbatim")) - , ("lstlisting", codeBlock <$> (verbEnv "listlisting")) + , ("lstlisting", codeBlock <$> (verbEnv "lstlisting")) + , ("minted", liftA2 (\l c -> codeBlockWith ("",[l],[]) c) + (grouped (many1 $ satisfy (/= '}'))) (verbEnv "minted")) , ("displaymath", mathEnv Nothing "displaymath") , ("equation", mathEnv Nothing "equation") , ("equation*", mathEnv Nothing "equation*") @@ -878,9 +941,9 @@ parseAligns :: LP [Alignment] parseAligns = try $ do char '{' optional $ char '|' - let cAlign = char 'c' >> return AlignCenter - let lAlign = char 'l' >> return AlignLeft - let rAlign = char 'r' >> return AlignRight + let cAlign = AlignCenter <$ char 'c' + let lAlign = AlignLeft <$ char 'l' + let rAlign = AlignRight <$ char 'r' let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign) aligns' <- sepEndBy alignChar (optional $ char '|') spaces @@ -891,16 +954,20 @@ parseAligns = try $ do hline :: LP () hline = () <$ (try $ spaces >> controlSeq "hline") +lbreak :: LP () +lbreak = () <$ (try $ spaces *> controlSeq "\\") + +amp :: LP () +amp = () <$ (try $ spaces *> char '&') + parseTableRow :: Int -- ^ number of columns -> LP [Blocks] parseTableRow cols = try $ do - let amp = try $ spaces *> string "&" - let tableCellInline = notFollowedBy (amp <|> controlSeq "\\") >> inline - cells' <- sepBy ((plain . trimInlines . mconcat) <$> many tableCellInline) amp + let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline + let tableCell = (plain . trimInlines . mconcat) <$> many tableCellInline + cells' <- sepBy tableCell amp guard $ length cells' == cols spaces - optional $ controlSeq "\\" - spaces return cells' simpTable :: LP Blocks @@ -909,8 +976,8 @@ simpTable = try $ do aligns <- parseAligns let cols = length aligns optional hline - header' <- option [] $ try (parseTableRow cols <* hline) - rows <- many (parseTableRow cols <* optional hline) + header' <- option [] $ try (parseTableRow cols <* lbreak <* hline) + rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline) spaces let header'' = if null header' then replicate cols mempty |