aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2012-01-29 23:54:00 -0800
committerJohn MacFarlane <fiddlosopher@gmail.com>2012-02-04 09:56:43 -0800
commit75485c2f112cdc2e1f95f871d01cc356510166ae (patch)
tree2d495f048bd887403db41418c14445b54af1b00c /src/Text/Pandoc/Readers
parent23ca68a5c41ef68397ed2217e31bf6e720fb0534 (diff)
downloadpandoc-75485c2f112cdc2e1f95f871d01cc356510166ae.tar.gz
Complete rewrite of LaTeX reader.
* The new reader is more robust, accurate, and extensible. It is still quite incomplete, but it should be easier now to add features.
* Text.Pandoc.Parsing: Added withRaw combinator.
* Markdown reader: do escapedChar before raw latex inline. Otherwise we capture commands like \{.
* Fixed latex citation tests for new citeproc.
* Handle \include{} commands in latex. This is done in pandoc.hs, not the (pure) latex reader. But the reader exports the needed function, handleIncludes.
* Moved err and warn from pandoc.hs to Shared.
* Fixed tests - raw tex should sometimes have trailing space.
* Updated lhs-test for highlighting-kate changes.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs1697
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs21
2 files changed, 737 insertions, 981 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3d9689168..9eb9eb2f9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.LaTeX
- Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ Copyright : Copyright (C) 2006-2012 John MacFarlane
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -27,20 +27,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of LaTeX to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.LaTeX (
- readLaTeX,
- rawLaTeXInline,
- rawLaTeXEnvironment'
+module Text.Pandoc.Readers.LaTeX ( readLaTeX,
+ rawLaTeXInline,
+ rawLaTeXBlock,
+ handleIncludes
) where
-import Text.ParserCombinators.Parsec
+import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional)
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
-import Data.Maybe ( fromMaybe )
-import Data.Char ( chr, toUpper )
-import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import Data.Char ( chr, ord )
import Control.Monad
+import Text.Pandoc.Builder
+import Data.Char (isLetter)
+import Control.Applicative
+import Data.Monoid
+import System.FilePath (replaceExtension)
+import qualified Data.Map as M
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ParserState -- ^ Parser state, including options for parser
@@ -48,1002 +52,757 @@ readLaTeX :: ParserState -- ^ Parser state, including options for parser
-> Pandoc
readLaTeX = readWith parseLaTeX
--- characters with special meaning
-specialChars :: [Char]
-specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-"
-
---
--- utility functions
---
-
--- | Returns text between brackets and its matching pair.
-bracketedText :: Char -> Char -> GenParser Char st [Char]
-bracketedText openB closeB = do
- result <- charsInBalanced openB closeB anyChar
- return $ [openB] ++ result ++ [closeB]
-
--- | Returns an option or argument of a LaTeX command.
-optOrArg :: GenParser Char st [Char]
-optOrArg = try $ spaces >> (bracketedText '{' '}' <|> bracketedText '[' ']')
-
--- | True if the string begins with '{'.
-isArg :: [Char] -> Bool
-isArg ('{':_) = True
-isArg _ = False
+parseLaTeX :: LP Pandoc
+parseLaTeX = do
+ bs <- blocks
+ eof
+ st <- getState
+ let title' = stateTitle st
+ let authors' = stateAuthors st
+ let date' = stateDate st
+ return $ Pandoc (Meta title' authors' date') $ toList bs
--- | Returns list of options and arguments of a LaTeX command.
-commandArgs :: GenParser Char st [[Char]]
-commandArgs = many optOrArg
+type LP = GenParser Char ParserState
--- | Parses LaTeX command, returns (name, star, list of options or arguments).
-command :: GenParser Char st ([Char], [Char], [[Char]])
-command = do
+anyControlSeq :: LP String
+anyControlSeq = do
char '\\'
- name <- many1 letter
- star <- option "" (string "*") -- some commands have starred versions
- args <- commandArgs
- return (name, star, args)
-
-begin :: [Char] -> GenParser Char st [Char]
-begin name = try $ do
- string "\\begin"
- spaces
- char '{'
- string name
- char '}'
- optional commandArgs
- spaces
+ next <- option '\n' anyChar
+ name <- case next of
+ '\n' -> return ""
+ c | isLetter c -> (c:) <$> (many letter <* optional sp)
+ | otherwise -> return [c]
return name
-end :: [Char] -> GenParser Char st [Char]
-end name = try $ do
- string "\\end"
- spaces
- char '{'
- string name
- char '}'
+controlSeq :: String -> LP String
+controlSeq name = try $ do
+ char '\\'
+ case name of
+ "" -> mzero
+ [c] | not (isLetter c) -> string [c]
+ cs -> string cs <* optional sp
return name
--- | Returns a list of block elements containing the contents of an
--- environment.
-environment :: [Char] -> GenParser Char ParserState [Block]
-environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces
-
-anyEnvironment :: GenParser Char ParserState Block
-anyEnvironment = try $ do
- string "\\begin"
- spaces
+sp :: LP ()
+sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
+ <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline)
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+-- | Parse a TeX @^^@ character escape and return the character it denotes.
+-- (Despite the name, this handles the caret-caret notation, not @~@.)
+-- If @^^@ is followed by two lowercase hex digits, they are read as the
+-- character code. Otherwise the single following character has its code
+-- shifted by 64: down when the code is in 64..127, up when it is not.
+-- NOTE(review): the accepted range includes code 128; TeX's notation may be
+-- limited to codes below 128 -- confirm before tightening.
+tildeEscape :: LP Char
+tildeEscape = try $ do
+ string "^^"
+ c <- satisfy (\x -> x >= '\0' && x <= '\128')
+ -- A second hex digit is only looked for when the first character is itself
+ -- a lowercase hex digit; an empty result selects the shift-by-64 form.
+ d <- if isLowerHex c
+ then option "" $ count 1 (satisfy isLowerHex)
+ else return ""
+ if null d
+ then case ord c of
+ x | x >= 64 && x <= 127 -> return $ chr (x - 64)
+ | otherwise -> return $ chr (x + 64)
+ else return $ chr $ read ('0':'x':c:d)
+
+-- | Skip a LaTeX comment: a @%@, the remainder of the line, and the
+-- newline that ends the line when one is present.
+comment :: LP ()
+comment = do
+  char '%'
+  skipMany (satisfy (/='\n'))
+  -- A comment on the final line of the input may have no trailing newline.
+  -- Demanding one would fail after '%' was already consumed, which turns
+  -- into a hard parse error for the whole document.
+  optional newline
+  return ()
+
+grouped :: Monoid a => LP a -> LP a
+grouped parser = try $ char '{' *> (mconcat <$> manyTill parser (char '}'))
+
+braced :: LP String
+braced = char '{' *> (concat <$> manyTill
+ ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
+ <|> try (string "\\}")
+ <|> try (string "\\{")
+ <|> ((\x -> "{" ++ x ++ "}") <$> braced)
+ <|> count 1 anyChar
+ ) (char '}'))
+
+bracketed :: Monoid a => LP a -> LP a
+bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
+
+trim :: String -> String
+trim = removeLeadingTrailingSpace
+
+mathDisplay :: LP String -> LP Inlines
+mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
+
+mathInline :: LP String -> LP Inlines
+mathInline p = math <$> (try p >>= applyMacros')
+
+double_quote :: LP Inlines
+double_quote = (doubleQuoted . mconcat) <$>
+ (try $ string "``" *> manyTill inline (try $ string "''"))
+
+single_quote :: LP Inlines
+single_quote = (singleQuoted . mconcat) <$>
+ (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter))
+
+-- | Parse a single inline element: a comment, whitespace, plain text, an
+-- inline command, a braced group, dashes, quotes, math, super/subscripts,
+-- literate-Haskell verbatim, a @^^@ escape, or a leftover character.
+-- The alternatives are tried in order, and an alternative that fails after
+-- consuming input prevents later ones from being tried.
+inline :: LP Inlines
+inline = (mempty <$ comment)
+     <|> (space <$ sp)
+     <|> inlineText
+     <|> inlineCommand
+     <|> grouped inline
+     <|> (char '-' *> option (str "-")
+           ((char '-') *> option (str "–") (str "—" <$ char '-')))
+     <|> double_quote
+     <|> single_quote
+     <|> (str "’" <$ char '\'')
+     <|> (str "\160" <$ char '~')
+     <|> (mathDisplay $ string "$$" *> manyTill anyChar (try $ string "$$"))
+     <|> (mathInline $ char '$' *> manyTill anyChar (char '$'))
+     -- The @^^@ escape must be tried before superscript: the superscript
+     -- parser consumes the first '^' and then fails on the second one, and
+     -- a failure after consuming input stops '<|>' from trying anything
+     -- further.  'tildeEscape' is wrapped in 'try', so it backtracks
+     -- cleanly on a lone '^'.
+     <|> (str <$> count 1 tildeEscape)
+     <|> (superscript <$> (char '^' *> tok))
+     <|> (subscript <$> (char '_' *> tok))
+     <|> (failUnlessLHS *> char '|' *> doLHSverb)
+     <|> (str <$> string "]")
+     <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters
+
+inlines :: LP Inlines
+inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
+
+block :: LP Blocks
+block = (mempty <$ comment)
+ <|> (mempty <$ ((spaceChar <|> blankline) *> spaces))
+ <|> environment
+ <|> mempty <$ macro -- TODO improve macros, make them work everywhere
+ <|> blockCommand
+ <|> grouped block
+ <|> paragraph
+
+
+blocks :: LP Blocks
+blocks = mconcat <$> many block
+
+blockCommand :: LP Blocks
+blockCommand = try $ do
+ name <- anyControlSeq
+ star <- option "" (string "*" <* optional sp)
+ let name' = name ++ star
+ case M.lookup name' blockCommands of
+ Just p -> p
+ Nothing -> case M.lookup name blockCommands of
+ Just p -> p
+ Nothing -> mzero
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = (str "[") <> x <> (str "]")
+
+blockCommands :: M.Map String (LP Blocks)
+blockCommands = M.fromList
+ [ ("par", pure mempty)
+ , ("title", mempty <$ (tok >>= addTitle))
+ , ("subtitle", mempty <$ (tok >>= addSubtitle))
+ , ("author", mempty <$ authors)
+ , ("date", mempty <$ (tok >>= addDate))
+ , ("maketitle", pure mempty)
+ -- \ignore{} is used conventionally in literate haskell for definitions
+ -- that are to be processed by the compiler but not printed.
+ , ("ignore", mempty <$ tok)
+ , ("hyperdef", mempty <$ (tok *> tok))
+ , ("chapter", updateState (\s -> s{ stateHasChapters = True }) *> section 0)
+ , ("section", section 1)
+ , ("subsection", section 2)
+ , ("subsubsection", section 3)
+ , ("paragraph", section 4)
+ , ("subparagraph", section 5)
+ , ("opening", (para . trimInlines) <$> tok)
+ , ("closing", (para . trimInlines) <$> tok)
+ , ("rule", optional opt *> tok *> tok *> pure horizontalRule)
+ , ("begin", mzero) -- these are here so they won't be interpreted as inline
+ , ("end", mzero)
+ , ("item", loose_item)
+ , ("documentclass", optional opt *> braced *> preamble)
+ -- should be parsed by macro, but we need this
+ -- here so these aren't parsed as inline
+ , ("newcommand", mempty <$ (tok *> optional opt *> tok))
+ , ("renewcommand", mempty <$ (tok *> optional opt *> tok))
+ , ("newenvironment", mempty <$ (tok *> tok *> tok))
+ , ("renewenvironment", mempty <$ (tok *> tok *> tok))
+ , ("special", pure mempty)
+ , ("pdfannot", pure mempty)
+ , ("pdfstringdef", pure mempty)
+ , ("index", pure mempty)
+ , ("bibliography", pure mempty)
+ ]
+
+addTitle :: Inlines -> LP ()
+addTitle tit = updateState (\s -> s{ stateTitle = toList tit })
+
+addSubtitle :: Inlines -> LP ()
+addSubtitle tit = updateState (\s -> s{ stateTitle = stateTitle s ++
+ toList (str ":" <> linebreak <> tit) })
+
+-- | Parse the braced argument of @\author@ and store the authors in the
+-- parser state.  Authors are separated by the @\and@ control sequence;
+-- each one is a non-empty run of inlines, normalized with
+-- 'normalizeSpaces' before being stored in 'stateAuthors'.
+authors :: LP ()
+authors = try $ do
char '{'
- name <- many letter
- star <- option "" (string "*") -- some environments have starred variants
- char '}'
- optional commandArgs
- spaces
- contents <- manyTill block (end (name ++ star))
- spaces
- return $ BlockQuote contents
-
---
--- parsing documents
---
+  let oneAuthor = mconcat <$> many1 (notFollowedBy' (controlSeq "and") >> inline)
+  auths <- sepBy oneAuthor (controlSeq "and")
+  -- 'inline' never consumes a bare '}', so the brace that closes the
+  -- argument is still in the input here and has to be consumed explicitly;
+  -- otherwise it is left behind as a stray character after the command.
+  char '}'
+  updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths })
+
+addDate :: Inlines -> LP ()
+addDate dat = updateState (\s -> s{ stateDate = toList dat })
+
+section :: Int -> LP Blocks
+section lvl = do
+ hasChapters <- stateHasChapters `fmap` getState
+ let lvl' = if hasChapters then lvl + 1 else lvl
+ optional sp
+ optional opt
+ contents <- grouped inline
+ return $ header lvl' contents
+
+inlineCommand :: LP Inlines
+inlineCommand = try $ do
+ name <- anyControlSeq
+ guard $ not $ isBlockCommand name
+ parseRaw <- stateParseRaw `fmap` getState
+ star <- option "" (string "*")
+ let name' = name ++ star
+ case M.lookup name' inlineCommands of
+ Just p -> p
+ Nothing -> case M.lookup name inlineCommands of
+ Just p -> p
+ Nothing
+ | parseRaw ->
+ (rawInline "latex" . (('\\':name') ++)) <$>
+ (withRaw (optional opt *> many braced)
+ >>= applyMacros' . snd)
+ | otherwise -> return mempty
+
+-- | True when the given command name has an entry in 'blockCommands'.
+isBlockCommand :: String -> Bool
+isBlockCommand cmdName = cmdName `M.member` blockCommands
+
+inlineCommands :: M.Map String (LP Inlines)
+inlineCommands = M.fromList
+ [ ("emph", emph <$> tok)
+ , ("textit", emph <$> tok)
+ , ("textsc", smallcaps <$> tok)
+ , ("sout", strikeout <$> tok)
+ , ("textsuperscript", superscript <$> tok)
+ , ("textsubscript", subscript <$> tok)
+ , ("textbackslash", lit "\\")
+ , ("backslash", lit "\\")
+ , ("textbf", strong <$> tok)
+ , ("ldots", lit "…")
+ , ("dots", lit "…")
+ , ("mdots", lit "…")
+ , ("sim", lit "~")
+ , ("label", inBrackets <$> tok)
+ , ("ref", inBrackets <$> tok)
+ , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
+ , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
+ , ("ensuremath", mathInline $ braced)
+ , ("$", lit "$")
+ , ("%", lit "%")
+ , ("&", lit "&")
+ , ("#", lit "#")
+ , ("_", lit "_")
+ , ("{", lit "{")
+ , ("}", lit "}")
+ -- old TeX commands
+ , ("em", emph <$> inlines)
+ , ("it", emph <$> inlines)
+ , ("sl", emph <$> inlines)
+ , ("bf", strong <$> inlines)
+ , ("rm", inlines)
+ , ("itshape", emph <$> inlines)
+ , ("slshape", emph <$> inlines)
+ , ("scshape", smallcaps <$> inlines)
+ , ("bfseries", strong <$> inlines)
+ , ("/", pure mempty) -- italic correction
+ , ("cc", lit "ç")
+ , ("cC", lit "Ç")
+ , ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("sect", lit "§")
+ , ("`", option (str "`") $ try $ tok >>= accent grave)
+ , ("'", option (str "'") $ try $ tok >>= accent acute)
+ , ("^", option (str "^") $ try $ tok >>= accent hat)
+ , ("~", option (str "~") $ try $ tok >>= accent circ)
+ , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
+ , ("i", lit "i")
+ , ("\\", linebreak <$ optional (bracketed inline *> optional sp))
+ , (",", pure mempty)
+ , ("@", pure mempty)
+ , (" ", lit "\160")
+ , ("bar", lit "|")
+ , ("textless", lit "<")
+ , ("textgreater", lit ">")
+ , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+ , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+ , ("verb", doverb)
+ , ("lstinline", doverb)
+ , ("texttt", (code . stringify . toList) <$> tok)
+ , ("url", (unescapeURL <$> braced) >>= \url ->
+ pure (link url "" (codeWith ("",["url"],[]) url)))
+ , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
+ tok >>= \lab ->
+ pure (link url "" lab))
+ , ("includegraphics", optional opt *> (unescapeURL <$> braced) >>=
+ (\src -> pure (image src "" (str "image"))))
+ , ("cite", citation NormalCitation False)
+ , ("citep", citation NormalCitation False)
+ , ("citep*", citation NormalCitation False)
+ , ("citeal", citation NormalCitation False)
+ , ("citealp", citation NormalCitation False)
+ , ("citealp*", citation NormalCitation False)
+ , ("autocite", citation NormalCitation False)
+ , ("footcite", citation NormalCitation False)
+ , ("parencite", citation NormalCitation False)
+ , ("supercite", citation NormalCitation False)
+ , ("footcitetext", citation NormalCitation False)
+ , ("citeyearpar", citation SuppressAuthor False)
+ , ("citeyear", citation SuppressAuthor False)
+ , ("autocite*", citation SuppressAuthor False)
+ , ("cite*", citation SuppressAuthor False)
+ , ("parencite*", citation SuppressAuthor False)
+ , ("textcite", citation AuthorInText False)
+ , ("citet", citation AuthorInText False)
+ , ("citet*", citation AuthorInText False)
+ , ("citealt", citation AuthorInText False)
+ , ("citealt*", citation AuthorInText False)
+ , ("textcites", citation AuthorInText True)
+ , ("cites", citation NormalCitation True)
+ , ("autocites", citation NormalCitation True)
+ , ("footcites", citation NormalCitation True)
+ , ("parencites", citation NormalCitation True)
+ , ("supercites", citation NormalCitation True)
+ , ("footcitetexts", citation NormalCitation True)
+ , ("Autocite", citation NormalCitation False)
+ , ("Footcite", citation NormalCitation False)
+ , ("Parencite", citation NormalCitation False)
+ , ("Supercite", citation NormalCitation False)
+ , ("Footcitetext", citation NormalCitation False)
+ , ("Citeyearpar", citation SuppressAuthor False)
+ , ("Citeyear", citation SuppressAuthor False)
+ , ("Autocite*", citation SuppressAuthor False)
+ , ("Cite*", citation SuppressAuthor False)
+ , ("Parencite*", citation SuppressAuthor False)
+ , ("Textcite", citation AuthorInText False)
+ , ("Textcites", citation AuthorInText True)
+ , ("Cites", citation NormalCitation True)
+ , ("Autocites", citation NormalCitation True)
+ , ("Footcites", citation NormalCitation True)
+ , ("Parencites", citation NormalCitation True)
+ , ("Supercites", citation NormalCitation True)
+ , ("Footcitetexts", citation NormalCitation True)
+ , ("citetext", complexNatbibCitation NormalCitation)
+ , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
+ complexNatbibCitation AuthorInText)
+ <|> citation AuthorInText False)
+ ]
+
+unescapeURL :: String -> String
+unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
+ where isEscapable '%' = True
+ isEscapable '#' = True
+ isEscapable _ = False
+unescapeURL (x:xs) = x:unescapeURL xs
+unescapeURL [] = ""
+
+doverb :: LP Inlines
+doverb = do
+ marker <- anyChar
+ code <$> manyTill (satisfy (/='\n')) (char marker)
+
+doLHSverb :: LP Inlines
+doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
+
+lit :: String -> LP Inlines
+lit = pure . str
+
+accent :: (Char -> Char) -> Inlines -> LP Inlines
+accent f ils =
+ case toList ils of
+ (Str (x:xs) : ys) -> return $ fromList $ (Str (f x : xs) : ys)
+ [] -> mzero
+ _ -> return ils
+
+grave :: Char -> Char
+grave 'A' = 'À'
+grave 'E' = 'È'
+grave 'I' = 'Ì'
+grave 'O' = 'Ò'
+grave 'U' = 'Ù'
+grave 'a' = 'à'
+grave 'e' = 'è'
+grave 'i' = 'ì'
+grave 'o' = 'ò'
+grave 'u' = 'ù'
+grave c = c
+
+acute :: Char -> Char
+acute 'A' = 'Á'
+acute 'E' = 'É'
+acute 'I' = 'Í'
+acute 'O' = 'Ó'
+acute 'U' = 'Ú'
+acute 'a' = 'á'
+acute 'e' = 'é'
+acute 'i' = 'í'
+acute 'o' = 'ó'
+acute 'u' = 'ú'
+acute c = c
+
+-- | Add a circumflex accent to a vowel (used for the @\^@ command).
+-- Characters without an entry are returned unchanged.
+hat :: Char -> Char
+hat 'A' = 'Â'
+hat 'E' = 'Ê'
+hat 'I' = 'Î'
+hat 'O' = 'Ô'
+hat 'U' = 'Û'
+hat 'a' = 'â'  -- was 'ã' (a with tilde), which is the wrong accent
+hat 'e' = 'ê'
+hat 'i' = 'î'
+hat 'o' = 'ô'
+hat 'u' = 'û'
+hat c = c
+
+-- | Add a tilde accent to a letter (this is the table used for the @\~@
+-- command, despite the name).  Characters without an entry are returned
+-- unchanged.
+circ :: Char -> Char
+circ 'A' = 'Ã'
+circ 'a' = 'ã'  -- lowercase counterpart of 'Ã'; previously missing
+circ 'O' = 'Õ'
+circ 'o' = 'õ'
+circ 'N' = 'Ñ'
+circ 'n' = 'ñ'
+circ c = c
+
+umlaut :: Char -> Char
+umlaut 'A' = 'Ä'
+umlaut 'E' = 'Ë'
+umlaut 'I' = 'Ï'
+umlaut 'O' = 'Ö'
+umlaut 'U' = 'Ü'
+umlaut 'a' = 'ä'
+umlaut 'e' = 'ë'
+umlaut 'i' = 'ï'
+umlaut 'o' = 'ö'
+umlaut 'u' = 'ü'
+umlaut c = c
+
+tok :: LP Inlines
+tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
+
+opt :: LP Inlines
+opt = bracketed inline <* optional sp
+
+inlineText :: LP Inlines
+inlineText = str <$> many1 inlineChar
+
+-- | Parse one character of ordinary text: anything that is not one of the
+-- characters given special treatment by the inline parsers, and not
+-- whitespace.
+inlineChar :: LP Char
+inlineChar = satisfy (`notElem` reserved)
+  where reserved = "\\$%^_&~#{}'`-] \t\n"
+
+environment :: LP Blocks
+environment = do
+ controlSeq "begin"
+ name <- braced
+ parseRaw <- stateParseRaw `fmap` getState
+ let addBegin x = "\\begin{" ++ name ++ "}" ++ x
+ case M.lookup name environments of
+ Just p -> p
+ Nothing -> if parseRaw
+ then (rawBlock "latex" . addBegin) <$>
+ (withRaw (env name blocks) >>= applyMacros' . snd)
+ else env name blocks
+
+-- | Replace "include" commands with file contents.
+handleIncludes :: String -> IO String
+handleIncludes [] = return []
+handleIncludes ('\\':xs) =
+ case runParser include defaultParserState "input" ('\\':xs) of
+ Right (f, rest) -> do ys <- catch (readFile (replaceExtension f ".tex"))
+ (\e -> warn
+ ("could not open included file `" ++
+ f ++ "': " ++ show e) >> return "")
+ (ys ++) `fmap` handleIncludes rest
+ _ -> case runParser verbatimEnv defaultParserState "input" ('\\':xs) of
+ Right (r, rest) -> (r ++) `fmap` handleIncludes rest
+ _ -> ('\\':) `fmap` handleIncludes xs
+handleIncludes (x:xs) = (x:) `fmap` handleIncludes xs
+
+include :: LP (FilePath, String)
+include = do
+ controlSeq "include"
+ f <- braced
+ rest <- getInput
+ return (f, rest)
+
+verbatimEnv :: LP (String, String)
+verbatimEnv = do
+ (_,r) <- withRaw $ do
+ controlSeq "begin"
+ name <- braced
+ guard $ name == "verbatim" || name == "Verbatim" ||
+ name == "lstlisting"
+ verbEnv name
+ rest <- getInput
+ return (r,rest)
--- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble :: GenParser Char ParserState ()
-processLaTeXPreamble = do
- try $ string "\\documentclass"
- skipMany $ bibliographic <|> macro <|> commentBlock <|> skipChar
+-- | Parse any LaTeX environment and return a string containing
+-- the whole literal environment as raw TeX.
+rawLaTeXBlock :: GenParser Char ParserState String
+rawLaTeXBlock =
+ (rawLaTeXEnvironment <|> (snd <$> withRaw blockCommand)) >>= applyMacros'
--- | Parse LaTeX and return 'Pandoc'.
-parseLaTeX :: GenParser Char ParserState Pandoc
-parseLaTeX = do
- spaces
- skipMany $ comment >> spaces
- blocks <- try (processLaTeXPreamble >> environment "document")
- <|> (many block >>~ (spaces >> eof))
- state <- getState
- let blocks' = filter (/= Null) blocks
- let title' = stateTitle state
- let authors' = stateAuthors state
- let date' = stateDate state
- return $ Pandoc (Meta title' authors' date') blocks'
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = spaces >> many block
-
-block :: GenParser Char ParserState Block
-block = choice [ hrule
- , codeBlock
- , header
- , list
- , blockQuote
- , simpleTable
- , commentBlock
- , macro
- , bibliographic
- , para
- , itemBlock
- , unknownEnvironment
- , ignore
- , unknownCommand
- ] <?> "block"
-
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = section <|> chapter
-
-chapter :: GenParser Char ParserState Block
-chapter = try $ do
- string "\\chapter"
- result <- headerWithLevel 1
- updateState $ \s -> s{ stateHasChapters = True }
- return result
-
-section :: GenParser Char ParserState Block
-section = try $ do
- char '\\'
- subs <- many (try (string "sub"))
- base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4)
- st <- getState
- let lev = if stateHasChapters st
- then length subs + base + 1
- else length subs + base
- headerWithLevel lev
+rawLaTeXEnvironment :: GenParser Char ParserState String
+rawLaTeXEnvironment = try $ do
+ controlSeq "begin"
+ name <- braced
+ let addBegin x = "\\begin{" ++ name ++ "}" ++ x
+ addBegin <$> (withRaw (env name blocks) >>= applyMacros' . snd)
-headerWithLevel :: Int -> GenParser Char ParserState Block
-headerWithLevel lev = try $ do
- spaces
- optional (char '*')
- spaces
- optional $ bracketedText '[' ']' -- alt title
+rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline = do
+ (res, raw) <- withRaw inlineCommand
+ if res == mempty
+ then return (Str "")
+ else RawInline "latex" <$> (applyMacros' raw)
+
+-- | Parsers for the bodies of known environments, keyed by environment
+-- name.  Each parser runs after @\begin{name}@ has been consumed and is
+-- responsible for consuming the matching @\end{name}@.
+environments :: M.Map String (LP Blocks)
+environments = M.fromList
+  [ ("document", env "document" blocks)
+  , ("letter", env "letter" blocks)
+  , ("center", env "center" blocks)
+  , ("tabular", env "tabular" simpTable)
+  , ("quote", blockQuote <$> env "quote" blocks)
+  , ("quotation", blockQuote <$> env "quotation" blocks)
+  , ("itemize", bulletList <$> listenv "itemize" (many item))
+  , ("description", definitionList <$> listenv "description" (many descItem))
+  , ("enumerate", ordered_list)
+  , ("code", failUnlessLHS *>
+      (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code"))
+  , ("verbatim", codeBlock <$> (verbEnv "verbatim"))
+  , ("Verbatim", codeBlock <$> (verbEnv "Verbatim"))
+  -- The name given to 'verbEnv' must match the key: it is the name looked
+  -- for in the closing \end{...}.  (It was misspelled "listlisting", so the
+  -- end of an lstlisting environment was never recognized.)
+  , ("lstlisting", codeBlock <$> (verbEnv "lstlisting"))
+  , ("displaymath", mathEnv Nothing "displaymath")
+  , ("equation", mathEnv Nothing "equation")
+  , ("equation*", mathEnv Nothing "equation*")
+  , ("gather", mathEnv (Just "gathered") "gather")
+  , ("gather*", mathEnv (Just "gathered") "gather*")
+  -- amsmath spells this environment "multline"; the "multiline" keys are
+  -- kept so input that relied on the old spelling parses as before.
+  , ("multline", mathEnv (Just "gathered") "multline")
+  , ("multline*", mathEnv (Just "gathered") "multline*")
+  , ("multiline", mathEnv (Just "gathered") "multiline")
+  , ("multiline*", mathEnv (Just "gathered") "multiline*")
+  , ("eqnarray", mathEnv (Just "aligned*") "eqnarray")
+  , ("eqnarray*", mathEnv (Just "aligned*") "eqnarray*")
+  , ("align", mathEnv (Just "aligned*") "align")
+  , ("align*", mathEnv (Just "aligned*") "align*")
+  , ("alignat", mathEnv (Just "aligned*") "alignat")
+  , ("alignat*", mathEnv (Just "aligned*") "alignat*")
+  ]
+
+item :: LP Blocks
+item = blocks *> controlSeq "item" *> optional opt *> blocks
+
+loose_item :: LP Blocks
+loose_item = do
+ ctx <- stateParserContext `fmap` getState
+ if ctx == ListItemState
+ then mzero
+ else return mempty
+
+descItem :: LP (Inlines, [Blocks])
+descItem = do
+ blocks -- skip blocks before item
+ controlSeq "item"
+ optional sp
+ ils <- opt
+ bs <- blocks
+ return (ils, [bs])
+
+env :: String -> LP a -> LP a
+env name p = p <* (controlSeq "end" *> braced >>= guard . (== name))
+
+listenv :: String -> LP a -> LP a
+listenv name p = try $ do
+ oldCtx <- stateParserContext `fmap` getState
+ updateState $ \st -> st{ stateParserContext = ListItemState }
+ res <- env name p
+ updateState $ \st -> st{ stateParserContext = oldCtx }
+ return res
+
+mathEnv :: Maybe String -> String -> LP Blocks
+mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name)
+ where inner x = case innerEnv of
+ Nothing -> x
+ Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
+ "\\end{" ++ y ++ "}"
+
+verbEnv :: String -> LP String
+verbEnv name = do
+ optional opt
+ optional blankline
+ let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
+ res <- manyTill anyChar endEnv
+ return $ stripTrailingNewlines res
+
+ordered_list :: LP Blocks
+ordered_list = do
+ optional sp
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+ try $ char '[' *> anyOrderedListMarker <* char ']'
spaces
- char '{'
- title' <- manyTill inline (char '}')
+ optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
spaces
- return $ Header lev (normalizeSpaces title')
+ start <- option 1 $ try $ do controlSeq "setcounter"
+ grouped (string "enum" *> many1 (oneOf "iv"))
+ optional sp
+ num <- grouped (many1 digit)
+ spaces
+ return $ (read num + 1 :: Int)
+ bs <- listenv "enumerate" (many item)
+ return $ orderedListWith (start, style, delim) bs
+
+paragraph :: LP Blocks
+paragraph = do
+ x <- mconcat <$> many1 inline
+ if x == mempty
+ then return mempty
+ else return $ para $ trimInlines x
+
+-- | Consume the document preamble, up to but not including
+-- @\begin{document}@, and produce no blocks.  Block commands found in the
+-- preamble are still run, so any parser-state updates they make are kept;
+-- everything else is skipped.
+preamble :: LP Blocks
+preamble = mempty <$ manyTill preambleBlock beginDoc
+  where -- 'lookAhead' leaves \begin{document} in the input for 'environment'.
+        beginDoc = lookAhead $ controlSeq "begin" *> string "{document}"
+        preambleBlock = (mempty <$ comment)
+                    <|> (mempty <$ sp)
+                    <|> (mempty <$ blanklines)
+                    <|> (mempty <$ macro)
+                    <|> blockCommand
+                    <|> (mempty <$ anyControlSeq)
+                    <|> (mempty <$ braced)
+                    <|> (mempty <$ anyChar)
+
+-------
+
+-- citations
+
+addPrefix :: Inlines -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = toList p ++ citationPrefix k} : ks
+addPrefix _ _ = []
---
--- hrule block
---
+addSuffix :: Inlines -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+ let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ toList s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: LP [Citation]
+simpleCiteArgs = try $ do
+ first <- optionMaybe opt
+ second <- optionMaybe opt
+ char '{'
+ keys <- manyTill citationLabel (char '}')
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> (mempty, s )
+ (Just s , Just t ) -> (s , t )
+ _ -> (mempty, mempty)
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
-hrule :: GenParser Char st Block
-hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
- "\\newpage" ] >> spaces >> return HorizontalRule
+citationLabel :: LP String
+citationLabel = trim <$>
+ (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp)
--- tables
+-- | Parse the argument group(s) of a citation command and return the
+-- citations with the given mode applied.  When @multi@ is true, one or more
+-- argument groups are read; otherwise exactly one.  For 'AuthorInText' only
+-- the first citation gets the mode; for other modes every citation does.
+-- Fails if no citation keys were given.
+cites :: CitationMode -> Bool -> LP [Citation]
+cites mode multi = try $ do
+  cits <- if multi
+             then many1 simpleCiteArgs
+             else count 1 simpleCiteArgs
+  -- An empty key list (for example \cite{}) produces no citations.  Fail the
+  -- parser in that case rather than binding a partial pattern that raises a
+  -- runtime error when the result is forced.
+  case concat cits of
+    []     -> mzero
+    (c:cs) -> return $ case mode of
+                AuthorInText -> c {citationMode = mode} : cs
+                _            -> map (\a -> a {citationMode = mode}) (c:cs)
-simpleTable :: GenParser Char ParserState Block
-simpleTable = try $ do
- string "\\begin"
- spaces
- string "{tabular}"
- spaces
- aligns <- parseAligns
- let cols = length aligns
- optional hline
- header' <- option [] $ parseTableHeader cols
- rows <- many (parseTableRow cols >>~ optional hline)
- spaces
- end "tabular"
- spaces
- let header'' = if null header'
- then replicate cols []
- else header'
- return $ Table [] aligns (replicate cols 0) header'' rows
+citation :: CitationMode -> Bool -> LP Inlines
+citation mode multi = (flip cite mempty) <$> cites mode multi
+
+complexNatbibCitation :: CitationMode -> LP Inlines
+complexNatbibCitation mode = try $ do
+ let ils = (trimInlines . mconcat) <$>
+ many (notFollowedBy (oneOf "\\};") >> inline)
+ let parseOne = try $ do
+ skipSpaces
+ pref <- ils
+ cit' <- inline -- expect a citation
+ let citlist = toList cit'
+ cits' <- case citlist of
+ [Cite cs _] -> return cs
+ _ -> mzero
+ suff <- ils
+ skipSpaces
+ optional $ char ';'
+ return $ addPrefix pref $ addSuffix suff $ cits'
+ (c:cits) <- grouped parseOne
+ return $ cite (c{ citationMode = mode }:cits) mempty
-hline :: GenParser Char st ()
-hline = try $ spaces >> string "\\hline" >> return ()
+-- tables
-parseAligns :: GenParser Char ParserState [Alignment]
+parseAligns :: LP [Alignment]
parseAligns = try $ do
char '{'
optional $ char '|'
let cAlign = char 'c' >> return AlignCenter
let lAlign = char 'l' >> return AlignLeft
let rAlign = char 'r' >> return AlignRight
- let alignChar = cAlign <|> lAlign <|> rAlign
+ let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign)
aligns' <- sepEndBy alignChar (optional $ char '|')
+ spaces
char '}'
spaces
return aligns'
-parseTableHeader :: Int -- ^ number of columns
- -> GenParser Char ParserState [TableCell]
-parseTableHeader cols = try $ do
- cells' <- parseTableRow cols
- hline
- return cells'
+hline :: LP ()
+hline = () <$ (try $ spaces >> controlSeq "hline")
parseTableRow :: Int -- ^ number of columns
- -> GenParser Char ParserState [TableCell]
+ -> LP [Blocks]
parseTableRow cols = try $ do
- let tableCellInline = notFollowedBy (char '&' <|>
- (try $ char '\\' >> char '\\')) >> inline
- cells' <- sepBy (spaces >> liftM ((:[]) . Plain . normalizeSpaces)
- (many tableCellInline)) (char '&')
+ let amp = try $ spaces *> string "&"
+ let tableCellInline = notFollowedBy (amp <|> controlSeq "\\") >> inline
+ cells' <- sepBy (spaces *> ((plain . trimInlines . mconcat) <$>
+ many tableCellInline)) amp
guard $ length cells' == cols
spaces
- (try $ string "\\\\" >> spaces) <|>
- (lookAhead (end "tabular") >> return ())
+ try $ controlSeq "\\" <|> lookAhead (try $ controlSeq "end" >> string "{tabular}")
return cells'
---
--- code blocks
---
-
-codeBlock :: GenParser Char ParserState Block
-codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> codeBlockWith "lstlisting" <|> lhsCodeBlock
--- Note: Verbatim is from fancyvrb.
-
-codeBlockWith :: String -> GenParser Char st Block
-codeBlockWith env = try $ do
- string "\\begin"
- spaces -- don't use begin function because it
- string $ "{" ++ env ++ "}" -- gobbles whitespace; we want to gobble
- optional blanklines -- blank lines, but not leading space
- contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}"))
- spaces
- let classes = if env == "code" then ["haskell"] else []
- return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents)
-
-lhsCodeBlock :: GenParser Char ParserState Block
-lhsCodeBlock = do
- failUnlessLHS
- (CodeBlock (_,_,_) cont) <- codeBlockWith "code"
- return $ CodeBlock ("", ["sourceCode","literate","haskell"], []) cont
-
---
--- block quotes
---
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
- return . BlockQuote
-
---
--- list blocks
---
-
-list :: GenParser Char ParserState Block
-list = bulletList <|> orderedList <|> definitionList <?> "list"
-
-listItem :: GenParser Char ParserState ([Inline], [Block])
-listItem = try $ do
- ("item", _, args) <- command
- spaces
- state <- getState
- let oldParserContext = stateParserContext state
- updateState (\s -> s {stateParserContext = ListItemState})
- blocks <- many block
- updateState (\s -> s {stateParserContext = oldParserContext})
- opt <- case args of
- ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
- parseFromString (many inline) $ tail $ init x
- _ -> return []
- return (opt, blocks)
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
- string "\\begin"
- spaces
- string "{enumerate}"
- spaces
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ do failIfStrict
- char '['
- res <- anyOrderedListMarker
- char ']'
- return res
- spaces
- option "" $ try $ do string "\\setlength{\\itemindent}"
- char '{'
- manyTill anyChar (char '}')
- spaces
- start <- option 1 $ try $ do failIfStrict
- string "\\setcounter{enum"
- many1 (oneOf "iv")
- string "}{"
- num <- many1 digit
- char '}'
- spaces
- return $ (read num) + 1
- items <- many listItem
- end "enumerate"
- spaces
- return $ OrderedList (start, style, delim) $ map snd items
-
-bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
- begin "itemize"
- items <- many listItem
- end "itemize"
- spaces
- return (BulletList $ map snd items)
-
-definitionList :: GenParser Char ParserState Block
-definitionList = try $ do
- begin "description"
- items <- many listItem
- end "description"
- spaces
- return $ DefinitionList $ map (\(t,d) -> (t,[d])) items
-
---
--- paragraph block
---
-
-para :: GenParser Char ParserState Block
-para = do
- res <- many1 inline
- spaces
- return $ if null (filter (`notElem` [Str "", Space]) res)
- then Null
- else Para $ normalizeSpaces res
-
---
--- title authors date
---
-
-bibliographic :: GenParser Char ParserState Block
-bibliographic = choice [ maketitle, title, subtitle, authors, date ]
-
-maketitle :: GenParser Char st Block
-maketitle = try (string "\\maketitle") >> spaces >> return Null
-
-title :: GenParser Char ParserState Block
-title = try $ do
- string "\\title{"
- tit <- manyTill inline (char '}')
- spaces
- updateState (\state -> state { stateTitle = tit })
- return Null
-
-subtitle :: GenParser Char ParserState Block
-subtitle = try $ do
- string "\\subtitle{"
- tit <- manyTill inline (char '}')
- spaces
- updateState (\state -> state { stateTitle = stateTitle state ++
- Str ":" : LineBreak : tit })
- return Null
-
-authors :: GenParser Char ParserState Block
-authors = try $ do
- string "\\author{"
- let andsep = try $ string "\\and" >> notFollowedBy letter >>
- spaces >> return '&'
- raw <- sepBy (many $ notFollowedBy (char '}' <|> andsep) >> inline) andsep
- let authors' = map normalizeSpaces raw
- char '}'
- spaces
- updateState (\s -> s { stateAuthors = authors' })
- return Null
-
-date :: GenParser Char ParserState Block
-date = try $ do
- string "\\date{"
- date' <- manyTill inline (char '}')
- spaces
- updateState (\state -> state { stateDate = normalizeSpaces date' })
- return Null
-
---
--- item block
--- for use in unknown environments that aren't being parsed as raw latex
---
-
--- this forces items to be parsed in different blocks
-itemBlock :: GenParser Char ParserState Block
-itemBlock = try $ do
- ("item", _, args) <- command
- state <- getState
- if stateParserContext state == ListItemState
- then fail "item should be handled by list block"
- else if null args
- then return Null
- else return $ Plain [Str (stripFirstAndLast (head args))]
-
---
--- raw LaTeX
---
-
--- | Parse any LaTeX environment and return a Para block containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment :: GenParser Char st Block
-rawLaTeXEnvironment = do
- contents <- rawLaTeXEnvironment'
- spaces
- return $ RawBlock "latex" contents
-
--- | Parse any LaTeX environment and return a string containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment' :: GenParser Char st String
-rawLaTeXEnvironment' = try $ do
- string "\\begin"
- spaces
- char '{'
- name <- many1 letter
- star <- option "" (string "*") -- for starred variants
- let name' = name ++ star
- char '}'
- args <- option [] commandArgs
- let argStr = concat args
- contents <- manyTill (choice [ (many1 (noneOf "\\")),
- rawLaTeXEnvironment',
- string "\\" ])
- (end name')
- return $ "\\begin{" ++ name' ++ "}" ++ argStr ++
- concat contents ++ "\\end{" ++ name' ++ "}"
-
-unknownEnvironment :: GenParser Char ParserState Block
-unknownEnvironment = try $ do
- state <- getState
- result <- if stateParseRaw state -- check whether we should include raw TeX
- then rawLaTeXEnvironment -- if so, get whole raw environment
- else anyEnvironment -- otherwise just the contents
- return result
-
--- \ignore{} is used conventionally in literate haskell for definitions
--- that are to be processed by the compiler but not printed.
-ignore :: GenParser Char ParserState Block
-ignore = try $ do
- ("ignore", _, _) <- command
- spaces
- return Null
-
-demacro :: (String, String, [String]) -> GenParser Char ParserState Inline
-demacro (n,st,args) = try $ do
- let raw = "\\" ++ n ++ st ++ concat args
- s' <- applyMacros' raw
- if raw == s'
- then return $ RawInline "latex" raw
- else do
- inp <- getInput
- setInput $ s' ++ inp
- return $ Str ""
-
-unknownCommand :: GenParser Char ParserState Block
-unknownCommand = try $ do
- spaces
- notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"] >>
- notFollowedBy letter
- state <- getState
- when (stateParserContext state == ListItemState) $
- notFollowedBy' (string "\\item")
- if stateParseRaw state
- then command >>= demacro >>= return . Plain . (:[])
- else do
- (name, _, args) <- command
- spaces
- unless (name `elem` commandsToIgnore) $ do
- -- put arguments back in input to be parsed
- inp <- getInput
- setInput $ intercalate " " args ++ inp
- return Null
-
-commandsToIgnore :: [String]
-commandsToIgnore = ["special","pdfannot","pdfstringdef", "index","bibliography"]
-
-skipChar :: GenParser Char ParserState Block
-skipChar = do
- satisfy (/='\\') <|>
- (notFollowedBy' (try $
- string "\\begin" >> spaces >> string "{document}") >>
- anyChar)
- spaces
- return Null
-
-commentBlock :: GenParser Char st Block
-commentBlock = many1 (comment >> spaces) >> return Null
-
---
--- inline
---
-
-inline :: GenParser Char ParserState Inline
-inline = choice [ str
- , endline
- , whitespace
- , quoted
- , apostrophe
- , strong
- , math
- , ellipses
- , emDash
- , enDash
- , hyphen
- , emph
- , strikeout
- , superscript
- , subscript
- , code
- , url
- , link
- , image
- , footnote
- , linebreak
- , accentedChar
- , nonbreakingSpace
- , cite
- , specialChar
- , ensureMath
- , rawLaTeXInline'
- , escapedChar
- , emptyGroup
- , unescapedChar
- , comment
- ] <?> "inline"
-
-
--- latex comment
-comment :: GenParser Char st Inline
-comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "")
-
-accentedChar :: GenParser Char st Inline
-accentedChar = normalAccentedChar <|> specialAccentedChar
-
-normalAccentedChar :: GenParser Char st Inline
-normalAccentedChar = try $ do
- char '\\'
- accent <- oneOf "'`^\"~"
- character <- (try $ char '{' >> letter >>~ char '}') <|> letter
- let table = fromMaybe [] $ lookup character accentTable
- let result = case lookup accent table of
- Just num -> chr num
- Nothing -> '?'
- return $ Str [result]
-
--- an association list of letters and association list of accents
--- and decimal character numbers.
-accentTable :: [(Char, [(Char, Int)])]
-accentTable =
- [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
- ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
- ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
- ('N', [('~', 209)]),
- ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
- ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
- ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
- ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
- ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
- ('n', [('~', 241)]),
- ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
- ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
-
-specialAccentedChar :: GenParser Char st Inline
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, lslash,
- oslash, pound, euro, copyright, sect ]
-
-ccedil :: GenParser Char st Inline
-ccedil = try $ do
- char '\\'
- letter' <- oneOfStrings ["cc", "cC"]
- notFollowedBy letter
- let num = if letter' == "cc" then 231 else 199
- return $ Str [chr num]
-
-aring :: GenParser Char st Inline
-aring = try $ do
- char '\\'
- letter' <- oneOfStrings ["aa", "AA"]
- notFollowedBy letter
- let num = if letter' == "aa" then 229 else 197
- return $ Str [chr num]
-
-iuml :: GenParser Char st Inline
-iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
- return (Str [chr 239])
-
-szlig :: GenParser Char st Inline
-szlig = try (string "\\ss") >> notFollowedBy letter >> return (Str [chr 223])
-
-oslash :: GenParser Char st Inline
-oslash = try $ do
- char '\\'
- letter' <- choice [char 'o', char 'O']
- notFollowedBy letter
- let num = if letter' == 'o' then 248 else 216
- return $ Str [chr num]
-
-lslash :: GenParser Char st Inline
-lslash = try $ do
- cmd <- oneOfStrings ["{\\L}","{\\l}"]
- <|> (oneOfStrings ["\\L ","\\l "] >>~ notFollowedBy letter)
- return $ if 'l' `elem` cmd
- then Str "\x142"
- else Str "\x141"
-
-aelig :: GenParser Char st Inline
-aelig = try $ do
- char '\\'
- letter' <- oneOfStrings ["ae", "AE"]
- notFollowedBy letter
- let num = if letter' == "ae" then 230 else 198
- return $ Str [chr num]
-
-pound :: GenParser Char st Inline
-pound = try (string "\\pounds" >> notFollowedBy letter) >> return (Str [chr 163])
-
-euro :: GenParser Char st Inline
-euro = try (string "\\euro" >> notFollowedBy letter) >> return (Str [chr 8364])
-
-copyright :: GenParser Char st Inline
-copyright = try (string "\\copyright" >> notFollowedBy letter) >> return (Str [chr 169])
-
-sect :: GenParser Char st Inline
-sect = try (string "\\S" >> notFollowedBy letter) >> return (Str [chr 167])
-
-escapedChar :: GenParser Char st Inline
-escapedChar = do
- result <- escaped (oneOf specialChars)
- return $ if result == '\n' then Str " " else Str [result]
+parseTableHeader :: Int -- ^ number of columns
+ -> LP [Blocks]
+parseTableHeader cols = try $ parseTableRow cols <* hline
-emptyGroup :: GenParser Char st Inline
-emptyGroup = try $ do
- char '{'
+simpTable :: LP Blocks
+simpTable = try $ do
spaces
- char '}'
- return $ Str ""
-
--- nonescaped special characters
-unescapedChar :: GenParser Char st Inline
-unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c])
-
-specialChar :: GenParser Char st Inline
-specialChar = choice [ spacer, interwordSpace, sentenceEnd,
- backslash, tilde, caret,
- bar, lt, gt, doubleQuote ]
-
-spacer :: GenParser Char st Inline
-spacer = try (string "\\,") >> return (Str "")
-
-sentenceEnd :: GenParser Char st Inline
-sentenceEnd = try (string "\\@") >> return (Str "")
-
-interwordSpace :: GenParser Char st Inline
-interwordSpace = try (string "\\ ") >> return (Str "\160")
-
-backslash :: GenParser Char st Inline
-backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\")
-
-tilde :: GenParser Char st Inline
-tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
-
-caret :: GenParser Char st Inline
-caret = try (string "\\^{}") >> return (Str "^")
-
-bar :: GenParser Char st Inline
-bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\")
-
-lt :: GenParser Char st Inline
-lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<")
-
-gt :: GenParser Char st Inline
-gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">")
-
-doubleQuote :: GenParser Char st Inline
-doubleQuote = char '"' >> return (Str "\"")
-
-code :: GenParser Char ParserState Inline
-code = code1 <|> code2 <|> code3 <|> lhsInlineCode
-
-code1 :: GenParser Char st Inline
-code1 = try $ do
- string "\\verb"
- marker <- anyChar
- result <- manyTill anyChar (char marker)
- return $ Code nullAttr $ removeLeadingTrailingSpace result
-
-code2 :: GenParser Char st Inline
-code2 = try $ do
- string "\\texttt{"
- result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
- return $ Code nullAttr result
-
-code3 :: GenParser Char st Inline
-code3 = try $ do
- string "\\lstinline"
- marker <- anyChar
- result <- manyTill anyChar (char marker)
- return $ Code nullAttr $ removeLeadingTrailingSpace result
-
-lhsInlineCode :: GenParser Char ParserState Inline
-lhsInlineCode = try $ do
- failUnlessLHS
- char '|'
- result <- manyTill (noneOf "|\n") (char '|')
- return $ Code ("",["haskell"],[]) result
-
-emph :: GenParser Char ParserState Inline
-emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
- manyTill inline (char '}') >>= return . Emph
-
-strikeout :: GenParser Char ParserState Inline
-strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
- return . Strikeout
-
-superscript :: GenParser Char ParserState Inline
-superscript = try $ string "\\textsuperscript{" >>
- manyTill inline (char '}') >>= return . Superscript
-
--- note: \textsubscript isn't a standard latex command, but we use
--- a defined version in pandoc.
-subscript :: GenParser Char ParserState Inline
-subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
- return . Subscript
-
-apostrophe :: GenParser Char ParserState Inline
-apostrophe = char '\'' >> return (Str "\x2019")
-
-quoted :: GenParser Char ParserState Inline
-quoted = doubleQuoted <|> singleQuoted
-
-singleQuoted :: GenParser Char ParserState Inline
-singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
- return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted :: GenParser Char ParserState Inline
-doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
- return . Quoted DoubleQuote . normalizeSpaces
-
-singleQuoteStart :: GenParser Char st Char
-singleQuoteStart = char '`'
-
-singleQuoteEnd :: GenParser Char st ()
-singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
-
-doubleQuoteStart :: CharParser st String
-doubleQuoteStart = string "``"
-
-doubleQuoteEnd :: CharParser st String
-doubleQuoteEnd = try $ string "''"
-
-ellipses :: GenParser Char st Inline
-ellipses = try $ do
- char '\\'
- optional $ char 'l'
- string "dots"
- optional $ try $ string "{}"
- return (Str "…")
-
-enDash :: GenParser Char st Inline
-enDash = try (string "--") >> return (Str "-")
-
-emDash :: GenParser Char st Inline
-emDash = try (string "---") >> return (Str "—")
-
-hyphen :: GenParser Char st Inline
-hyphen = char '-' >> return (Str "-")
-
-strong :: GenParser Char ParserState Inline
-strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
- return . Strong
-
-whitespace :: GenParser Char st Inline
-whitespace = many1 (oneOf " \t") >> return Space
-
-nonbreakingSpace :: GenParser Char st Inline
-nonbreakingSpace = char '~' >> return (Str "\160")
-
--- hard line break
-linebreak :: GenParser Char st Inline
-linebreak = try $ do
- string "\\\\"
- optional $ bracketedText '[' ']' -- e.g. \\[10pt]
+ aligns <- parseAligns
+ let cols = length aligns
+ optional hline
+ header' <- option [] $ parseTableHeader cols
+ rows <- many (parseTableRow cols <* optional hline)
spaces
- return LineBreak
-
-str :: GenParser Char st Inline
-str = many1 (noneOf specialChars) >>= return . Str
-
--- endline internal to paragraph
-endline :: GenParser Char st Inline
-endline = try $ newline >> notFollowedBy blankline >> return Space
-
--- math
-math :: GenParser Char ParserState Inline
-math = (math3 >>= applyMacros' >>= return . Math DisplayMath)
- <|> (math1 >>= applyMacros' >>= return . Math InlineMath)
- <|> (math2 >>= applyMacros' >>= return . Math InlineMath)
- <|> (math4 >>= applyMacros' >>= return . Math DisplayMath)
- <|> (math5 >>= applyMacros' >>= return . Math DisplayMath)
- <|> (math6 >>= applyMacros' >>= return . Math DisplayMath)
- <?> "math"
-
-math1 :: GenParser Char st String
-math1 = try $ char '$' >> manyTill anyChar (char '$')
-
-math2 :: GenParser Char st String
-math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)")
-
-math3 :: GenParser Char st String
-math3 = try $ char '$' >> math1 >>~ char '$'
-
-math4 :: GenParser Char st String
-math4 = try $ do
- name <- begin "displaymath" <|> begin "equation" <|> begin "equation*" <|>
- begin "gather" <|> begin "gather*" <|> begin "gathered" <|>
- begin "multline" <|> begin "multline*"
- manyTill anyChar (end name)
-
-math5 :: GenParser Char st String
-math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]")
-
-math6 :: GenParser Char st String
-math6 = try $ do
- name <- begin "eqnarray" <|> begin "eqnarray*" <|> begin "align" <|>
- begin "align*" <|> begin "alignat" <|> begin "alignat*" <|>
- begin "split" <|> begin "aligned" <|> begin "alignedat"
- res <- manyTill anyChar (end name)
- return $ filter (/= '&') res -- remove alignment codes
-
-ensureMath :: GenParser Char st Inline
-ensureMath = try $ do
- (n, _, args) <- command
- guard $ n == "ensuremath" && not (null args)
- return $ Math InlineMath $ tail $ init $ head args
-
---
--- links and images
---
-
-url :: GenParser Char ParserState Inline
-url = try $ do
- string "\\url"
- url' <- charsInBalanced '{' '}' anyChar
- return $ Link [Code ("",["url"],[]) url'] (escapeURI url', "")
-
-link :: GenParser Char ParserState Inline
-link = try $ do
- string "\\href{"
- url' <- manyTill anyChar (char '}')
- char '{'
- label' <- manyTill inline (char '}')
- return $ Link (normalizeSpaces label') (escapeURI url', "")
-
-image :: GenParser Char ParserState Inline
-image = try $ do
- ("includegraphics", _, args) <- command
- let args' = filter isArg args -- filter out options
- let (src,tit) = case args' of
- [] -> ("", "")
- (x:_) -> (stripFirstAndLast x, "")
- return $ Image [Str "image"] (escapeURI src, tit)
-
-footnote :: GenParser Char ParserState Inline
-footnote = try $ do
- (name, _, (contents:[])) <- command
- if ((name == "footnote") || (name == "thanks"))
- then string ""
- else fail "not a footnote or thanks command"
- let contents' = stripFirstAndLast contents
- -- parse the extracted block, which may contain various block elements:
- rest <- getInput
- setInput $ contents'
- blocks <- parseBlocks
- setInput rest
- return $ Note blocks
-
--- | citations
-cite :: GenParser Char ParserState Inline
-cite = simpleCite <|> complexNatbibCites
-
-simpleCiteArgs :: GenParser Char ParserState [Citation]
-simpleCiteArgs = try $ do
- first <- optionMaybe $ (char '[') >> manyTill inline (char ']')
- second <- optionMaybe $ (char '[') >> manyTill inline (char ']')
- char '{'
- keys <- many1Till citationLabel (char '}')
- let (pre, suf) = case (first , second ) of
- (Just s , Nothing) -> ([], s )
- (Just s , Just t ) -> (s , t )
- _ -> ([], [])
- conv k = Citation { citationId = k
- , citationPrefix = []
- , citationSuffix = []
- , citationMode = NormalCitation
- , citationHash = 0
- , citationNoteNum = 0
- }
- return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-
-simpleCite :: GenParser Char ParserState Inline
-simpleCite = try $ do
- char '\\'
- let biblatex = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]]
- ++ ["footcitetext"]
- normal = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]]
- ++ biblatex
- supress = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"]
- intext = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]]
- mintext = ["textcites"]
- mnormal = map (++ "s") biblatex
- cmdend = notFollowedBy (letter <|> char '*')
- capit [] = []
- capit (x:xs) = toUpper x : xs
- addUpper xs = xs ++ map capit xs
- toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t
- (mode, multi) <- toparser normal (NormalCitation, False)
- <|> toparser supress (SuppressAuthor, False)
- <|> toparser intext (AuthorInText , False)
- <|> toparser mnormal (NormalCitation, True )
- <|> toparser mintext (AuthorInText , True )
- cits <- if multi then
- many1 simpleCiteArgs
- else
- simpleCiteArgs >>= \c -> return [c]
- let (c:cs) = concat cits
- cits' = case mode of
- AuthorInText -> c {citationMode = mode} : cs
- _ -> map (\a -> a {citationMode = mode}) (c:cs)
- return $ Cite cits' []
-
-complexNatbibCites :: GenParser Char ParserState Inline
-complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical
-
-complexNatbibTextual :: GenParser Char ParserState Inline
-complexNatbibTextual = try $ do
- string "\\citeauthor{"
- manyTill (noneOf "}") (char '}')
- skipSpaces
- Cite (c:cs) _ <- complexNatbibParenthetical
- return $ Cite (c {citationMode = AuthorInText} : cs) []
-
-
-complexNatbibParenthetical :: GenParser Char ParserState Inline
-complexNatbibParenthetical = try $ do
- string "\\citetext{"
- cits <- many1Till parseOne (char '}')
- return $ Cite (concat cits) []
- where
- parseOne = do
- skipSpaces
- pref <- many (notFollowedBy (oneOf "\\}") >> inline)
- (Cite cites _) <- simpleCite
- suff <- many (notFollowedBy (oneOf "\\};") >> inline)
- skipSpaces
- optional $ char ';'
- return $ addPrefix pref $ addSuffix suff $ cites
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) = let k = last ks
- in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-citationLabel :: GenParser Char ParserState String
-citationLabel = do
- res <- many1 $ noneOf ",}"
- optional $ char ','
- return $ removeLeadingTrailingSpace res
-
--- | Parse any LaTeX inline command and return it in a raw TeX inline element.
-rawLaTeXInline' :: GenParser Char ParserState Inline
-rawLaTeXInline' = do
- notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
- "\\section"]
- rawLaTeXInline
+ let header'' = if null header'
+ then replicate cols mempty
+ else header'
+ return $ table mempty (zip aligns (repeat 0)) header'' rows
--- | Parse any LaTeX command and return it in a raw TeX inline element.
-rawLaTeXInline :: GenParser Char ParserState Inline
-rawLaTeXInline = try $ do
- state <- getState
- if stateParseRaw state
- then command >>= demacro
- else do
- (name,st,args) <- command
- x <- demacro (name,st,args)
- unless (x == Str "" || name `elem` commandsToIgnore) $ do
- inp <- getInput
- setInput $ intercalate " " args ++ inp
- return $ Str ""
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c78727715..8da0f7c16 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
@@ -729,8 +729,8 @@ rawVerbatimBlock = try $ do
rawTeXBlock :: GenParser Char ParserState Block
rawTeXBlock = do
failIfStrict
- result <- liftM (RawBlock "latex") rawLaTeXEnvironment'
- <|> liftM (RawBlock "context") rawConTeXtEnvironment'
+ result <- liftM (RawBlock "latex") rawLaTeXBlock
+ <|> liftM (RawBlock "context") rawConTeXtEnvironment
spaces
return result
@@ -933,8 +933,8 @@ inlineParsers = [ whitespace
, inlineNote -- after superscript because of ^[link](/foo)^
, autoLink
, rawHtmlInline
- , rawLaTeXInline'
, escapedChar
+ , rawLaTeXInline'
, exampleRef
, smartPunctuation inline
, charRef
@@ -977,8 +977,7 @@ symbol :: GenParser Char ParserState Inline
symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
- notFollowedBy' $ rawLaTeXEnvironment'
- <|> rawConTeXtEnvironment'
+ notFollowedBy' rawTeXBlock
char '\\')
return $ Str [result]
@@ -1246,18 +1245,16 @@ inlineNote = try $ do
rawLaTeXInline' :: GenParser Char ParserState Inline
rawLaTeXInline' = try $ do
failIfStrict
- lookAhead $ char '\\'
- notFollowedBy' $ rawLaTeXEnvironment'
- <|> rawConTeXtEnvironment'
+ lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
RawInline _ s <- rawLaTeXInline
return $ RawInline "tex" s -- "tex" because it might be context or latex
-rawConTeXtEnvironment' :: GenParser Char st String
-rawConTeXtEnvironment' = try $ do
+rawConTeXtEnvironment :: GenParser Char st String
+rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
<|> (many1 letter)
- contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar))
+ contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
(try $ string "\\stop" >> string completion)
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion