aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs3
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs14
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs106
-rw-r--r--src/Text/Pandoc/Readers/RST.hs176
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs250
5 files changed, 137 insertions, 412 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 5c188e3d9..6d54e7349 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -44,7 +44,8 @@ module Text.Pandoc.Readers.HTML (
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Data.Maybe ( fromMaybe )
import Data.List ( isPrefixOf, isSuffixOf, intercalate )
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 36940fab0..bbc5bb872 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,7 +35,8 @@ module Text.Pandoc.Readers.LaTeX (
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe )
import Data.Char ( chr )
import Data.List ( isPrefixOf, isSuffixOf )
@@ -453,7 +454,7 @@ inline = choice [ str
, accentedChar
, nonbreakingSpace
, specialChar
- , rawLaTeXInline
+ , rawLaTeXInline'
, escapedChar
, unescapedChar
] <?> "inline"
@@ -771,11 +772,16 @@ footnote = try $ do
setInput rest
return $ Note blocks
+-- | Parse any LaTeX inline command and return it in a raw TeX inline element.
+rawLaTeXInline' :: GenParser Char ParserState Inline
+rawLaTeXInline' = do
+ notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
+ "\\section"]
+ rawLaTeXInline
+
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try $ do
- notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
- "\\section"]
state <- getState
if stateParseRaw state
then do
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 0d3b30d10..086f85bb4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -32,12 +32,13 @@ module Text.Pandoc.Readers.Markdown (
) where
import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate )
+import qualified Data.Map as M
import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
import Data.Maybe
-import qualified Data.Map as M
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
anyHtmlInlineTag, anyHtmlTag,
@@ -68,7 +69,7 @@ setextHChars = "=-"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\\[]*_~`<>$!^-.&@'\"\8216\8217\8220\8221;"
+specialChars = "\\[]*_~`<>$!^-.&@'\";"
--
-- auxiliary functions
@@ -203,10 +204,10 @@ referenceKey = try $ do
tit <- option "" referenceTitle
blanklines
endPos <- getPosition
- let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit))
+ let target = (escapeURI $ removeTrailingSpace src, tit)
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = newkey : oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert (Key lab) target oldkeys }
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
@@ -716,7 +717,7 @@ dashedLine ch = do
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -735,7 +736,9 @@ simpleTableHeader headless = try $ do
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
- return (rawHeads', aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads'
+ return (heads, aligns, indices)
-- Parse a table footer - dashed lines followed by blank line.
tableFooter :: GenParser Char ParserState [Char]
@@ -764,65 +767,27 @@ multilineRow :: [Int]
-> GenParser Char ParserState [[Block]]
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
- optional blanklines
let cols = map unlines $ transpose colLines
mapM (parseFromString (many plain)) cols
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
tableCaption :: GenParser Char ParserState [Inline]
tableCaption = try $ do
skipNonindentSpaces
- string "Table:"
+ string ":" <|> string "Table:"
result <- many1 inline
blanklines
return $ normalizeSpaces result
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState end
- -> GenParser Char ParserState Block
-tableWith headerParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines' <- many1Till (lineParser indices) footerParser
- caption <- option [] tableCaption
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table caption aligns widths heads lines'
-
-- Parse a simple table with '---' header and one line per row.
simpleTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
simpleTable headless = do
Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine
- (if headless then tableFooter else tableFooter <|> blanklines)
+ (return ())
+ (if headless then tableFooter else tableFooter <|> blanklines)
+ tableCaption
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
@@ -833,10 +798,10 @@ simpleTable headless = do
multilineTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
multilineTable headless =
- tableWith (multilineTableHeader headless) multilineRow tableFooter
+ tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption
multilineTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([String], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
multilineTableHeader headless = try $ do
if headless
then return '\n'
@@ -860,7 +825,9 @@ multilineTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else map (intercalate " ") rawHeadsList
- return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
@@ -880,9 +847,14 @@ alignType strLst len =
(True, True) -> AlignCenter
(False, False) -> AlignDefault
+gridTable :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+gridTable = gridTableWith block tableCaption
+
table :: GenParser Char ParserState Block
table = multilineTable False <|> simpleTable True <|>
- simpleTable False <|> multilineTable True <?> "table"
+ simpleTable False <|> multilineTable True <|>
+ gridTable False <|> gridTable True <?> "table"
--
-- inline
@@ -1081,30 +1053,28 @@ failIfInQuoteContext context = do
singleQuoteStart :: GenParser Char ParserState Char
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- char '\8216' <|>
- (try $ do char '\''
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
- -- possess/contraction
- return '\'')
+ try $ do char '\''
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum)))
+ -- possess/contraction
+ return '\''
singleQuoteEnd :: GenParser Char st Char
singleQuoteEnd = try $ do
- char '\8217' <|> char '\''
+ char '\''
notFollowedBy alphaNum
return '\''
doubleQuoteStart :: GenParser Char ParserState Char
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
- char '\8220' <|>
- (try $ do char '"'
- notFollowedBy (oneOf " \t\n")
- return '"')
+ try $ do char '"'
+ notFollowedBy (oneOf " \t\n")
+ return '"'
doubleQuoteEnd :: GenParser Char st Char
-doubleQuoteEnd = char '\8221' <|> char '"'
+doubleQuoteEnd = char '"'
ellipses :: GenParser Char st Inline
ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
@@ -1229,7 +1199,7 @@ referenceLink lab = do
optional (newline >> skipSpaces) >> reference))
let ref' = if null ref then lab else ref
state <- getState
- case lookupKeySrc (stateKeys state) ref' of
+ case lookupKeySrc (stateKeys state) (Key ref') of
Nothing -> fail "no corresponding key"
Just target -> return target
@@ -1314,7 +1284,7 @@ inlineCitation = try $ do
chkCit :: Target -> GenParser Char ParserState (Maybe Target)
chkCit t = do
st <- getState
- case lookupKeySrc (stateKeys st) [Str $ fst t] of
+ case lookupKeySrc (stateKeys st) (Key [Str $ fst t]) of
Just _ -> fail "This is a link"
Nothing -> if elem (fst t) $ stateCitations st
then return $ Just t
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index c293c4fcd..13afe5053 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,10 +31,13 @@ module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
-import Control.Monad ( when, unless, liftM )
-import Data.List ( findIndex, delete, intercalate, transpose )
+import Control.Monad ( when, unless )
+import Data.List ( findIndex, intercalate, transpose, sort )
+import qualified Data.Map as M
+import Text.Printf ( printf )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -- ^ Parser state, including options for parser
@@ -93,9 +96,6 @@ parseRST = do
docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
setInput docMinusKeys
setPosition startPos
- st <- getState
- let reversedKeys = stateKeys st
- updateState $ \s -> s { stateKeys = reverse reversedKeys }
-- now parse it for real...
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@@ -540,10 +540,10 @@ referenceName = quotedReferenceName <|>
referenceKey :: GenParser Char ParserState [Char]
referenceKey = do
startPos <- getPosition
- key <- choice [imageKey, anonymousKey, regularKey]
+ (key, target) <- choice [imageKey, anonymousKey, regularKey]
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = key : oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert key target oldkeys }
optional blanklines
endPos <- getPosition
-- return enough blanks to replace key
@@ -558,28 +558,29 @@ targetURI = do
blanklines
return $ escapeURI $ removeLeadingTrailingSpace $ contents
-imageKey :: GenParser Char ParserState ([Inline], (String, [Char]))
+imageKey :: GenParser Char ParserState (Key, Target)
imageKey = try $ do
string ".. |"
ref <- manyTill inline (char '|')
skipSpaces
string "image::"
src <- targetURI
- return (normalizeSpaces ref, (src, ""))
+ return (Key (normalizeSpaces ref), (src, ""))
-anonymousKey :: GenParser Char st ([Inline], (String, [Char]))
+anonymousKey :: GenParser Char st (Key, Target)
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
- return ([Str "_"], (src, ""))
+ pos <- getPosition
+ return (Key [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, ""))
-regularKey :: GenParser Char ParserState ([Inline], (String, [Char]))
+regularKey :: GenParser Char ParserState (Key, Target)
regularKey = try $ do
string ".. _"
ref <- referenceName
char ':'
src <- targetURI
- return (normalizeSpaces ref, (src, ""))
+ return (Key (normalizeSpaces ref), (src, ""))
--
-- tables
@@ -607,41 +608,20 @@ dashedLine ch = do
simpleDashedLines :: Char -> GenParser Char st [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-gridPart :: Char -> GenParser Char st (Int, Int)
-gridPart ch = do
- dashes <- many1 (char ch)
- char '+'
- return (length dashes, length dashes + 1)
-
-gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
-
-- Parse a table row separator
simpleTableSep :: Char -> GenParser Char ParserState Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-gridTableSep :: Char -> GenParser Char ParserState Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
-- Parse a table footer
simpleTableFooter :: GenParser Char ParserState [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-gridTableFooter :: GenParser Char ParserState [Char]
-gridTableFooter = blanklines
-
-- Parse a raw line and split it into chunks by indices.
simpleTableRawLine :: [Int] -> GenParser Char ParserState [String]
simpleTableRawLine indices = do
line <- many1Till anyChar newline
return (simpleTableSplitLine indices line)
-gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
-gridTableRawLine indices = do
- char '|'
- line <- many1Till anyChar newline
- return (gridTableSplitLine indices $ removeTrailingSpace line)
-
-- Parse a table row and return a list of blocks (columns).
simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]]
simpleTableRow indices = do
@@ -651,64 +631,13 @@ simpleTableRow indices = do
let cols = map unlines . transpose $ firstLine : colLines
mapM (parseFromString (many plain)) cols
-gridTableRow :: [Int]
- -> GenParser Char ParserState [[Block]]
-gridTableRow indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- mapM (liftM compactifyCell . parseFromString (many block)) cols
-
-compactifyCell :: [Block] -> [Block]
-compactifyCell bs = head $ compactify [bs]
-
simpleTableSplitLine :: [Int] -> String -> [String]
simpleTableSplitLine indices line =
map removeLeadingTrailingSpace
$ tail $ splitByIndices (init indices) line
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line =
- map removeFinalBar $ tail $ splitByIndices (init indices) line
-
-removeFinalBar :: String -> String
-removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") .
- reverse
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
@@ -722,64 +651,23 @@ simpleTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else simpleTableSplitLine indices rawContent
- return (rawHeads, aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
-gridTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([String], [Alignment], [Int])
-gridTableHeader headless = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return $ repeat ""
- else many1
- (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
- let lines' = map snd dashes
- let indices = scanl (+) 0 lines'
- let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments
- let rawHeads = if headless
- then replicate (length dashes) ""
- else map (intercalate " ") $ transpose
- $ map (gridTableSplitLine indices) rawContent
- return (rawHeads, aligns, indices)
-
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState sep
- -> GenParser Char ParserState end
- -> GenParser Char ParserState Block
-tableWith headerParser rowParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy` lineParser
- footerParser
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let captions = [] -- no notion of captions in RST
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table captions aligns widths heads lines'
-
--- Parse a simple table with '---' header and one line per row.
+-- Parse a simple table.
simpleTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
+ Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return [])
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
where
sep = return () -- optional (simpleTableSep '-')
--- Parse a grid table: starts with row of '-' on top, then header
--- (which may be grid), then the rows,
--- which may be grid, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
gridTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTable headless =
- tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter
+ -> GenParser Char ParserState Block
+gridTable = gridTableWith block (return [])
table :: GenParser Char ParserState Block
table = gridTable False <|> simpleTable False <|>
@@ -889,17 +777,21 @@ explicitLink = try $ do
referenceLink :: GenParser Char ParserState Inline
referenceLink = try $ do
label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
- key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link
state <- getState
let keyTable = stateKeys state
+ let isAnonKey (Key [Str ('_':_)]) = True
+ isAnonKey _ = False
+ key <- option (Key label') $
+ do char '_'
+ let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
+ if null anonKeys
+ then pzero
+ else return (head anonKeys)
(src,tit) <- case lookupKeySrc keyTable key of
Nothing -> fail "no corresponding key"
Just target -> return target
- -- if anonymous link, remove first anon key so it won't be used again
- let keyTable' = if (key == [Str "_"]) -- anonymous link?
- then delete ([Str "_"], (src,tit)) keyTable -- remove first anon key
- else keyTable
- setState $ state { stateKeys = keyTable' }
+ -- if anonymous link, remove key so it won't be used again
+ when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
return $ Link (normalizeSpaces label') (src, tit)
autoURI :: GenParser Char ParserState Inline
@@ -922,7 +814,7 @@ image = try $ do
ref <- manyTill inline (char '|')
state <- getState
let keyTable = stateKeys state
- (src,tit) <- case lookupKeySrc keyTable ref of
+ (src,tit) <- case lookupKeySrc keyTable (Key ref) of
Nothing -> fail "no corresponding key"
Just target -> return target
return $ Image (normalizeSpaces ref) (src, tit)
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 080354be1..40cf39987 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -28,208 +28,64 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of TeX math to a list of 'Pandoc' inline elements.
-}
module Text.Pandoc.Readers.TeXMath (
- readTeXMath
+ readTeXMath
) where
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
+import Text.TeXMath.Parser
--- | Converts a string of raw TeX math to a list of 'Pandoc' inlines.
+-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
+-- Defaults to raw formula between @$@ characters if entire formula
+-- can't be converted.
readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings)
-> [Inline]
-readTeXMath inp = case parse teXMath ("formula: " ++ inp) inp of
- Left _ -> [Str inp] -- if unparseable, just include original
- Right res -> res
-
-teXMath :: GenParser Char st [Inline]
-teXMath = manyTill mathPart eof >>= return . concat
-
-mathPart :: GenParser Char st [Inline]
-mathPart = whitespace <|> superscript <|> subscript <|> symbol <|>
- argument <|> digits <|> letters <|> misc
-
-whitespace :: GenParser Char st [Inline]
-whitespace = many1 space >> return []
-
-symbol :: GenParser Char st [Inline]
-symbol = try $ do
- char '\\'
- res <- many1 letter
- case lookup res teXsymbols of
- Just m -> return [Str m]
- Nothing -> return [Str $ "\\" ++ res]
-
-argument :: GenParser Char st [Inline]
-argument = try $ do
- char '{'
- res <- many mathPart
- char '}'
- return $ if null res
- then [Str " "]
- else [Str "{"] ++ concat res ++ [Str "}"]
-
-digits :: GenParser Char st [Inline]
-digits = do
- res <- many1 digit
- return [Str res]
-
-letters :: GenParser Char st [Inline]
-letters = do
- res <- many1 letter
- return [Emph [Str res]]
-
-misc :: GenParser Char st [Inline]
-misc = do
- res <- noneOf "}"
- return [Str [res]]
-
-scriptArg :: GenParser Char st [Inline]
-scriptArg = try $ do
- (try (do{char '{'; r <- many mathPart; char '}'; return $ concat r}))
- <|> symbol
- <|> (do{c <- (letter <|> digit); return [Str [c]]})
-
-superscript :: GenParser Char st [Inline]
-superscript = try $ do
- char '^'
- arg <- scriptArg
- return [Superscript arg]
-
-subscript :: GenParser Char st [Inline]
-subscript = try $ do
- char '_'
- arg <- scriptArg
- return [Subscript arg]
-
-withThinSpace :: String -> String
-withThinSpace str = "\x2009" ++ str ++ "\x2009"
-
-teXsymbols :: [(String, String)]
-teXsymbols =
- [("alpha","\x3B1")
- ,("beta", "\x3B2")
- ,("chi", "\x3C7")
- ,("delta", "\x3B4")
- ,("Delta", "\x394")
- ,("epsilon", "\x3B5")
- ,("varepsilon", "\x25B")
- ,("eta", "\x3B7")
- ,("gamma", "\x3B3")
- ,("Gamma", "\x393")
- ,("iota", "\x3B9")
- ,("kappa", "\x3BA")
- ,("lambda", "\x3BB")
- ,("Lambda", "\x39B")
- ,("mu", "\x3BC")
- ,("nu", "\x3BD")
- ,("omega", "\x3C9")
- ,("Omega", "\x3A9")
- ,("phi", "\x3C6")
- ,("varphi", "\x3D5")
- ,("Phi", "\x3A6")
- ,("pi", "\x3C0")
- ,("Pi", "\x3A0")
- ,("psi", "\x3C8")
- ,("Psi", "\x3A8")
- ,("rho", "\x3C1")
- ,("sigma", "\x3C3")
- ,("Sigma", "\x3A3")
- ,("tau", "\x3C4")
- ,("theta", "\x3B8")
- ,("vartheta", "\x3D1")
- ,("Theta", "\x398")
- ,("upsilon", "\x3C5")
- ,("xi", "\x3BE")
- ,("Xi", "\x39E")
- ,("zeta", "\x3B6")
- ,("ne", "\x2260")
- ,("lt", withThinSpace "<")
- ,("le", withThinSpace "\x2264")
- ,("leq", withThinSpace "\x2264")
- ,("ge", withThinSpace "\x2265")
- ,("geq", withThinSpace "\x2265")
- ,("prec", withThinSpace "\x227A")
- ,("succ", withThinSpace "\x227B")
- ,("preceq", withThinSpace "\x2AAF")
- ,("succeq", withThinSpace "\x2AB0")
- ,("in", withThinSpace "\x2208")
- ,("notin", withThinSpace "\x2209")
- ,("subset", withThinSpace "\x2282")
- ,("supset", withThinSpace "\x2283")
- ,("subseteq", withThinSpace "\x2286")
- ,("supseteq", withThinSpace "\x2287")
- ,("equiv", withThinSpace "\x2261")
- ,("cong", withThinSpace "\x2245")
- ,("approx", withThinSpace "\x2248")
- ,("propto", withThinSpace "\x221D")
- ,("cdot", withThinSpace "\x22C5")
- ,("star", withThinSpace "\x22C6")
- ,("backslash", "\\")
- ,("times", withThinSpace "\x00D7")
- ,("divide", withThinSpace "\x00F7")
- ,("circ", withThinSpace "\x2218")
- ,("oplus", withThinSpace "\x2295")
- ,("otimes", withThinSpace "\x2297")
- ,("odot", withThinSpace "\x2299")
- ,("sum", "\x2211")
- ,("prod", "\x220F")
- ,("wedge", withThinSpace "\x2227")
- ,("bigwedge", withThinSpace "\x22C0")
- ,("vee", withThinSpace "\x2228")
- ,("bigvee", withThinSpace "\x22C1")
- ,("cap", withThinSpace "\x2229")
- ,("bigcap", withThinSpace "\x22C2")
- ,("cup", withThinSpace "\x222A")
- ,("bigcup", withThinSpace "\x22C3")
- ,("neg", "\x00AC")
- ,("implies", withThinSpace "\x21D2")
- ,("iff", withThinSpace "\x21D4")
- ,("forall", "\x2200")
- ,("exists", "\x2203")
- ,("bot", "\x22A5")
- ,("top", "\x22A4")
- ,("vdash", "\x22A2")
- ,("models", withThinSpace "\x22A8")
- ,("uparrow", "\x2191")
- ,("downarrow", "\x2193")
- ,("rightarrow", withThinSpace "\x2192")
- ,("to", withThinSpace "\x2192")
- ,("rightarrowtail", "\x21A3")
- ,("twoheadrightarrow", withThinSpace "\x21A0")
- ,("twoheadrightarrowtail", withThinSpace "\x2916")
- ,("mapsto", withThinSpace "\x21A6")
- ,("leftarrow", withThinSpace "\x2190")
- ,("leftrightarrow", withThinSpace "\x2194")
- ,("Rightarrow", withThinSpace "\x21D2")
- ,("Leftarrow", withThinSpace "\x21D0")
- ,("Leftrightarrow", withThinSpace "\x21D4")
- ,("partial", "\x2202")
- ,("nabla", "\x2207")
- ,("pm", "\x00B1")
- ,("emptyset", "\x2205")
- ,("infty", "\x221E")
- ,("aleph", "\x2135")
- ,("ldots", "...")
- ,("therefore", "\x2234")
- ,("angle", "\x2220")
- ,("quad", "\x00A0\x00A0")
- ,("cdots", "\x22EF")
- ,("vdots", "\x22EE")
- ,("ddots", "\x22F1")
- ,("diamond", "\x22C4")
- ,("Box", "\x25A1")
- ,("lfloor", "\x230A")
- ,("rfloor", "\x230B")
- ,("lceiling", "\x2308")
- ,("rceiling", "\x2309")
- ,("langle", "\x2329")
- ,("rangle", "\x232A")
- ,("int", "\8747")
- ,("{", "{")
- ,("}", "}")
- ,("[", "[")
- ,("]", "]")
- ,("|", "|")
- ,("||", "||")
- ]
+readTeXMath inp = case readTeXMath' inp of
+ Nothing -> [Str ("$" ++ inp ++ "$")]
+ Just res -> res
+
+-- | Like 'readTeXMath', but without the default.
+readTeXMath' :: String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> Maybe [Inline]
+readTeXMath' inp = case parse formula "formula" inp of
+ Left _ -> Just [Str inp]
+ Right exps -> expsToInlines exps
+
+expsToInlines :: [Exp] -> Maybe [Inline]
+expsToInlines xs = do
+ res <- mapM expToInlines xs
+ return (concat res)
+
+expToInlines :: Exp -> Maybe [Inline]
+expToInlines (ENumber s) = Just [Str s]
+expToInlines (EIdentifier s) = Just [Emph [Str s]]
+expToInlines (EMathOperator s) = Just [Str s]
+expToInlines (ESymbol t s) = Just $ addSpace t (Str s)
+ where addSpace Op x = [x, thinspace]
+ addSpace Bin x = [medspace, x, medspace]
+ addSpace Rel x = [widespace, x, widespace]
+ addSpace Pun x = [x, thinspace]
+ addSpace _ x = [x]
+ thinspace = Str "\x2006"
+ medspace = Str "\x2005"
+ widespace = Str "\x2004"
+expToInlines (EStretchy x) = expToInlines x
+expToInlines (EGrouped xs) = expsToInlines xs
+expToInlines (ESpace _) = Just [Str " "] -- variable widths not supported
+expToInlines (EBinary _ _ _) = Nothing
+expToInlines (ESub x y) = do
+ x' <- expToInlines x
+ y' <- expToInlines y
+ return $ x' ++ [Subscript y']
+expToInlines (ESuper x y) = do
+ x' <- expToInlines x
+ y' <- expToInlines y
+ return $ x' ++ [Superscript y']
+expToInlines (ESubsup x y z) = do
+ x' <- expToInlines x
+ y' <- expToInlines y
+ z' <- expToInlines z
+ return $ x' ++ [Subscript y'] ++ [Superscript z']
+expToInlines (EText _ x) = Just [Emph [Str x]]
+expToInlines _ = Nothing