aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs84
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs28
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs127
-rw-r--r--src/Text/Pandoc/Readers/RST.hs136
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs24
5 files changed, 181 insertions, 218 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 5c188e3d9..5ccbc4fb1 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -44,13 +44,14 @@ module Text.Pandoc.Readers.HTML (
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Data.Maybe ( fromMaybe )
import Data.List ( isPrefixOf, isSuffixOf, intercalate )
import Data.Char ( toLower, isAlphaNum )
import Network.URI ( parseURIReference, URI (..) )
-import Control.Monad ( liftM )
+import Control.Monad ( liftM, when )
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ParserState -- ^ Parser state
@@ -198,11 +199,11 @@ inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-- | Parse blocks between open and close tag.
blocksIn :: String -> GenParser Char ParserState [Block]
-blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
+blocksIn tag = try $ htmlOpenTag tag >> spaces >> blocksTilEnd tag
-- | Parse inlines between open and close tag.
inlinesIn :: String -> GenParser Char ParserState [Inline]
-inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
+inlinesIn tag = try $ htmlOpenTag tag >> spaces >> inlinesTilEnd tag
-- | Extract type from a tag: e.g. @br@ from @\<br\>@
extractTagType :: String -> String
@@ -258,18 +259,33 @@ anyHtmlEndTag = try $ do
then return $ "<!-- unsafe HTML removed -->"
else return result
-htmlTag :: String -> GenParser Char ParserState (String, [(String, String)])
-htmlTag tag = try $ do
+htmlTag :: Bool
+ -> String
+ -> GenParser Char ParserState (String, [(String, String)])
+htmlTag selfClosing tag = try $ do
char '<'
spaces
stringAnyCase tag
attribs <- many htmlAttribute
spaces
- optional (string "/")
- spaces
+ -- note: we want to handle both HTML and XHTML,
+ -- so we don't require the /
+ when selfClosing $ optional $ char '/' >> spaces
char '>'
return (tag, (map (\(name, content, _) -> (name, content)) attribs))
+htmlOpenTag :: String
+ -> GenParser Char ParserState (String, [(String, String)])
+htmlOpenTag = htmlTag False
+
+htmlCloseTag :: String
+ -> GenParser Char ParserState (String, [(String, String)])
+htmlCloseTag = htmlTag False . ('/':)
+
+htmlSelfClosingTag :: String
+ -> GenParser Char ParserState (String, [(String, String)])
+htmlSelfClosingTag = htmlTag True
+
-- parses a quoted html attribute value
quoted :: Char -> GenParser Char st (String, String)
quoted quoteChar = do
@@ -344,7 +360,7 @@ anyHtmlInlineTag = try $ do
-- Scripts must be treated differently, because they can contain '<>' etc.
htmlScript :: GenParser Char ParserState [Char]
htmlScript = try $ do
- lookAhead $ htmlTag "script"
+ lookAhead $ htmlOpenTag "script"
open <- anyHtmlTag
rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script")
st <- getState
@@ -379,7 +395,7 @@ scriptChunk = jsComment <|> jsString <|> jsChars
-- Style tags must be treated differently, because they can contain CSS
htmlStyle :: GenParser Char ParserState [Char]
htmlStyle = try $ do
- lookAhead $ htmlTag "style"
+ lookAhead $ htmlOpenTag "style"
open <- anyHtmlTag
rest <- manyTill anyChar (htmlEndTag "style")
st <- getState
@@ -411,7 +427,8 @@ rawVerbatimBlock = try $ do
-- We don't want to parse </body> or </html> as raw HTML, since these
-- are handled in parseHtml.
rawHtmlBlock' :: GenParser Char ParserState Block
-rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
+rawHtmlBlock' = do notFollowedBy' (htmlCloseTag "body" <|>
+ htmlCloseTag "html")
rawHtmlBlock
-- | Parses an HTML comment.
@@ -441,13 +458,13 @@ definition = try $ do
nonTitleNonHead :: GenParser Char ParserState Char
nonTitleNonHead = try $ do
- notFollowedBy $ (htmlTag "title" >> return ' ') <|>
+ notFollowedBy $ (htmlOpenTag "title" >> return ' ') <|>
(htmlEndTag "head" >> return ' ')
(rawHtmlBlock >> return ' ') <|> anyChar
parseTitle :: GenParser Char ParserState [Inline]
parseTitle = try $ do
- (tag, _) <- htmlTag "title"
+ (tag, _) <- htmlOpenTag "title"
contents <- inlinesTilEnd tag
spaces
return contents
@@ -455,7 +472,7 @@ parseTitle = try $ do
-- parse header and return meta-information (for now, just title)
parseHead :: GenParser Char ParserState Meta
parseHead = try $ do
- htmlTag "head"
+ htmlOpenTag "head"
spaces
skipMany nonTitleNonHead
contents <- option [] parseTitle
@@ -463,13 +480,10 @@ parseHead = try $ do
htmlEndTag "head"
return $ Meta contents [] []
-skipHtmlTag :: String -> GenParser Char ParserState ()
-skipHtmlTag tag = optional (htmlTag tag)
-
-- h1 class="title" representation of title in body
bodyTitle :: GenParser Char ParserState [Inline]
bodyTitle = try $ do
- (_, attribs) <- htmlTag "h1"
+ (_, attribs) <- htmlOpenTag "h1"
case (extractAttribute "class" attribs) of
Just "title" -> return ""
_ -> fail "not title"
@@ -487,11 +501,11 @@ parseHtml :: GenParser Char ParserState Pandoc
parseHtml = do
sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
spaces
- skipHtmlTag "html"
+ optional $ htmlOpenTag "html"
spaces
meta <- option (Meta [] [] []) parseHead
spaces
- skipHtmlTag "body"
+ optional $ htmlOpenTag "body"
spaces
optional bodyTitle -- skip title in body, because it's represented in meta
blocks <- parseBlocks
@@ -527,7 +541,7 @@ header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
headerLevel :: Int -> GenParser Char ParserState Block
headerLevel n = try $ do
let level = "h" ++ show n
- htmlTag level
+ htmlOpenTag level
contents <- inlinesTilEnd level
return $ Header n (normalizeSpaces contents)
@@ -537,7 +551,7 @@ headerLevel n = try $ do
hrule :: GenParser Char ParserState Block
hrule = try $ do
- (_, attribs) <- htmlTag "hr"
+ (_, attribs) <- htmlSelfClosingTag "hr"
state <- getState
if not (null attribs) && stateParseRaw state
then unexpected "attributes in hr" -- parse as raw in this case
@@ -551,7 +565,7 @@ hrule = try $ do
-- skipped, because they are not portable to output formats other than HTML.
codeBlock :: GenParser Char ParserState Block
codeBlock = try $ do
- htmlTag "pre"
+ htmlOpenTag "pre"
result <- manyTill
(many1 (satisfy (/= '<')) <|>
((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
@@ -572,7 +586,7 @@ codeBlock = try $ do
--
blockQuote :: GenParser Char ParserState Block
-blockQuote = try $ htmlTag "blockquote" >> spaces >>
+blockQuote = try $ htmlOpenTag "blockquote" >> spaces >>
blocksTilEnd "blockquote" >>= (return . BlockQuote)
--
@@ -584,7 +598,7 @@ list = choice [ bulletList, orderedList, definitionList ] <?> "list"
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
- (_, attribs) <- htmlTag "ol"
+ (_, attribs) <- htmlOpenTag "ol"
(start, style) <- option (1, DefaultStyle) $
do failIfStrict
let sta = fromMaybe "1" $
@@ -609,7 +623,7 @@ orderedList = try $ do
bulletList :: GenParser Char ParserState Block
bulletList = try $ do
- htmlTag "ul"
+ htmlOpenTag "ul"
spaces
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
@@ -620,7 +634,7 @@ bulletList = try $ do
definitionList :: GenParser Char ParserState Block
definitionList = try $ do
failIfStrict -- def lists not part of standard markdown
- htmlTag "dl"
+ htmlOpenTag "dl"
spaces
items <- sepEndBy1 definitionListItem spaces
htmlEndTag "dl"
@@ -638,7 +652,7 @@ definitionListItem = try $ do
--
para :: GenParser Char ParserState Block
-para = try $ htmlTag "p" >> inlinesTilEnd "p" >>=
+para = try $ htmlOpenTag "p" >> inlinesTilEnd "p" >>=
return . Para . normalizeSpaces
--
@@ -672,8 +686,8 @@ inline = choice [ charRef
code :: GenParser Char ParserState Inline
code = try $ do
- htmlTag "code"
- result <- manyTill anyChar (htmlEndTag "code")
+ result <- (htmlOpenTag "code" >> manyTill (noneOf "<>") (htmlEndTag "code"))
+ <|> (htmlOpenTag "tt" >> manyTill (noneOf "<>") (htmlEndTag "tt"))
-- remove internal line breaks, leading and trailing space,
-- and decode character references
return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
@@ -686,7 +700,7 @@ rawHtmlInline = do
if stateParseRaw state then return (HtmlInline result) else return (Str "")
betweenTags :: [Char] -> GenParser Char ParserState [Inline]
-betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
+betweenTags tag = try $ htmlOpenTag tag >> inlinesTilEnd tag >>=
return . normalizeSpaces
emph :: GenParser Char ParserState Inline
@@ -708,7 +722,7 @@ strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
spanStrikeout :: GenParser Char ParserState Inline
spanStrikeout = try $ do
failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
- (_, attributes) <- htmlTag "span"
+ (_, attributes) <- htmlOpenTag "span"
result <- case (extractAttribute "class" attributes) of
Just "strikeout" -> inlinesTilEnd "span"
_ -> fail "not a strikeout"
@@ -719,7 +733,7 @@ whitespace = many1 space >> return Space
-- hard line break
linebreak :: GenParser Char ParserState Inline
-linebreak = htmlTag "br" >> optional newline >> return LineBreak
+linebreak = htmlSelfClosingTag "br" >> optional newline >> return LineBreak
str :: GenParser Char st Inline
str = many1 (noneOf "< \t\n&") >>= return . Str
@@ -740,7 +754,7 @@ extractAttribute name ((attrName, contents):rest) =
link :: GenParser Char ParserState Inline
link = try $ do
- (_, attributes) <- htmlTag "a"
+ (_, attributes) <- htmlOpenTag "a"
url <- case (extractAttribute "href" attributes) of
Just url -> return url
Nothing -> fail "no href"
@@ -750,7 +764,7 @@ link = try $ do
image :: GenParser Char ParserState Inline
image = try $ do
- (_, attributes) <- htmlTag "img"
+ (_, attributes) <- htmlSelfClosingTag "img"
url <- case (extractAttribute "src" attributes) of
Just url -> return url
Nothing -> fail "no src"
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 01fca9f2b..406809dfc 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,7 +35,8 @@ module Text.Pandoc.Readers.LaTeX (
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe )
import Data.Char ( chr )
import Data.List ( isPrefixOf, isSuffixOf )
@@ -167,16 +168,37 @@ block = choice [ hrule
--
header :: GenParser Char ParserState Block
-header = try $ do
+header = section <|> chapter
+
+chapter :: GenParser Char ParserState Block
+chapter = try $ do
+ string "\\chapter"
+ result <- headerWithLevel 1
+ updateState $ \s -> s{ stateHasChapters = True }
+ return result
+
+section :: GenParser Char ParserState Block
+section = try $ do
char '\\'
subs <- many (try (string "sub"))
base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4)
+ st <- getState
+ let lev = if stateHasChapters st
+ then length subs + base + 1
+ else length subs + base
+ headerWithLevel lev
+
+headerWithLevel :: Int -> GenParser Char ParserState Block
+headerWithLevel lev = try $ do
+ spaces
optional (char '*')
+ spaces
optional $ bracketedText '[' ']' -- alt title
+ spaces
char '{'
title' <- manyTill inline (char '}')
spaces
- return $ Header (length subs + base) (normalizeSpaces title')
+ return $ Header lev (normalizeSpaces title')
--
-- hrule block
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a6d383fca..b655ea1a9 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -37,7 +37,8 @@ import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
import Data.Maybe
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
anyHtmlInlineTag, anyHtmlTag,
@@ -45,7 +46,8 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
htmlBlockElement, htmlComment, unsanitaryURI )
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.ParserCombinators.Parsec
-import Control.Monad (when, liftM, unless)
+import Control.Monad (when, liftM, unless, guard)
+import Text.TeXMath.Macros (applyMacros, Macro, pMacroDefinition)
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ParserState -- ^ Parser state, including options for parser
@@ -68,7 +70,7 @@ setextHChars = "=-"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\\[]*_~`<>$!^-.&'\";"
+specialChars = "\\[]*_~`<>$!^-.&@'\";"
--
-- auxiliary functions
@@ -184,7 +186,18 @@ parseMarkdown = do
-- now parse it for real...
(title, author, date) <- option ([],[],[]) titleBlock
blocks <- parseBlocks
- return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
+ let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks
+ -- if there are labeled examples, change references into numbers
+ examples <- liftM stateExamples getState
+ let handleExampleRef :: Inline -> Inline
+ handleExampleRef z@(Str ('@':xs)) =
+ case M.lookup xs examples of
+ Just n -> Str (show n)
+ Nothing -> z
+ handleExampleRef z = z
+ if M.null examples
+ then return doc
+ else return $ processWith handleExampleRef doc
--
-- initial pass for references and notes
@@ -272,6 +285,7 @@ block = do
, plain
, nullBlock ]
else [ codeBlockDelimited
+ , macro
, header
, table
, codeBlockIndented
@@ -716,7 +730,7 @@ dashedLine ch = do
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -735,7 +749,9 @@ simpleTableHeader headless = try $ do
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
- return (rawHeads', aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads'
+ return (heads, aligns, indices)
-- Parse a table footer - dashed lines followed by blank line.
tableFooter :: GenParser Char ParserState [Char]
@@ -764,65 +780,27 @@ multilineRow :: [Int]
-> GenParser Char ParserState [[Block]]
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
- optional blanklines
let cols = map unlines $ transpose colLines
mapM (parseFromString (many plain)) cols
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
tableCaption :: GenParser Char ParserState [Inline]
tableCaption = try $ do
skipNonindentSpaces
- string "Table:"
+ string ":" <|> string "Table:"
result <- many1 inline
blanklines
return $ normalizeSpaces result
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState end
- -> GenParser Char ParserState Block
-tableWith headerParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines' <- many1Till (lineParser indices) footerParser
- caption <- option [] tableCaption
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table caption aligns widths heads lines'
-
-- Parse a simple table with '---' header and one line per row.
simpleTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
simpleTable headless = do
Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine
- (if headless then tableFooter else tableFooter <|> blanklines)
+ (return ())
+ (if headless then tableFooter else tableFooter <|> blanklines)
+ tableCaption
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
@@ -833,10 +811,10 @@ simpleTable headless = do
multilineTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
multilineTable headless =
- tableWith (multilineTableHeader headless) multilineRow tableFooter
+ tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption
multilineTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([String], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
multilineTableHeader headless = try $ do
if headless
then return '\n'
@@ -860,7 +838,9 @@ multilineTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else map (intercalate " ") rawHeadsList
- return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
@@ -880,9 +860,37 @@ alignType strLst len =
(True, True) -> AlignCenter
(False, False) -> AlignDefault
+gridTable :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+gridTable = gridTableWith block tableCaption
+
table :: GenParser Char ParserState Block
table = multilineTable False <|> simpleTable True <|>
- simpleTable False <|> multilineTable True <?> "table"
+ simpleTable False <|> multilineTable True <|>
+ gridTable False <|> gridTable True <?> "table"
+
+--
+-- Macros
+--
+
+-- | Parse a \newcommand or \renewcommand macro definition.
+macro :: GenParser Char ParserState Block
+macro = getState >>= guard . stateApplyMacros >>
+ pMacroDefinition >>= addMacro >> blanklines >> return Null
+
+-- | Add a macro to the list of macros in state.
+addMacro :: Macro -> GenParser Char ParserState ()
+addMacro m = do
+ updateState $ \st -> st{ stateMacros = m : stateMacros st }
+
+-- | Apply current macros to string.
+applyMacros' :: String -> GenParser Char ParserState String
+applyMacros' target = do
+ apply <- liftM stateApplyMacros getState
+ if apply
+ then do macros <- liftM stateMacros getState
+ return $ applyMacros macros target
+ else return target
--
-- inline
@@ -916,6 +924,7 @@ inlineParsers = [ str
, rawHtmlInline'
, rawLaTeXInline'
, escapedChar
+ , exampleRef
, symbol
, ltSign ]
@@ -951,6 +960,14 @@ ltSign = do
specialCharsMinusLt :: [Char]
specialCharsMinusLt = filter (/= '<') specialChars
+exampleRef :: GenParser Char ParserState Inline
+exampleRef = try $ do
+ char '@'
+ lab <- many1 (alphaNum <|> oneOf "-_")
+ -- We just return a Str. These are replaced with numbers
+ -- later. See the end of parseMarkdown.
+ return $ Str $ '@' : lab
+
symbol :: GenParser Char ParserState Inline
symbol = do
result <- oneOf specialCharsMinusLt
@@ -977,8 +994,8 @@ mathChunk = do char '\\'
<|> many1 (noneOf " \t\n\\$")
math :: GenParser Char ParserState Inline
-math = (mathDisplay >>= return . Math DisplayMath)
- <|> (mathInline >>= return . Math InlineMath)
+math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath)
+ <|> (mathInline >>= applyMacros' >>= return . Math InlineMath)
mathDisplay :: GenParser Char ParserState String
mathDisplay = try $ do
@@ -1285,7 +1302,7 @@ rawHtmlInline' = do
st <- getState
result <- if stateStrict st
then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
- else anyHtmlInlineTag
+ else choice [htmlComment, anyHtmlInlineTag]
return $ HtmlInline result
#ifdef _CITEPROC
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 7b4b5eee8..13afe5053 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,9 +31,10 @@ module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
-import Control.Monad ( when, unless, liftM )
+import Control.Monad ( when, unless )
import Data.List ( findIndex, intercalate, transpose, sort )
import qualified Data.Map as M
import Text.Printf ( printf )
@@ -424,7 +425,7 @@ bulletListStart = try $ do
-- parses ordered list start and returns its length (inc following whitespace)
orderedListStart :: ListNumberStyle
-> ListNumberDelim
- -> GenParser Char st Int
+ -> GenParser Char ParserState Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
white <- many1 spaceChar
@@ -607,41 +608,20 @@ dashedLine ch = do
simpleDashedLines :: Char -> GenParser Char st [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-gridPart :: Char -> GenParser Char st (Int, Int)
-gridPart ch = do
- dashes <- many1 (char ch)
- char '+'
- return (length dashes, length dashes + 1)
-
-gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
-
-- Parse a table row separator
simpleTableSep :: Char -> GenParser Char ParserState Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-gridTableSep :: Char -> GenParser Char ParserState Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
-- Parse a table footer
simpleTableFooter :: GenParser Char ParserState [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-gridTableFooter :: GenParser Char ParserState [Char]
-gridTableFooter = blanklines
-
-- Parse a raw line and split it into chunks by indices.
simpleTableRawLine :: [Int] -> GenParser Char ParserState [String]
simpleTableRawLine indices = do
line <- many1Till anyChar newline
return (simpleTableSplitLine indices line)
-gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
-gridTableRawLine indices = do
- char '|'
- line <- many1Till anyChar newline
- return (gridTableSplitLine indices $ removeTrailingSpace line)
-
-- Parse a table row and return a list of blocks (columns).
simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]]
simpleTableRow indices = do
@@ -651,64 +631,13 @@ simpleTableRow indices = do
let cols = map unlines . transpose $ firstLine : colLines
mapM (parseFromString (many plain)) cols
-gridTableRow :: [Int]
- -> GenParser Char ParserState [[Block]]
-gridTableRow indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- mapM (liftM compactifyCell . parseFromString (many block)) cols
-
-compactifyCell :: [Block] -> [Block]
-compactifyCell bs = head $ compactify [bs]
-
simpleTableSplitLine :: [Int] -> String -> [String]
simpleTableSplitLine indices line =
map removeLeadingTrailingSpace
$ tail $ splitByIndices (init indices) line
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line =
- map removeFinalBar $ tail $ splitByIndices (init indices) line
-
-removeFinalBar :: String -> String
-removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") .
- reverse
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
@@ -722,64 +651,23 @@ simpleTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else simpleTableSplitLine indices rawContent
- return (rawHeads, aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
-gridTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([String], [Alignment], [Int])
-gridTableHeader headless = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return $ repeat ""
- else many1
- (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
- let lines' = map snd dashes
- let indices = scanl (+) 0 lines'
- let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments
- let rawHeads = if headless
- then replicate (length dashes) ""
- else map (intercalate " ") $ transpose
- $ map (gridTableSplitLine indices) rawContent
- return (rawHeads, aligns, indices)
-
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState sep
- -> GenParser Char ParserState end
- -> GenParser Char ParserState Block
-tableWith headerParser rowParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy` lineParser
- footerParser
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let captions = [] -- no notion of captions in RST
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table captions aligns widths heads lines'
-
--- Parse a simple table with '---' header and one line per row.
+-- Parse a simple table.
simpleTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
+ Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return [])
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
where
sep = return () -- optional (simpleTableSep '-')
--- Parse a grid table: starts with row of '-' on top, then header
--- (which may be grid), then the rows,
--- which may be grid, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
gridTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTable headless =
- tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter
+ -> GenParser Char ParserState Block
+gridTable = gridTableWith block (return [])
table :: GenParser Char ParserState Block
table = gridTable False <|> simpleTable False <|>
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 40cf39987..ca839dd08 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -86,6 +86,28 @@ expToInlines (ESubsup x y z) = do
y' <- expToInlines y
z' <- expToInlines z
return $ x' ++ [Subscript y'] ++ [Superscript z']
-expToInlines (EText _ x) = Just [Emph [Str x]]
+expToInlines (EDown x y) = expToInlines (ESub x y)
+expToInlines (EUp x y) = expToInlines (ESuper x y)
+expToInlines (EDownup x y z) = expToInlines (ESubsup x y z)
+expToInlines (EText "normal" x) = Just [Str x]
+expToInlines (EText "bold" x) = Just [Strong [Str x]]
+expToInlines (EText "monospace" x) = Just [Code x]
+expToInlines (EText "italic" x) = Just [Emph [Str x]]
+expToInlines (EText _ x) = Just [Str x]
+expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) =
+ case accent of
+ '\x203E' -> Just [Emph [Str [c,'\x0304']]] -- bar
+ '\x00B4' -> Just [Emph [Str [c,'\x0301']]] -- acute
+ '\x0060' -> Just [Emph [Str [c,'\x0300']]] -- grave
+ '\x02D8' -> Just [Emph [Str [c,'\x0306']]] -- breve
+ '\x02C7' -> Just [Emph [Str [c,'\x030C']]] -- check
+ '.' -> Just [Emph [Str [c,'\x0307']]] -- dot
+ '\x00B0' -> Just [Emph [Str [c,'\x030A']]] -- ring
+ '\x20D7' -> Just [Emph [Str [c,'\x20D7']]] -- arrow right
+ '\x20D6' -> Just [Emph [Str [c,'\x20D6']]] -- arrow left
+ '\x005E' -> Just [Emph [Str [c,'\x0302']]] -- hat
+ '\x0302' -> Just [Emph [Str [c,'\x0302']]] -- hat
+ '~' -> Just [Emph [Str [c,'\x0303']]] -- tilde
+ _ -> Nothing
expToInlines _ = Nothing