aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs20
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs61
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs86
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs133
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs52
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs24
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs2
7 files changed, 239 insertions, 139 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 6a799e270..56cb16b20 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,5 +1,6 @@
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
-import Data.Char (toUpper, isDigit)
+import Data.Char (toUpper)
+import Text.Pandoc.Shared (safeRead)
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder
@@ -11,6 +12,7 @@ import Data.Char (isSpace)
import Control.Monad.State
import Control.Applicative ((<$>))
import Data.List (intersperse)
+import Data.Maybe (fromMaybe)
{-
@@ -682,10 +684,9 @@ parseBlock (Elem e) =
"lowerroman" -> LowerRoman
"upperroman" -> UpperRoman
_ -> Decimal
- let start = case attrValue "override" <$>
- filterElement (named "listitem") e of
- Just x@(_:_) | all isDigit x -> read x
- _ -> 1
+ let start = fromMaybe 1 $
+ (attrValue "override" <$> filterElement (named "listitem") e)
+ >>= safeRead
orderedListWith (start,listStyle,DefaultDelim)
<$> listitems
"variablelist" -> definitionList <$> deflistitems
@@ -779,7 +780,7 @@ parseBlock (Elem e) =
caption <- case filterChild isCaption e of
Just t -> getInlines t
Nothing -> return mempty
- let e' = maybe e id $ filterChild (named "tgroup") e
+ let e' = fromMaybe e $ filterChild (named "tgroup") e
let isColspec x = named "colspec" x || named "col" x
let colspecs = case filterChild (named "colgroup") e' of
Just c -> filterChildren isColspec c
@@ -801,11 +802,14 @@ parseBlock (Elem e) =
Just "center" -> AlignCenter
_ -> AlignDefault
let toWidth c = case findAttr (unqual "colwidth") c of
- Just w -> read $ filter (\x ->
+ Just w -> fromMaybe 0
+ $ safeRead $ '0': filter (\x ->
(x >= '0' && x <= '9')
|| x == '.') w
Nothing -> 0 :: Double
- let numrows = maximum $ map length bodyrows
+ let numrows = case bodyrows of
+ [] -> 0
+ xs -> maximum $ map length xs
let aligns = case colspecs of
[] -> replicate numrows AlignDefault
cs -> map toAlignment cs
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 7ca554fa3..506fe7770 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -76,9 +76,18 @@ pBody :: TagParser [Block]
pBody = pInTags "body" block
pHead :: TagParser [Block]
-pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag)
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag)
where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces
setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t))
+ pMetaTag = do
+ mt <- pSatisfy (~== TagOpen "meta" [])
+ let name = fromAttrib "name" mt
+ if null name
+ then return []
+ else do
+ let content = fromAttrib "content" mt
+ updateState $ B.setMeta name (B.text content)
+ return []
block :: TagParser [Block]
block = choice
@@ -92,6 +101,7 @@ block = choice
, pHead
, pBody
, pPlain
+ , pDiv
, pRawHtmlBlock
]
@@ -177,6 +187,13 @@ pRawTag = do
then return []
else return $ renderTags' [tag]
+pDiv :: TagParser [Block]
+pDiv = try $ do
+ getOption readerParseRaw >>= guard
+ TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
+ contents <- pInTags "div" block
+ return [Div (mkAttr attr) contents]
+
pRawHtmlBlock :: TagParser [Block]
pRawHtmlBlock = do
raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
@@ -199,7 +216,7 @@ pHeader = try $ do
let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
let level = read (drop 1 tagtype)
contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof)
- let ident = maybe "" id $ lookup "id" attr
+ let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
return $ if bodyTitle
@@ -249,7 +266,7 @@ pCol = try $ do
skipMany pBlank
return $ case lookup "width" attribs of
Just x | not (null x) && last x == '%' ->
- maybe 0.0 id $ safeRead ('0':'.':init x)
+ fromMaybe 0.0 $ safeRead ('0':'.':init x)
_ -> 0.0
pColgroup :: TagParser [Double]
@@ -295,11 +312,7 @@ pCodeBlock = try $ do
let result = case reverse result' of
'\n':_ -> init result'
_ -> result'
- let attribsId = fromMaybe "" $ lookup "id" attr
- let attribsClasses = words $ fromMaybe "" $ lookup "class" attr
- let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
- let attribs = (attribsId, attribsClasses, attribsKV)
- return [CodeBlock attribs result]
+ return [CodeBlock (mkAttr attr) result]
inline :: TagParser [Inline]
inline = choice
@@ -314,6 +327,7 @@ inline = choice
, pLink
, pImage
, pCode
+ , pSpan
, pRawHtmlInline
]
@@ -397,11 +411,14 @@ pCode :: TagParser [Inline]
pCode = try $ do
(TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
result <- manyTill pAnyTag (pCloses open)
- let ident = fromMaybe "" $ lookup "id" attr
- let classes = words $ fromMaybe [] $ lookup "class" attr
- let rest = filter (\(x,_) -> x /= "id" && x /= "class") attr
- return [Code (ident,classes,rest)
- $ intercalate " " $ lines $ innerText result]
+ return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result]
+
+pSpan :: TagParser [Inline]
+pSpan = try $ do
+ getOption readerParseRaw >>= guard
+ TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
+ contents <- pInTags "span" inline
+ return [Span (mkAttr attr) contents]
pRawHtmlInline :: TagParser [Inline]
pRawHtmlInline = do
@@ -459,7 +476,13 @@ pBlank = try $ do
pTagContents :: Parser [Char] ParserState Inline
pTagContents =
- pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
+ Math InlineMath `fmap` mathInline
+ <|> Math DisplayMath `fmap` mathDisplay
+ <|> pStr
+ <|> pSpace
+ <|> smartPunctuation pTagContents
+ <|> pSymbol
+ <|> pBad
pStr :: Parser [Char] ParserState Inline
pStr = do
@@ -474,6 +497,7 @@ isSpecial '"' = True
isSpecial '\'' = True
isSpecial '.' = True
isSpecial '-' = True
+isSpecial '$' = True
isSpecial '\8216' = True
isSpecial '\8217' = True
isSpecial '\8220' = True
@@ -549,7 +573,7 @@ blockHtmlTags = ["address", "article", "aside", "blockquote", "body", "button",
"noframes", "noscript", "object", "ol", "output", "p", "pre", "progress",
"section", "table", "tbody", "textarea", "thead", "tfoot", "ul", "dd",
"dt", "frameset", "li", "tbody", "td", "tfoot",
- "th", "thead", "tr", "script", "style", "video"]
+ "th", "thead", "tr", "script", "style", "svg", "video"]
-- We want to allow raw docbook in markdown documents, so we
-- include docbook block tags here too.
@@ -648,3 +672,10 @@ htmlTag f = try $ do
_ -> do
rendered <- manyTill anyChar (char '>')
return (next, rendered ++ ">")
+
+mkAttr :: [(String, String)] -> Attr
+mkAttr attr = (attribsId, attribsClasses, attribsKV)
+ where attribsId = fromMaybe "" $ lookup "id" attr
+ attribsClasses = words $ fromMaybe "" $ lookup "class" attr
+ attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index ff5b73348..51271edc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,12 +38,13 @@ import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Shared
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding ((<|>), many, optional, space)
+import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
+ mathDisplay, mathInline)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Char ( chr, ord )
import Control.Monad
import Text.Pandoc.Builder
-import Data.Char (isLetter)
+import Data.Char (isLetter, isAlphaNum)
import Control.Applicative
import Data.Monoid
import Data.Maybe (fromMaybe)
@@ -163,28 +164,40 @@ mathChars = concat <$>
<|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar)
)
+quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines
+quoted' f starter ender = do
+ startchs <- starter
+ try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs
+
double_quote :: LP Inlines
-double_quote = (doubleQuoted . mconcat) <$>
- (try $ string "``" *> manyTill inline (try $ string "''"))
+double_quote =
+ ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
+ <|> quoted' doubleQuoted (string "“") (void $ char '”')
+ -- the following is used by babel for localized quotes:
+ <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
+ <|> quoted' doubleQuoted (string "\"") (void $ char '"')
+ )
single_quote :: LP Inlines
-single_quote = (singleQuoted . mconcat) <$>
- (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter))
+single_quote =
+ ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
+ <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
+ )
inline :: LP Inlines
inline = (mempty <$ comment)
<|> (space <$ sp)
<|> inlineText
<|> inlineCommand
- <|> grouped inline
+ <|> inlineGroup
<|> (char '-' *> option (str "-")
((char '-') *> option (str "–") (str "—" <$ char '-')))
<|> double_quote
<|> single_quote
- <|> (str "“" <$ try (string "``")) -- nb. {``} won't be caught by double_quote
<|> (str "”" <$ try (string "''"))
- <|> (str "‘" <$ char '`') -- nb. {`} won't be caught by single_quote
+ <|> (str "”" <$ char '”')
<|> (str "’" <$ char '\'')
+ <|> (str "’" <$ char '’')
<|> (str "\160" <$ char '~')
<|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
<|> (mathInline $ char '$' *> mathChars <* char '$')
@@ -199,6 +212,15 @@ inline = (mempty <$ comment)
inlines :: LP Inlines
inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
+inlineGroup :: LP Inlines
+inlineGroup = do
+ ils <- grouped inline
+ if isNull ils
+ then return mempty
+ else return $ spanWith nullAttr ils
+ -- we need the span so we can detitlecase bibtex entries;
+ -- we need to know when something is {C}apitalized
+
block :: LP Blocks
block = (mempty <$ comment)
<|> (mempty <$ ((spaceChar <|> newline) *> spaces))
@@ -364,6 +386,7 @@ inlineCommands = M.fromList $
, ("backslash", lit "\\")
, ("slash", lit "/")
, ("textbf", strong <$> tok)
+ , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
, ("ldots", lit "…")
, ("dots", lit "…")
, ("mdots", lit "…")
@@ -434,6 +457,7 @@ inlineCommands = M.fromList $
, ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("verb", doverb)
, ("lstinline", doverb)
+ , ("Verb", doverb)
, ("texttt", (code . stringify . toList) <$> tok)
, ("url", (unescapeURL <$> braced) >>= \url ->
pure (link url "" (str url)))
@@ -518,9 +542,7 @@ inNote ils =
unescapeURL :: String -> String
unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable '%' = True
- isEscapable '#' = True
- isEscapable _ = False
+ where isEscapable c = c `elem` "#$%&~_^\\{}"
unescapeURL (x:xs) = x:unescapeURL xs
unescapeURL [] = ""
@@ -747,7 +769,7 @@ inlineText :: LP Inlines
inlineText = str <$> many1 inlineChar
inlineChar :: LP Char
-inlineChar = noneOf "\\$%^_&~#{}^'`-[] \t\n"
+inlineChar = noneOf "\\$%^_&~#{}^'`\"‘’“”-[] \t\n"
environment :: LP Blocks
environment = do
@@ -852,9 +874,8 @@ verbatimEnv = do
(_,r) <- withRaw $ do
controlSeq "begin"
name <- braced
- guard $ name == "verbatim" || name == "Verbatim" ||
- name == "lstlisting" || name == "minted" ||
- name == "alltt"
+ guard $ name `elem` ["verbatim", "Verbatim", "lstlisting",
+ "minted", "alltt"]
verbEnv name
rest <- getInput
return (r,rest)
@@ -1030,14 +1051,14 @@ paragraph = do
preamble :: LP Blocks
preamble = mempty <$> manyTill preambleBlock beginDoc
where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}"
- preambleBlock = (mempty <$ comment)
- <|> (mempty <$ sp)
- <|> (mempty <$ blanklines)
- <|> (mempty <$ macro)
- <|> blockCommand
- <|> (mempty <$ anyControlSeq)
- <|> (mempty <$ braced)
- <|> (mempty <$ anyChar)
+ preambleBlock = (void comment)
+ <|> (void sp)
+ <|> (void blanklines)
+ <|> (void macro)
+ <|> (void blockCommand)
+ <|> (void anyControlSeq)
+ <|> (void braced)
+ <|> (void anyChar)
-------
@@ -1058,6 +1079,7 @@ simpleCiteArgs = try $ do
first <- optionMaybe $ toList <$> opt
second <- optionMaybe $ toList <$> opt
char '{'
+ optional sp
keys <- manyTill citationLabel (char '}')
let (pre, suf) = case (first , second ) of
(Just s , Nothing) -> (mempty, s )
@@ -1073,18 +1095,24 @@ simpleCiteArgs = try $ do
return $ addPrefix pre $ addSuffix suf $ map conv keys
citationLabel :: LP String
-citationLabel = trim <$>
- (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp)
+citationLabel = optional sp *>
+ (many1 (satisfy isBibtexKeyChar)
+ <* optional sp
+ <* optional (char ',')
+ <* optional sp)
+ where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*"
cites :: CitationMode -> Bool -> LP [Citation]
cites mode multi = try $ do
cits <- if multi
then many1 simpleCiteArgs
else count 1 simpleCiteArgs
- let (c:cs) = concat cits
+ let cs = concat cits
return $ case mode of
- AuthorInText -> c {citationMode = mode} : cs
- _ -> map (\a -> a {citationMode = mode}) (c:cs)
+ AuthorInText -> case cs of
+ (c:rest) -> c {citationMode = mode} : rest
+ [] -> []
+ _ -> map (\a -> a {citationMode = mode}) cs
citation :: String -> CitationMode -> Bool -> LP Inlines
citation name mode multi = do
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 9b98cbc3e..3feafd362 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -215,10 +215,10 @@ pandocTitleBlock = try $ do
author' <- author
date' <- date
return $
- ( if B.isNull title' then id else B.setMeta "title" title'
- . if null author' then id else B.setMeta "author" author'
- . if B.isNull date' then id else B.setMeta "date" date' )
- nullMeta
+ (if B.isNull title' then id else B.setMeta "title" title')
+ . (if null author' then id else B.setMeta "author" author')
+ . (if B.isNull date' then id else B.setMeta "date" date')
+ $ nullMeta
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
yamlMetaBlock :: MarkdownParser (F Blocks)
@@ -227,6 +227,7 @@ yamlMetaBlock = try $ do
pos <- getPosition
string "---"
blankline
+ notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
rawYamlLines <- manyTill anyLine stopLine
-- by including --- and ..., we allow yaml blocks with just comments:
let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
@@ -443,6 +444,9 @@ block = choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
, guardEnabled Ext_latex_macros *> (macro >>= return . return)
+ -- note: bulletList needs to be before header because of
+ -- the possibility of empty list items: -
+ , bulletList
, header
, lhsCodeBlock
, rawTeXBlock
@@ -453,7 +457,6 @@ block = choice [ mempty <$ blanklines
, codeBlockIndented
, blockQuote
, hrule
- , bulletList
, orderedList
, definitionList
, noteBlock
@@ -698,7 +701,7 @@ bulletListStart = try $ do
skipNonindentSpaces
notFollowedBy' (() <$ hrule) -- because hrules start out just like lists
satisfy isBulletListMarker
- spaceChar
+ spaceChar <|> lookAhead newline
skipSpaces
anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim)
@@ -726,11 +729,15 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ())
-- parse a line of a list item (start = parser for beginning of list item)
listLine :: MarkdownParser String
listLine = try $ do
- notFollowedBy blankline
notFollowedBy' (do indentSpaces
- many (spaceChar)
+ many spaceChar
listStart)
- chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline
+ notFollowedBy' $ htmlTag (~== TagClose "div")
+ chunks <- manyTill
+ ( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
+ <|> liftM snd (htmlTag isCommentTag)
+ <|> count 1 anyChar
+ ) newline
return $ concat chunks
-- parse raw text for one list item, excluding start marker and continuations
@@ -739,7 +746,7 @@ rawListItem :: MarkdownParser a
rawListItem start = try $ do
start
first <- listLine
- rest <- many (notFollowedBy listStart >> listLine)
+ rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
blanks <- many blankline
return $ unlines (first:rest) ++ blanks
@@ -757,6 +764,7 @@ listContinuationLine :: MarkdownParser String
listContinuationLine = try $ do
notFollowedBy blankline
notFollowedBy' listStart
+ notFollowedBy' $ htmlTag (~== TagClose "div")
optional indentSpaces
result <- anyLine
return $ result ++ "\n"
@@ -781,8 +789,8 @@ listItem start = try $ do
orderedList :: MarkdownParser (F Blocks)
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
- unless ((style == DefaultStyle || style == Decimal || style == Example) &&
- (delim == DefaultDelim || delim == Period)) $
+ unless (style `elem` [DefaultStyle, Decimal, Example] &&
+ delim `elem` [DefaultDelim, Period]) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
items <- fmap sequence $ many1 $ listItem
@@ -871,8 +879,11 @@ para = try $ do
$ try $ do
newline
(blanklines >> return mempty)
- <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
- <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
+ <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
+ <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
+ <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
+ <|> (guardEnabled Ext_lists_without_preceding_blankline >>
+ () <$ lookAhead listStart)
return $ do
result' <- result
case B.toList result' of
@@ -891,7 +902,9 @@ plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
--
htmlElement :: MarkdownParser String
-htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
+htmlElement = rawVerbatimBlock
+ <|> strictHtmlBlock
+ <|> liftM snd (htmlTag isBlockTag)
htmlBlock :: MarkdownParser (F Blocks)
htmlBlock = do
@@ -912,8 +925,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag)
rawVerbatimBlock :: MarkdownParser String
rawVerbatimBlock = try $ do
- (TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
- t == "pre" || t == "style" || t == "script")
+ (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem
+ ["pre", "style", "script"])
(const True))
contents <- manyTill anyChar (htmlTag (~== TagClose tag))
return $ open ++ contents ++ renderTags [TagClose tag]
@@ -1113,12 +1126,12 @@ multilineTableHeader headless = try $ do
then liftM (map (:[]) . tail .
splitStringByIndices (init indices)) $ lookAhead anyLine
else return $ transpose $ map
- (\ln -> tail $ splitStringByIndices (init indices) ln)
+ (tail . splitStringByIndices (init indices))
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
then replicate (length dashes) ""
- else map unwords rawHeadsList
+ else map (unlines . map trim) rawHeadsList
heads <- fmap sequence $
mapM (parseFromString (mconcat <$> many plain)) $
map trim rawHeads
@@ -1175,7 +1188,7 @@ gridTableHeader headless = try $ do
-- RST does not have a notion of alignments
let rawHeads = if headless
then replicate (length dashes) ""
- else map unwords $ transpose
+ else map (unlines . map trim) $ transpose
$ map (gridTableSplitLine indices) rawContent
heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
return (heads, aligns, indices)
@@ -1401,39 +1414,6 @@ math :: MarkdownParser (F Inlines)
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
<|> (return . B.math <$> (mathInline >>= applyMacros'))
-mathDisplay :: MarkdownParser String
-mathDisplay =
- (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathDisplayWith "\\[" "\\]")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathDisplayWith "\\\\[" "\\\\]")
-
-mathDisplayWith :: String -> String -> MarkdownParser String
-mathDisplayWith op cl = try $ do
- string op
- many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
-
-mathInline :: MarkdownParser String
-mathInline =
- (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathInlineWith "\\(" "\\)")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathInlineWith "\\\\(" "\\\\)")
-
-mathInlineWith :: String -> String -> MarkdownParser String
-mathInlineWith op cl = try $ do
- string op
- notFollowedBy space
- words' <- many1Till (count 1 (noneOf "\n\\")
- <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
- <|> count 1 newline <* notFollowedBy' blankline
- *> return " ")
- (try $ string cl)
- notFollowedBy digit -- to prevent capture of $5
- return $ concat words'
-
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
enclosure :: Char
@@ -1450,6 +1430,7 @@ enclosure c = do
-- Parse inlines til you hit one c or a sequence of two cs.
-- If one c, emit emph and then parse two.
-- If two cs, emit strong and then parse one.
+-- Otherwise, emit ccc then the results.
three :: Char -> MarkdownParser (F Inlines)
three c = do
contents <- mconcat <$> many (notFollowedBy (char c) >> inline)
@@ -1474,7 +1455,7 @@ one c prefix' = do
contents <- mconcat <$> many ( (notFollowedBy (char c) >> inline)
<|> try (string [c,c] >>
notFollowedBy (char c) >>
- two c prefix') )
+ two c mempty) )
(char c >> return (B.emph <$> (prefix' <> contents)))
<|> return (return (B.str [c]) <> (prefix' <> contents))
@@ -1559,8 +1540,11 @@ endline :: MarkdownParser (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
+ guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
+ guardEnabled Ext_backtick_code_blocks >>
+ notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
-- parse potential list-starts differently if in a list:
st <- getState
when (stateParserContext st == ListItemState) $ do
@@ -1738,7 +1722,7 @@ spanHtml = try $ do
guardEnabled Ext_markdown_in_html_blocks
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
- let ident = maybe "" id $ lookup "id" attrs
+ let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.spanWith (ident, classes, keyvals) <$> contents
@@ -1748,7 +1732,7 @@ divHtml = try $ do
guardEnabled Ext_markdown_in_html_blocks
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" [])
contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div"))
- let ident = maybe "" id $ lookup "id" attrs
+ let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.divWith (ident, classes, keyvals) <$> contents
@@ -1768,12 +1752,11 @@ rawHtmlInline = do
cite :: MarkdownParser (F Inlines)
cite = do
guardEnabled Ext_citations
- citations <- textualCite <|> (fmap (flip B.cite unknownC) <$> normalCite)
+ citations <- textualCite
+ <|> do (cs, raw) <- withRaw normalCite
+ return $ (flip B.cite (B.text raw)) <$> cs
return citations
-unknownC :: Inlines
-unknownC = B.str "???"
-
textualCite :: MarkdownParser (F Inlines)
textualCite = try $ do
(_, key) <- citeKey
@@ -1784,14 +1767,18 @@ textualCite = try $ do
, citationNoteNum = 0
, citationHash = 0
}
- mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite
+ mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
case mbrest of
- Just rest -> return $ (flip B.cite unknownC . (first:)) <$> rest
- Nothing -> (fmap (flip B.cite unknownC) <$> bareloc first) <|>
- return (do st <- askF
- return $ case M.lookup key (stateExamples st) of
- Just n -> B.str (show n)
- _ -> B.cite [first] unknownC)
+ Just (rest, raw) ->
+ return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
+ <$> rest
+ Nothing ->
+ (do (cs, raw) <- withRaw $ bareloc first
+ return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs)
+ <|> return (do st <- askF
+ return $ case M.lookup key (stateExamples st) of
+ Just n -> B.str (show n)
+ _ -> B.cite [first] $ B.str $ '@':key)
bareloc :: Citation -> MarkdownParser (F [Citation])
bareloc c = try $ do
@@ -1817,11 +1804,17 @@ normalCite = try $ do
citeKey :: MarkdownParser (Bool, String)
citeKey = try $ do
+ -- make sure we're not right after an alphanumeric,
+ -- since foo@bar.baz is probably an email address
+ lastStrPos <- stateLastStrPos <$> getState
+ pos <- getPosition
+ guard $ lastStrPos /= Just pos
suppress_author <- option False (char '-' >> return True)
char '@'
- first <- letter
- let internal p = try $ p >>~ lookAhead (letter <|> digit)
- rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/")
+ first <- letter <|> char '_'
+ let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+ let internal p = try $ p >>~ lookAhead regchar
+ rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
let key = first:rest
return (suppress_author, key)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 2b938cd82..8d8ea0199 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
+-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
{-
Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
@@ -43,7 +44,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag )
import Text.Pandoc.XML ( fromEntities )
import Text.Pandoc.Parsing hiding ( nested )
import Text.Pandoc.Walk ( walk )
-import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead )
+import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim )
import Data.Monoid (mconcat, mempty)
import Control.Applicative ((<$>), (<*), (*>), (<$))
import Control.Monad
@@ -51,7 +52,9 @@ import Data.List (intersperse, intercalate, isPrefixOf )
import Text.HTML.TagSoup
import Data.Sequence (viewl, ViewL(..), (<|))
import qualified Data.Foldable as F
+import qualified Data.Map as M
import Data.Char (isDigit, isSpace)
+import Data.Maybe (fromMaybe)
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: ReaderOptions -- ^ Reader options
@@ -62,6 +65,8 @@ readMediaWiki opts s =
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
, mwCategoryLinks = []
+ , mwHeaderMap = M.empty
+ , mwIdentifierList = []
}
"source" (s ++ "\n") of
Left err' -> error $ "\nError:\n" ++ show err'
@@ -71,10 +76,23 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
, mwNextLinkNumber :: Int
, mwCategoryLinks :: [Inlines]
+ , mwHeaderMap :: M.Map Inlines String
+ , mwIdentifierList :: [String]
}
type MWParser = Parser [Char] MWState
+instance HasReaderOptions MWParser where
+ askReaderOption f = (f . mwOptions) `fmap` getState
+
+instance HasHeaderMap MWParser where
+ getHeaderMap = fmap mwHeaderMap getState
+ putHeaderMap hm = updateState $ \st -> st{ mwHeaderMap = hm }
+
+instance HasIdentifierList MWParser where
+ getIdentifierList = fmap mwIdentifierList getState
+ putIdentifierList l = updateState $ \st -> st{ mwIdentifierList = l }
+
--
-- auxiliary functions
--
@@ -91,7 +109,7 @@ nested p = do
return res
specialChars :: [Char]
-specialChars = "'[]<=&*{}|\""
+specialChars = "'[]<=&*{}|\":\\"
spaceChars :: [Char]
spaceChars = " \n\t"
@@ -187,7 +205,7 @@ table = do
tableStart
styles <- option [] parseAttrs <* blankline
let tableWidth = case lookup "width" styles of
- Just w -> maybe 1.0 id $ parseWidth w
+ Just w -> fromMaybe 1.0 $ parseWidth w
Nothing -> 1.0
caption <- option mempty tableCaption
optional rowsep
@@ -268,7 +286,7 @@ tableCell = try $ do
Just "center" -> AlignCenter
_ -> AlignDefault
let width = case lookup "width" attrs of
- Just xs -> maybe 0.0 id $ parseWidth xs
+ Just xs -> fromMaybe 0.0 $ parseWidth xs
Nothing -> 0.0
return ((align, width), bs)
@@ -351,7 +369,8 @@ header = try $ do
let lev = length eqs
guard $ lev <= 6
contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=')
- return $ B.header lev contents
+ attr <- registerHeader nullAttr contents
+ return $ B.headerWith attr lev contents
bulletList :: MWParser Blocks
bulletList = B.bulletList <$>
@@ -369,7 +388,7 @@ orderedList =
spaces
items <- many (listItem '#' <|> li)
optional (htmlTag (~== TagClose "ol"))
- let start = maybe 1 id $ safeRead $ fromAttrib "start" tag
+ let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items
definitionList :: MWParser Blocks
@@ -380,8 +399,9 @@ defListItem = try $ do
terms <- mconcat . intersperse B.linebreak <$> many defListTerm
-- we allow dd with no dt, or dt with no dd
defs <- if B.isNull terms
- then many1 $ listItem ':'
- else many $ listItem ':'
+ then notFollowedBy (try $ string ":<math>") *>
+ many1 (listItem ':')
+ else many (listItem ':')
return (terms, defs)
defListTerm :: MWParser Inlines
@@ -462,6 +482,7 @@ inline = whitespace
<|> image
<|> internalLink
<|> externalLink
+ <|> math
<|> inlineTag
<|> B.singleton <$> charRef
<|> inlineHtml
@@ -472,6 +493,16 @@ inline = whitespace
str :: MWParser Inlines
str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
+math :: MWParser Inlines
+math = (B.displayMath . trim <$> try (char ':' >> charsInTags "math"))
+ <|> (B.math . trim <$> charsInTags "math")
+ <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
+ <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
+ where dmStart = string "\\["
+ dmEnd = try (string "\\]")
+ mStart = string "\\("
+ mEnd = try (string "\\)")
+
variable :: MWParser String
variable = try $ do
string "{{{"
@@ -495,7 +526,6 @@ inlineTag = do
TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
- TagOpen "math" _ -> B.math <$> charsInTags "math"
TagOpen "code" _ -> B.code <$> charsInTags "code"
TagOpen "tt" _ -> B.code <$> charsInTags "tt"
TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
@@ -528,7 +558,7 @@ image = try $ do
_ <- many (try $ char '|' *> imageOption)
caption <- (B.str fname <$ sym "]]")
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
- return $ B.image fname "image" caption
+ return $ B.image fname ("fig:" ++ stringify caption) caption
imageOption :: MWParser String
imageOption =
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 1f7088f72..6bd617f7e 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -27,16 +27,30 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of TeX math to a list of 'Pandoc' inline elements.
-}
-module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where
+module Text.Pandoc.Readers.TeXMath ( readTeXMath, readTeXMath' ) where
import Text.Pandoc.Definition
import Text.TeXMath
-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
--- Defaults to raw formula between @$@ characters if entire formula
+-- Defaults to raw formula between @$@ or @$$@ characters if entire formula
-- can't be converted.
+readTeXMath' :: MathType
+ -> String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> [Inline]
+readTeXMath' mt inp = case texMathToPandoc dt inp of
+ Left _ -> [Str (delim ++ inp ++ delim)]
+ Right res -> res
+ where (dt, delim) = case mt of
+ DisplayMath -> (DisplayBlock, "$$")
+ InlineMath -> (DisplayInline, "$")
+
+{-# DEPRECATED readTeXMath "Use readTeXMath' from Text.Pandoc.JSON instead" #-}
+-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
+-- Defaults to raw formula between @$@ characters if entire formula
+-- can't be converted. (This is provided for backwards compatibility;
+-- it is better to use @readTeXMath'@, which properly distinguishes
+-- between display and inline math.)
readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings)
-> [Inline]
-readTeXMath inp = case texMathToPandoc DisplayInline inp of
- Left _ -> [Str ("$" ++ inp ++ "$")]
- Right res -> res
+readTeXMath = readTeXMath' InlineMath
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 23e07f621..93658cdea 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -594,7 +594,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo
simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser
-> ([Inline] -> Inline) -- ^ Inline constructor
-> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly)
-simpleInline border construct = surrounded border (inlineWithAttribute) >>=
+simpleInline border construct = surrounded border inlineWithAttribute >>=
return . construct . normalizeSpaces
where inlineWithAttribute = (try $ optional attributes) >> inline