aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Parsing.hs43
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs235
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs2
-rw-r--r--src/Text/Pandoc/Readers/RST.hs2
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs299
5 files changed, 314 insertions, 267 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 68d4605ee..a9009eaa2 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -997,17 +997,17 @@ registerHeader (ident,classes,kvs) header' = do
failUnlessSmart :: HasReaderOptions st => Parser s st ()
failUnlessSmart = getOption readerSmart >>= guard
-smartPunctuation :: Parser [Char] ParserState Inline
- -> Parser [Char] ParserState Inline
+smartPunctuation :: Parser [Char] ParserState Inlines
+ -> Parser [Char] ParserState Inlines
smartPunctuation inlineParser = do
failUnlessSmart
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-apostrophe :: Parser [Char] ParserState Inline
-apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019")
+apostrophe :: Parser [Char] ParserState Inlines
+apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
-quoted :: Parser [Char] ParserState Inline
- -> Parser [Char] ParserState Inline
+quoted :: Parser [Char] ParserState Inlines
+ -> Parser [Char] ParserState Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
withQuoteContext :: QuoteContext
@@ -1022,20 +1022,19 @@ withQuoteContext context parser = do
setState newState { stateQuoteContext = oldQuoteContext }
return result
-singleQuoted :: Parser [Char] ParserState Inline
- -> Parser [Char] ParserState Inline
+singleQuoted :: Parser [Char] ParserState Inlines
+ -> Parser [Char] ParserState Inlines
singleQuoted inlineParser = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
- return . Quoted SingleQuote . normalizeSpaces
+ return . B.singleQuoted . mconcat
-doubleQuoted :: Parser [Char] ParserState Inline
- -> Parser [Char] ParserState Inline
+doubleQuoted :: Parser [Char] ParserState Inlines
+ -> Parser [Char] ParserState Inlines
doubleQuoted inlineParser = try $ do
doubleQuoteStart
- withQuoteContext InDoubleQuote $ do
- contents <- manyTill inlineParser doubleQuoteEnd
- return . Quoted DoubleQuote . normalizeSpaces $ contents
+ withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
+ return . B.doubleQuoted . mconcat
failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState ()
failIfInQuoteContext context = do
@@ -1079,17 +1078,17 @@ doubleQuoteEnd = do
charOrRef "\"\8221\148"
return ()
-ellipses :: Parser [Char] st Inline
+ellipses :: Parser [Char] st Inlines
ellipses = do
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
- return (Str "\8230")
+ return (B.str "\8230")
-dash :: Parser [Char] ParserState Inline
+dash :: Parser [Char] ParserState Inlines
dash = do
oldDashes <- getOption readerOldDashes
if oldDashes
then emDashOld <|> enDashOld
- else Str `fmap` (hyphenDash <|> emDash <|> enDash)
+ else B.str `fmap` (hyphenDash <|> emDash <|> enDash)
-- Two hyphens = en-dash, three = em-dash
hyphenDash :: Parser [Char] st String
@@ -1107,16 +1106,16 @@ enDash = do
try (charOrRef "\8212\151")
return "\8211"
-enDashOld :: Parser [Char] st Inline
+enDashOld :: Parser [Char] st Inlines
enDashOld = do
try (charOrRef "\8211\150") <|>
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
- return (Str "\8211")
+ return (B.str "\8211")
-emDashOld :: Parser [Char] st Inline
+emDashOld :: Parser [Char] st Inlines
emDashOld = do
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
- return (Str "\8212")
+ return (B.str "\8212")
-- This is used to prevent exponential blowups for things like:
-- a**a*a**a*a**a*a**a*a**a*a**a*a**
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index d1e4d0024..4fab251bb 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -40,6 +40,7 @@ import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing
@@ -48,6 +49,8 @@ import Data.List ( intercalate )
import Data.Char ( isDigit )
import Control.Monad ( liftM, guard, when, mzero )
import Control.Applicative ( (<$>), (<$), (<*) )
+import Data.Monoid
+import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl)
isSpace :: Char -> Bool
isSpace ' ' = True
@@ -66,30 +69,30 @@ readHtml opts inp =
where tags = canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
- blocks <- (fixPlains False . concat) <$> manyTill block eof
+ blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta <$> getState
- return $ Pandoc meta blocks
+ return $ Pandoc meta (B.toList blocks)
type TagParser = Parser [Tag String] ParserState
-pBody :: TagParser [Block]
+pBody :: TagParser Blocks
pBody = pInTags "body" block
-pHead :: TagParser [Block]
-pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag)
- where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces
- setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t))
+pHead :: TagParser Blocks
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag)
+ where pTitle = pInTags "title" inline >>= setTitle . trimInlines
+ setTitle t = mempty <$ (updateState $ B.setMeta "title" t)
pMetaTag = do
mt <- pSatisfy (~== TagOpen "meta" [])
let name = fromAttrib "name" mt
if null name
- then return []
+ then return mempty
else do
let content = fromAttrib "content" mt
updateState $ B.setMeta name (B.text content)
- return []
+ return mempty
-block :: TagParser [Block]
+block :: TagParser Blocks
block = choice
[ pPara
, pHeader
@@ -105,10 +108,10 @@ block = choice
, pRawHtmlBlock
]
-pList :: TagParser [Block]
+pList :: TagParser Blocks
pList = pBulletList <|> pOrderedList <|> pDefinitionList
-pBulletList :: TagParser [Block]
+pBulletList :: TagParser Blocks
pBulletList = try $ do
pSatisfy (~== TagOpen "ul" [])
let nonItem = pSatisfy (\t ->
@@ -118,9 +121,9 @@ pBulletList = try $ do
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul")
- return [BulletList $ map (fixPlains True) items]
+ return $ B.bulletList $ map (fixPlains True) items
-pOrderedList :: TagParser [Block]
+pOrderedList :: TagParser Blocks
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
let (start, style) = (sta', sty')
@@ -146,27 +149,27 @@ pOrderedList = try $ do
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol")
- return [OrderedList (start, style, DefaultDelim) $ map (fixPlains True) items]
+ return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
-pDefinitionList :: TagParser [Block]
+pDefinitionList :: TagParser Blocks
pDefinitionList = try $ do
pSatisfy (~== TagOpen "dl" [])
items <- manyTill pDefListItem (pCloses "dl")
- return [DefinitionList items]
+ return $ B.definitionList items
-pDefListItem :: TagParser ([Inline],[[Block]])
+pDefListItem :: TagParser (Inlines, [Blocks])
pDefListItem = try $ do
let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) &&
not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl"))
terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
skipMany nonItem
- let term = intercalate [LineBreak] terms
+ let term = foldl1 (\x y -> x <> B.linebreak <> y) terms
return (term, map (fixPlains True) defs)
-fixPlains :: Bool -> [Block] -> [Block]
-fixPlains inList bs = if any isParaish bs
- then map plainToPara bs
+fixPlains :: Bool -> Blocks -> Blocks
+fixPlains inList bs = if any isParaish bs'
+ then B.fromList $ map plainToPara bs'
else bs
where isParaish (Para _) = True
isParaish (CodeBlock _ _) = True
@@ -178,6 +181,7 @@ fixPlains inList bs = if any isParaish bs
isParaish _ = False
plainToPara (Plain xs) = Para xs
plainToPara x = x
+ bs' = B.toList bs
pRawTag :: TagParser String
pRawTag = do
@@ -187,20 +191,20 @@ pRawTag = do
then return []
else return $ renderTags' [tag]
-pDiv :: TagParser [Block]
+pDiv :: TagParser Blocks
pDiv = try $ do
getOption readerParseRaw >>= guard
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
contents <- pInTags "div" block
- return [Div (mkAttr attr) contents]
+ return $ B.divWith (mkAttr attr) contents
-pRawHtmlBlock :: TagParser [Block]
+pRawHtmlBlock :: TagParser Blocks
pRawHtmlBlock = do
raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
parseRaw <- getOption readerParseRaw
if parseRaw && not (null raw)
- then return [RawBlock (Format "html") raw]
- else return []
+ then return $ B.rawBlock "html" raw
+ else return mempty
pHtmlBlock :: String -> TagParser String
pHtmlBlock t = try $ do
@@ -208,35 +212,34 @@ pHtmlBlock t = try $ do
contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
return $ renderTags' $ [open] ++ contents ++ [TagClose t]
-pHeader :: TagParser [Block]
+pHeader :: TagParser Blocks
pHeader = try $ do
TagOpen tagtype attr <- pSatisfy $
tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
(const True)
let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
let level = read (drop 1 tagtype)
- contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof)
+ contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
return $ if bodyTitle
- then [] -- skip a representation of the title in the body
- else [Header level (ident, classes, keyvals) $
- normalizeSpaces contents]
+ then mempty -- skip a representation of the title in the body
+ else B.headerWith (ident, classes, keyvals) level contents
-pHrule :: TagParser [Block]
+pHrule :: TagParser Blocks
pHrule = do
pSelfClosing (=="hr") (const True)
- return [HorizontalRule]
+ return B.horizontalRule
-pTable :: TagParser [Block]
+pTable :: TagParser Blocks
pTable = try $ do
TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
skipMany pBlank
- caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank
+ caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank
-- TODO actually read these and take width information from them
widths' <- pColgroup <|> many pCol
- head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
+ head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
skipMany pBlank
rows <- pOptInTag "tbody"
$ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
@@ -245,19 +248,21 @@ pTable = try $ do
let isSinglePlain [] = True
isSinglePlain [Plain _] = True
isSinglePlain _ = False
- let isSimple = all isSinglePlain $ concat (head':rows)
- let cols = length $ if null head'
- then head rows
- else head'
+ let lHead = B.toList head'
+ let lRows = map B.toList rows
+ let isSimple = all isSinglePlain (lHead:lRows)
+ let cols = length $ if null lHead
+ then head lRows
+ else lHead
-- fail if there are colspans or rowspans
- guard $ all (\r -> length r == cols) rows
+ guard $ all (\r -> length r == cols) lRows
let aligns = replicate cols AlignLeft
let widths = if null widths'
then if isSimple
then replicate cols 0
else replicate cols (1.0 / fromIntegral cols)
else widths'
- return [Table caption aligns widths head' rows]
+ return $ B.table caption (zip aligns widths) [head'] [rows]
pCol :: TagParser Double
pCol = try $ do
@@ -275,31 +280,31 @@ pColgroup = try $ do
skipMany pBlank
manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
-pCell :: String -> TagParser [TableCell]
+pCell :: String -> TagParser Blocks
pCell celltype = try $ do
skipMany pBlank
res <- pInTags celltype block
skipMany pBlank
- return [res]
+ return res
-pBlockQuote :: TagParser [Block]
+pBlockQuote :: TagParser Blocks
pBlockQuote = do
contents <- pInTags "blockquote" block
- return [BlockQuote $ fixPlains False contents]
+ return $ B.blockQuote $ fixPlains False contents
-pPlain :: TagParser [Block]
+pPlain :: TagParser Blocks
pPlain = do
- contents <- liftM (normalizeSpaces . concat) $ many1 inline
- if null contents
- then return []
- else return [Plain contents]
+ contents <- trimInlines . mconcat <$> many1 inline
+ if B.isNull contents
+ then return mempty
+ else return $ B.plain contents
-pPara :: TagParser [Block]
+pPara :: TagParser Blocks
pPara = do
- contents <- pInTags "p" inline
- return [Para $ normalizeSpaces contents]
+ contents <- trimInlines <$> pInTags "p" inline
+ return $ B.para contents
-pCodeBlock :: TagParser [Block]
+pCodeBlock :: TagParser Blocks
pCodeBlock = try $ do
TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
@@ -312,9 +317,9 @@ pCodeBlock = try $ do
let result = case reverse result' of
'\n':_ -> init result'
_ -> result'
- return [CodeBlock (mkAttr attr) result]
+ return $ B.codeBlockWith (mkAttr attr) result
-inline :: TagParser [Inline]
+inline :: TagParser Inlines
inline = choice
[ pTagText
, pQ
@@ -354,7 +359,7 @@ pSelfClosing f g = do
optional $ pSatisfy (tagClose f)
return open
-pQ :: TagParser [Inline]
+pQ :: TagParser Inlines
pQ = do
quoteContext <- stateQuoteContext `fmap` getState
let quoteType = case quoteContext of
@@ -363,82 +368,93 @@ pQ = do
let innerQuoteContext = if quoteType == SingleQuote
then InSingleQuote
else InDoubleQuote
- withQuoteContext innerQuoteContext $ pInlinesInTags "q" (Quoted quoteType)
+ let constructor = case quoteType of
+ SingleQuote -> B.singleQuoted
+ DoubleQuote -> B.doubleQuoted
+ withQuoteContext innerQuoteContext $
+ pInlinesInTags "q" constructor
-pEmph :: TagParser [Inline]
-pEmph = pInlinesInTags "em" Emph <|> pInlinesInTags "i" Emph
+pEmph :: TagParser Inlines
+pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
-pStrong :: TagParser [Inline]
-pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong
+pStrong :: TagParser Inlines
+pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong
-pSuperscript :: TagParser [Inline]
-pSuperscript = pInlinesInTags "sup" Superscript
+pSuperscript :: TagParser Inlines
+pSuperscript = pInlinesInTags "sup" B.superscript
-pSubscript :: TagParser [Inline]
-pSubscript = pInlinesInTags "sub" Subscript
+pSubscript :: TagParser Inlines
+pSubscript = pInlinesInTags "sub" B.subscript
-pStrikeout :: TagParser [Inline]
+pStrikeout :: TagParser Inlines
pStrikeout = do
- pInlinesInTags "s" Strikeout <|>
- pInlinesInTags "strike" Strikeout <|>
- pInlinesInTags "del" Strikeout <|>
+ pInlinesInTags "s" B.strikeout <|>
+ pInlinesInTags "strike" B.strikeout <|>
+ pInlinesInTags "del" B.strikeout <|>
try (do pSatisfy (~== TagOpen "span" [("class","strikeout")])
- contents <- liftM concat $ manyTill inline (pCloses "span")
- return [Strikeout contents])
+ contents <- mconcat <$> manyTill inline (pCloses "span")
+ return $ B.strikeout contents)
-pLineBreak :: TagParser [Inline]
+pLineBreak :: TagParser Inlines
pLineBreak = do
pSelfClosing (=="br") (const True)
- return [LineBreak]
+ return B.linebreak
-pLink :: TagParser [Inline]
+pLink :: TagParser Inlines
pLink = try $ do
tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
let url = fromAttrib "href" tag
let title = fromAttrib "title" tag
- lab <- liftM concat $ manyTill inline (pCloses "a")
- return [Link (normalizeSpaces lab) (escapeURI url, title)]
+ lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
+ return $ B.link (escapeURI url) title lab
-pImage :: TagParser [Inline]
+pImage :: TagParser Inlines
pImage = do
tag <- pSelfClosing (=="img") (isJust . lookup "src")
let url = fromAttrib "src" tag
let title = fromAttrib "title" tag
let alt = fromAttrib "alt" tag
- return [Image (B.toList $ B.text alt) (escapeURI url, title)]
+ return $ B.image (escapeURI url) title (B.text alt)
-pCode :: TagParser [Inline]
+pCode :: TagParser Inlines
pCode = try $ do
(TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
result <- manyTill pAnyTag (pCloses open)
- return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result]
+ return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result
-pSpan :: TagParser [Inline]
+pSpan :: TagParser Inlines
pSpan = try $ do
getOption readerParseRaw >>= guard
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
contents <- pInTags "span" inline
- return [Span (mkAttr attr) contents]
+ return $ B.spanWith (mkAttr attr) contents
-pRawHtmlInline :: TagParser [Inline]
+pRawHtmlInline :: TagParser Inlines
pRawHtmlInline = do
result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag
parseRaw <- getOption readerParseRaw
if parseRaw
- then return [RawInline (Format "html") $ renderTags' [result]]
- else return []
+ then return $ B.rawInline "html" $ renderTags' [result]
+ else return mempty
-pInlinesInTags :: String -> ([Inline] -> Inline)
- -> TagParser [Inline]
+pInlinesInTags :: String -> (Inlines -> Inlines)
+ -> TagParser Inlines
pInlinesInTags tagtype f = do
- contents <- pInTags tagtype inline
- return [f $ normalizeSpaces contents]
-
-pInTags :: String -> TagParser [a]
- -> TagParser [a]
+ contents <- B.unMany <$> pInTags tagtype inline
+ let left = case viewl contents of
+ EmptyL -> mempty
+ (a :< _) -> padSpace a
+ let right = case viewr contents of
+ EmptyR -> mempty
+ (_ :> a) -> padSpace a
+ return (left <> f (trimInlines . B.Many $ contents) <> right)
+ where padSpace a = if a == Space then B.space else mempty
+
+pInTags :: (Monoid a) => String -> TagParser a
+ -> TagParser a
pInTags tagtype parser = try $ do
pSatisfy (~== TagOpen tagtype [])
- liftM concat $ manyTill parser (pCloses tagtype <|> eof)
+ mconcat <$> manyTill parser (pCloses tagtype <|> eof)
pOptInTag :: String -> TagParser a
-> TagParser a
@@ -461,36 +477,36 @@ pCloses tagtype = try $ do
(TagClose "dl") | tagtype == "li" -> return ()
_ -> mzero
-pTagText :: TagParser [Inline]
+pTagText :: TagParser Inlines
pTagText = try $ do
(TagText str) <- pSatisfy isTagText
st <- getState
case runParser (many pTagContents) st "text" str of
Left _ -> fail $ "Could not parse `" ++ str ++ "'"
- Right result -> return result
+ Right result -> return $ mconcat result
pBlank :: TagParser ()
pBlank = try $ do
(TagText str) <- pSatisfy isTagText
guard $ all isSpace str
-pTagContents :: Parser [Char] ParserState Inline
+pTagContents :: Parser [Char] ParserState Inlines
pTagContents =
- Math DisplayMath `fmap` mathDisplay
- <|> Math InlineMath `fmap` mathInline
+ B.displayMath <$> mathDisplay
+ <|> B.math <$> mathInline
<|> pStr
<|> pSpace
<|> smartPunctuation pTagContents
<|> pSymbol
<|> pBad
-pStr :: Parser [Char] ParserState Inline
+pStr :: Parser [Char] ParserState Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
pos <- getPosition
updateState $ \s -> s{ stateLastStrPos = Just pos }
- return $ Str result
+ return $ B.str result
isSpecial :: Char -> Bool
isSpecial '"' = True
@@ -504,13 +520,13 @@ isSpecial '\8220' = True
isSpecial '\8221' = True
isSpecial _ = False
-pSymbol :: Parser [Char] ParserState Inline
-pSymbol = satisfy isSpecial >>= return . Str . (:[])
+pSymbol :: Parser [Char] ParserState Inlines
+pSymbol = satisfy isSpecial >>= return . B.str . (:[])
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
-pBad :: Parser [Char] ParserState Inline
+pBad :: Parser [Char] ParserState Inlines
pBad = do
c <- satisfy isBad
let c' = case c of
@@ -542,10 +558,10 @@ pBad = do
'\158' -> '\382'
'\159' -> '\376'
_ -> '?'
- return $ Str [c']
+ return $ B.str [c']
-pSpace :: Parser [Char] ParserState Inline
-pSpace = many1 (satisfy isSpace) >> return Space
+pSpace :: Parser [Char] ParserState Inlines
+pSpace = many1 (satisfy isSpace) >> return B.space
--
-- Constants
@@ -679,3 +695,4 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV)
attribsClasses = words $ fromMaybe "" $ lookup "class" attr
attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index aa0252266..57e1ca560 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1873,7 +1873,7 @@ smart :: MarkdownParser (F Inlines)
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
- choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses])
+ choice (map (return <$>) [apostrophe, dash, ellipses])
singleQuoted :: MarkdownParser (F Inlines)
singleQuoted = try $ do
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 127eae167..a574f343a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1140,7 +1140,7 @@ smart :: RSTParser Inlines
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
- choice (map (B.singleton <$>) [apostrophe, dash, ellipses])
+ choice [apostrophe, dash, ellipses]
singleQuoted :: RSTParser Inlines
singleQuoted = try $ do
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 93658cdea..c6f992275 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -50,20 +50,20 @@ TODO : refactor common patterns across readers :
module Text.Pandoc.Readers.Textile ( readTextile) where
-
import Text.Pandoc.Definition
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Shared
import Text.Pandoc.Options
-import Text.Pandoc.Parsing
+import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
import Text.HTML.TagSoup.Match
import Data.List ( intercalate )
-import Data.Char ( digitToInt, isUpper )
+import Data.Char ( digitToInt, isUpper)
import Control.Monad ( guard, liftM )
import Control.Applicative ((<$>), (*>), (<*))
+import Data.Monoid
-- | Parse a Textile text and return a Pandoc document.
readTextile :: ReaderOptions -- ^ Reader options
@@ -95,7 +95,7 @@ parseTextile = do
updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
blocks <- parseBlocks
- return $ Pandoc nullMeta blocks -- FIXME
+ return $ Pandoc nullMeta (B.toList blocks) -- FIXME
noteMarker :: Parser [Char] ParserState [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
@@ -115,11 +115,11 @@ noteBlock = try $ do
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-- | Parse document blocks
-parseBlocks :: Parser [Char] ParserState [Block]
-parseBlocks = manyTill block eof
+parseBlocks :: Parser [Char] ParserState Blocks
+parseBlocks = mconcat <$> manyTill block eof
-- | Block parsers list tried in definition order
-blockParsers :: [Parser [Char] ParserState Block]
+blockParsers :: [Parser [Char] ParserState Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
@@ -130,29 +130,32 @@ blockParsers = [ codeBlock
, rawLaTeXBlock'
, maybeExplicitBlock "table" table
, maybeExplicitBlock "p" para
+ , endBlock
]
+endBlock :: Parser [Char] ParserState Blocks
+endBlock = string "\n\n" >> return mempty
-- | Any block in the order of definition of blockParsers
-block :: Parser [Char] ParserState Block
+block :: Parser [Char] ParserState Blocks
block = choice blockParsers <?> "block"
-commentBlock :: Parser [Char] ParserState Block
+commentBlock :: Parser [Char] ParserState Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
- return Null
+ return mempty
-codeBlock :: Parser [Char] ParserState Block
+codeBlock :: Parser [Char] ParserState Blocks
codeBlock = codeBlockBc <|> codeBlockPre
-codeBlockBc :: Parser [Char] ParserState Block
+codeBlockBc :: Parser [Char] ParserState Blocks
codeBlockBc = try $ do
string "bc. "
contents <- manyTill anyLine blanklines
- return $ CodeBlock ("",[],[]) $ unlines contents
+ return $ B.codeBlock (unlines contents)
-- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockPre :: Parser [Char] ParserState Block
+codeBlockPre :: Parser [Char] ParserState Blocks
codeBlockPre = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
result' <- (innerText . parseTags) `fmap` -- remove internal tags
@@ -169,29 +172,29 @@ codeBlockPre = try $ do
let classes = words $ fromAttrib "class" t
let ident = fromAttrib "id" t
let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ CodeBlock (ident,classes,kvs) result'''
+ return $ B.codeBlockWith (ident,classes,kvs) result'''
-- | Header of the form "hN. content" with N in 1..6
-header :: Parser [Char] ParserState Block
+header :: Parser [Char] ParserState Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
attr <- attributes
char '.'
- whitespace
- name <- normalizeSpaces <$> manyTill inline blockBreak
- attr' <- registerHeader attr (B.fromList name)
- return $ Header level attr' name
+ lookAhead whitespace
+ name <- trimInlines . mconcat <$> manyTill inline blockBreak
+ attr' <- registerHeader attr name
+ return $ B.headerWith attr' level name
-- | Blockquote of the form "bq. content"
-blockQuote :: Parser [Char] ParserState Block
+blockQuote :: Parser [Char] ParserState Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
- BlockQuote . singleton <$> para
+ B.blockQuote <$> para
-- Horizontal rule
-hrule :: Parser [Char] st Block
+hrule :: Parser [Char] st Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@@ -199,62 +202,62 @@ hrule = try $ do
skipMany (spaceChar <|> char start)
newline
optional blanklines
- return HorizontalRule
+ return B.horizontalRule
-- Lists handling
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
-anyList :: Parser [Char] ParserState Block
+anyList :: Parser [Char] ParserState Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
-anyListAtDepth :: Int -> Parser [Char] ParserState Block
+anyListAtDepth :: Int -> Parser [Char] ParserState Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: Int -> Parser [Char] ParserState Block
-bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth)
+bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks
+bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
-bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
+bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
-orderedListAtDepth :: Int -> Parser [Char] ParserState Block
+orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
- return (OrderedList (1, DefaultStyle, DefaultDelim) items)
+ return $ B.orderedList items
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
-orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block]
+orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
-genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block]
+genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
- p <- many listInline
+ p <- mconcat <$> many listInline
newline
- sublist <- option [] (singleton <$> anyListAtDepth (depth + 1))
- return (Plain p : sublist)
+ sublist <- option mempty (anyListAtDepth (depth + 1))
+ return $ (B.plain p) <> sublist
-- | A definition list is a set of consecutive definition items
-definitionList :: Parser [Char] ParserState Block
-definitionList = try $ DefinitionList <$> many1 definitionListItem
+definitionList :: Parser [Char] ParserState Blocks
+definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
listStart :: Parser [Char] st Char
listStart = oneOf "*#-"
-listInline :: Parser [Char] ParserState Inline
+listInline :: Parser [Char] ParserState Inlines
listInline = try (notFollowedBy newline >> inline)
<|> try (endline <* notFollowedBy listStart)
@@ -262,16 +265,16 @@ listInline = try (notFollowedBy newline >> inline)
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
-definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
+definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
definitionListItem = try $ do
string "- "
- term <- many1Till inline (try (whitespace >> string ":="))
+ term <- mconcat <$> many1Till inline (try (whitespace >> string ":="))
def' <- multilineDef <|> inlineDef
return (term, def')
- where inlineDef :: Parser [Char] ParserState [[Block]]
- inlineDef = liftM (\d -> [[Plain d]])
- $ optional whitespace >> many listInline <* newline
- multilineDef :: Parser [Char] ParserState [[Block]]
+ where inlineDef :: Parser [Char] ParserState [Blocks]
+ inlineDef = liftM (\d -> [B.plain d])
+ $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
+ multilineDef :: Parser [Char] ParserState [Blocks]
multilineDef = try $ do
optional whitespace >> newline
s <- many1Till anyChar (try (string "=:" >> newline))
@@ -288,59 +291,57 @@ blockBreak = try (newline >> blanklines >> return ()) <|>
-- raw content
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: Parser [Char] ParserState Block
+rawHtmlBlock :: Parser [Char] ParserState Blocks
rawHtmlBlock = try $ do
(_,b) <- htmlTag isBlockTag
optional blanklines
- return $ RawBlock (Format "html") b
+ return $ B.rawBlock "html" b
-- | Raw block of LaTeX content
-rawLaTeXBlock' :: Parser [Char] ParserState Block
+rawLaTeXBlock' :: Parser [Char] ParserState Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
- RawBlock (Format "latex") <$> (rawLaTeXBlock <* spaces)
+ B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
-para :: Parser [Char] ParserState Block
-para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak
-
+para :: Parser [Char] ParserState Blocks
+para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak
-- Tables
-- | A table cell spans until a pipe |
-tableCell :: Parser [Char] ParserState TableCell
+tableCell :: Parser [Char] ParserState Blocks
tableCell = do
c <- many1 (noneOf "|\n")
- content <- parseFromString (many1 inline) c
- return $ [ Plain $ normalizeSpaces content ]
+ content <- trimInlines . mconcat <$> parseFromString (many1 inline) c
+ return $ B.plain content
-- | A table row is made of many table cells
-tableRow :: Parser [Char] ParserState [TableCell]
+tableRow :: Parser [Char] ParserState [Blocks]
tableRow = try $ ( char '|' *>
(endBy1 tableCell (optional blankline *> char '|')) <* newline)
-- | Many table rows
-tableRows :: Parser [Char] ParserState [[TableCell]]
+tableRows :: Parser [Char] ParserState [[Blocks]]
tableRows = many1 tableRow
-- | Table headers are made of cells separated by a tag "|_."
-tableHeaders :: Parser [Char] ParserState [TableCell]
+tableHeaders :: Parser [Char] ParserState [Blocks]
tableHeaders = let separator = (try $ string "|_.") in
try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline )
-- | A table with an optional header. Current implementation can
-- handle tables with and without header, but will parse cells
-- alignment attributes as content.
-table :: Parser [Char] ParserState Block
+table :: Parser [Char] ParserState Blocks
table = try $ do
- headers <- option [] tableHeaders
+ headers <- option mempty tableHeaders
rows <- tableRows
blanklines
let nbOfCols = max (length headers) (length $ head rows)
- return $ Table []
- (replicate nbOfCols AlignDefault)
- (replicate nbOfCols 0.0)
+ return $ B.table mempty
+ (zip (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0))
headers
rows
@@ -348,8 +349,8 @@ table = try $ do
-- | Blocks like 'p' and 'table' do not need explicit block tag.
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: String -- ^ block tag name
- -> Parser [Char] ParserState Block -- ^ implicit block
- -> Parser [Char] ParserState Block
+ -> Parser [Char] ParserState Blocks -- ^ implicit block
+ -> Parser [Char] ParserState Blocks
maybeExplicitBlock name blk = try $ do
optional $ try $ string name >> attributes >> char '.' >>
optional whitespace >> optional endline
@@ -363,12 +364,14 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
-inline :: Parser [Char] ParserState Inline
-inline = choice inlineParsers <?> "inline"
+inline :: Parser [Char] ParserState Inlines
+inline = do
+ choice inlineParsers <?> "inline"
-- | Inline parsers tried in order
-inlineParsers :: [Parser [Char] ParserState Inline]
-inlineParsers = [ str
+inlineParsers :: [Parser [Char] ParserState Inlines]
+inlineParsers = [ inlineMarkup
+ , str
, whitespace
, endline
, code
@@ -378,58 +381,57 @@ inlineParsers = [ str
, rawLaTeXInline'
, note
, try $ (char '[' *> inlineMarkup <* char ']')
- , inlineMarkup
, link
, image
, mark
- , (Str . (:[])) <$> characterReference
+ , (B.str . (:[])) <$> characterReference
, smartPunctuation inline
, symbol
]
-- | Inline markups
-inlineMarkup :: Parser [Char] ParserState Inline
-inlineMarkup = choice [ simpleInline (string "??") (Cite [])
- , simpleInline (string "**") Strong
- , simpleInline (string "__") Emph
- , simpleInline (char '*') Strong
- , simpleInline (char '_') Emph
- , simpleInline (char '+') Emph -- approximates underline
- , simpleInline (char '-' <* notFollowedBy (char '-')) Strikeout
- , simpleInline (char '^') Superscript
- , simpleInline (char '~') Subscript
+inlineMarkup :: Parser [Char] ParserState Inlines
+inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
+ , simpleInline (string "**") B.strong
+ , simpleInline (string "__") B.emph
+ , simpleInline (char '*') B.strong
+ , simpleInline (char '_') B.emph
+ , simpleInline (char '+') B.emph -- approximates underline
+ , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
+ , simpleInline (char '^') B.superscript
+ , simpleInline (char '~') B.subscript
]
-- | Trademark, registered, copyright
-mark :: Parser [Char] st Inline
+mark :: Parser [Char] st Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-reg :: Parser [Char] st Inline
+reg :: Parser [Char] st Inlines
reg = do
oneOf "Rr"
char ')'
- return $ Str "\174"
+ return $ B.str "\174"
-tm :: Parser [Char] st Inline
+tm :: Parser [Char] st Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
- return $ Str "\8482"
+ return $ B.str "\8482"
-copy :: Parser [Char] st Inline
+copy :: Parser [Char] st Inlines
copy = do
oneOf "Cc"
char ')'
- return $ Str "\169"
+ return $ B.str "\169"
-note :: Parser [Char] ParserState Inline
+note :: Parser [Char] ParserState Inlines
note = try $ do
ref <- (char '[' *> many1 digit <* char ']')
notes <- stateNotes <$> getState
case lookup ref notes of
Nothing -> fail "note not found"
- Just raw -> liftM Note $ parseFromString parseBlocks raw
+ Just raw -> B.note <$> parseFromString parseBlocks raw
-- | Special chars
markupChars :: [Char]
@@ -450,7 +452,7 @@ wordBoundaries = markupChars ++ stringBreakers
hyphenedWords :: Parser [Char] ParserState String
hyphenedWords = do
x <- wordChunk
- xs <- many (try $ char '-' >> wordChunk)
+ xs <- many (try $ char '-' >> wordChunk)
return $ intercalate "-" (x:xs)
wordChunk :: Parser [Char] ParserState String
@@ -462,7 +464,7 @@ wordChunk = try $ do
return $ hd:tl
-- | Any string
-str :: Parser [Char] ParserState Inline
+str :: Parser [Char] ParserState Inlines
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediatly
@@ -472,89 +474,89 @@ str = do
acro <- enclosed (char '(') (char ')') anyChar
return $ concat [baseStr, " (", acro, ")"]
updateLastStrPos
- return $ Str fullStr
+ return $ B.str fullStr
-- | Textile allows HTML span infos, we discard them
-htmlSpan :: Parser [Char] ParserState Inline
-htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
+htmlSpan :: Parser [Char] ParserState Inlines
+htmlSpan = try $ B.str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') )
-- | Some number of space chars
-whitespace :: Parser [Char] ParserState Inline
-whitespace = many1 spaceChar >> return Space <?> "whitespace"
+whitespace :: Parser [Char] ParserState Inlines
+whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
-endline :: Parser [Char] ParserState Inline
+endline :: Parser [Char] ParserState Inlines
endline = try $ do
newline >> notFollowedBy blankline
- return LineBreak
+ return B.linebreak
-rawHtmlInline :: Parser [Char] ParserState Inline
-rawHtmlInline = RawInline (Format "html") . snd <$> htmlTag isInlineTag
+rawHtmlInline :: Parser [Char] ParserState Inlines
+rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
-rawLaTeXInline' :: Parser [Char] ParserState Inline
+rawLaTeXInline' :: Parser [Char] ParserState Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
- rawLaTeXInline
+ B.singleton <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
-link :: Parser [Char] ParserState Inline
+link :: Parser [Char] ParserState Inlines
link = linkB <|> linkNoB
-linkNoB :: Parser [Char] ParserState Inline
+linkNoB :: Parser [Char] ParserState Inlines
linkNoB = try $ do
- name <- surrounded (char '"') inline
+ name <- mconcat <$> surrounded (char '"') (withQuoteContext InDoubleQuote inline)
char ':'
let stopChars = "!.,;:"
url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline)))
- let name' = if name == [Str "$"] then [Str url] else name
- return $ Link name' (url, "")
+ let name' = if B.toList name == [Str "$"] then B.str url else name
+ return $ B.link url "" name'
-linkB :: Parser [Char] ParserState Inline
+linkB :: Parser [Char] ParserState Inlines
linkB = try $ do
char '['
- name <- surrounded (char '"') inline
+ name <- mconcat <$> surrounded (char '"') inline
char ':'
url <- manyTill nonspaceChar (char ']')
- let name' = if name == [Str "$"] then [Str url] else name
- return $ Link name' (url, "")
+ let name' = if B.toList name == [Str "$"] then B.str url else name
+ return $ B.link url "" name'
-- | image embedding
-image :: Parser [Char] ParserState Inline
+image :: Parser [Char] ParserState Inlines
image = try $ do
char '!' >> notFollowedBy space
src <- manyTill anyChar (lookAhead $ oneOf "!(")
alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')')))
char '!'
- return $ Image [Str alt] (src, alt)
+ return $ B.image src alt (B.str alt)
-escapedInline :: Parser [Char] ParserState Inline
+escapedInline :: Parser [Char] ParserState Inlines
escapedInline = escapedEqs <|> escapedTag
-escapedEqs :: Parser [Char] ParserState Inline
-escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
+escapedEqs :: Parser [Char] ParserState Inlines
+escapedEqs = B.str <$> (try $ string "==" *> manyTill anyChar (try $ string "=="))
-- | literal text escaped btw <notextile> tags
-escapedTag :: Parser [Char] ParserState Inline
-escapedTag = Str <$>
+escapedTag :: Parser [Char] ParserState Inlines
+escapedTag = B.str <$>
(try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
-symbol :: Parser [Char] ParserState Inline
-symbol = Str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
+symbol :: Parser [Char] ParserState Inlines
+symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
-- | Inline code
-code :: Parser [Char] ParserState Inline
+code :: Parser [Char] ParserState Inlines
code = code1 <|> code2
-code1 :: Parser [Char] ParserState Inline
-code1 = Code nullAttr <$> surrounded (char '@') anyChar
+code1 :: Parser [Char] ParserState Inlines
+code1 = B.code <$> surrounded (char '@') anyChar
-code2 :: Parser [Char] ParserState Inline
+code2 :: Parser [Char] ParserState Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
- Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
+ B.code <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
attributes :: Parser [Char] ParserState Attr
@@ -581,7 +583,7 @@ styleAttr = do
langAttr :: Parser [Char] ParserState (Attr -> Attr)
langAttr = do
- lang <- try $ enclosed (char '[') (char ']') anyChar
+ lang <- try $ enclosed (char '[') (char ']') alphaNum
return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
-- | Parses material surrounded by a parser.
@@ -590,14 +592,43 @@ surrounded :: Parser [Char] st t -- ^ surrounding parser
-> Parser [Char] st [a]
surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
--- | Inlines are most of the time of the same form
+
simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser
- -> ([Inline] -> Inline) -- ^ Inline constructor
- -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly)
-simpleInline border construct = surrounded border inlineWithAttribute >>=
- return . construct . normalizeSpaces
- where inlineWithAttribute = (try $ optional attributes) >> inline
+ -> (Inlines -> Inlines) -- ^ Inline constructor
+ -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly)
+simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct
+
+ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding parser
+ -> (Inlines -> Inlines) -- ^ Inline constructor
+ -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly)
+ungroupedSimpleInline border construct = try $ do
+ st <- getState
+ pos <- getPosition
+ isWhitespace <- option False (whitespace >> return True)
+ guard $ (stateQuoteContext st /= NoQuote)
+ || (sourceColumn pos == 1)
+ || isWhitespace
+ body <- surrounded border inlineWithAttribute
+ lookAhead (notFollowedBy alphaNum)
+ let result = construct $ mconcat body
+ return $ if isWhitespace then B.space <> result
+ else result
+ where
+ inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n")
+ >> (withQuoteContext InSingleQuote inline)
+
+
+groupedSimpleInline :: Parser [Char] ParserState t
+ -> (Inlines -> Inlines)
+ -> Parser [Char] ParserState Inlines
+groupedSimpleInline border construct = try $ do
+ char '['
+ withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']'
+
+
+
-- | Create a singleton list
singleton :: a -> [a]
singleton x = [x]
+