From 0ccca94b4cb22e846289c004deaa023245e14981 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Tue, 25 Mar 2014 18:22:23 +0000 Subject: Bugfix for #1175 and convert textile reader to use builder. The reader did not correctly parse inline markup. The behavoir is now as follows. (a) The markup must start at the start of a line, be inside previous inline markup or be preceeded by whitespace. (b) The markup can not span across paragraphs (delimited by \n\n) (c) The markup can not be followed by a alphanumeric character. (d) Square brackets can be placed around the markup to avoid having to have white space before it. In order to make these changes it was either necessary to convert the parser to return a list of inlines or to convert the whole reader to use the builder. The latter approach whilst more work makes a bit more sense as it becomes easy to arbitarily append and prepend elements without changing the type. Tests are accordingly updated in a later commit to reflect the different normalisation behavoir specified by the builder monoid. --- src/Text/Pandoc/Readers/Textile.hs | 301 ++++++++++++++++++++----------------- 1 file changed, 167 insertions(+), 134 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 93658cdea..ede50c6de 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -50,20 +50,20 @@ TODO : refactor common patterns across readers : module Text.Pandoc.Readers.Textile ( readTextile) where - import Text.Pandoc.Definition +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing +import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match import Data.List ( intercalate ) -import Data.Char ( digitToInt, isUpper ) +import Data.Char ( digitToInt, isUpper) import Control.Monad ( guard, liftM ) import Control.Applicative ((<$>), (*>), (<*)) +import Data.Monoid -- | Parse a Textile text and return a Pandoc document. readTextile :: ReaderOptions -- ^ Reader options @@ -95,7 +95,7 @@ parseTextile = do updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... blocks <- parseBlocks - return $ Pandoc nullMeta blocks -- FIXME + return $ Pandoc nullMeta (B.toList blocks) -- FIXME noteMarker :: Parser [Char] ParserState [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') @@ -115,11 +115,11 @@ noteBlock = try $ do return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -- | Parse document blocks -parseBlocks :: Parser [Char] ParserState [Block] -parseBlocks = manyTill block eof +parseBlocks :: Parser [Char] ParserState Blocks +parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: [Parser [Char] ParserState Block] +blockParsers :: [Parser [Char] ParserState Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -130,29 +130,32 @@ blockParsers = [ codeBlock , rawLaTeXBlock' , maybeExplicitBlock "table" table , maybeExplicitBlock "p" para + , endBlock ] +endBlock :: Parser [Char] ParserState Blocks +endBlock = string "\n\n" >> return mempty -- | Any block in the order of definition of blockParsers -block :: Parser [Char] ParserState Block +block :: Parser [Char] ParserState Blocks block = choice blockParsers "block" -commentBlock :: Parser [Char] ParserState Block +commentBlock :: Parser [Char] ParserState Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines - return Null + return mempty -codeBlock :: Parser [Char] ParserState Block +codeBlock :: Parser [Char] ParserState Blocks codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: Parser [Char] ParserState Block +codeBlockBc :: Parser [Char] ParserState Blocks codeBlockBc = try $ do string "bc. " contents <- manyTill anyLine blanklines - return $ CodeBlock ("",[],[]) $ unlines contents + return $ B.codeBlock (unlines contents) -- | Code Blocks in Textile are between
 and 
-codeBlockPre :: Parser [Char] ParserState Block +codeBlockPre :: Parser [Char] ParserState Blocks codeBlockPre = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) result' <- (innerText . parseTags) `fmap` -- remove internal tags @@ -169,29 +172,29 @@ codeBlockPre = try $ do let classes = words $ fromAttrib "class" t let ident = fromAttrib "id" t let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ CodeBlock (ident,classes,kvs) result''' + return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: Parser [Char] ParserState Block +header :: Parser [Char] ParserState Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" attr <- attributes char '.' - whitespace - name <- normalizeSpaces <$> manyTill inline blockBreak - attr' <- registerHeader attr (B.fromList name) - return $ Header level attr' name + lookAhead whitespace + name <- trimInlines . mconcat <$> manyTill inline blockBreak + attr' <- registerHeader attr name + return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: Parser [Char] ParserState Block +blockQuote :: Parser [Char] ParserState Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace - BlockQuote . singleton <$> para + B.blockQuote <$> para -- Horizontal rule -hrule :: Parser [Char] st Block +hrule :: Parser [Char] st Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -199,62 +202,62 @@ hrule = try $ do skipMany (spaceChar <|> char start) newline optional blanklines - return HorizontalRule + return B.horizontalRule -- Lists handling -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: Parser [Char] ParserState Block +anyList :: Parser [Char] ParserState Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: Int -> Parser [Char] ParserState Block +anyListAtDepth :: Int -> Parser [Char] ParserState Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: Int -> Parser [Char] ParserState Block -bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth) +bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks +bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block] +bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: Int -> Parser [Char] ParserState Block +orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) - return (OrderedList (1, DefaultStyle, DefaultDelim) items) + return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block] +orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block] +genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace - p <- many listInline + p <- mconcat <$> many listInline newline - sublist <- option [] (singleton <$> anyListAtDepth (depth + 1)) - return (Plain p : sublist) + sublist <- option mempty (anyListAtDepth (depth + 1)) + return $ (B.plain p) <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: Parser [Char] ParserState Block -definitionList = try $ DefinitionList <$> many1 definitionListItem +definitionList :: Parser [Char] ParserState Blocks +definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. listStart :: Parser [Char] st Char listStart = oneOf "*#-" -listInline :: Parser [Char] ParserState Inline +listInline :: Parser [Char] ParserState Inlines listInline = try (notFollowedBy newline >> inline) <|> try (endline <* notFollowedBy listStart) @@ -262,16 +265,16 @@ listInline = try (notFollowedBy newline >> inline) -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks]) definitionListItem = try $ do string "- " - term <- many1Till inline (try (whitespace >> string ":=")) + term <- mconcat <$> many1Till inline (try (whitespace >> string ":=")) def' <- multilineDef <|> inlineDef return (term, def') - where inlineDef :: Parser [Char] ParserState [[Block]] - inlineDef = liftM (\d -> [[Plain d]]) - $ optional whitespace >> many listInline <* newline - multilineDef :: Parser [Char] ParserState [[Block]] + where inlineDef :: Parser [Char] ParserState [Blocks] + inlineDef = liftM (\d -> [B.plain d]) + $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline + multilineDef :: Parser [Char] ParserState [Blocks] multilineDef = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) @@ -288,59 +291,59 @@ blockBreak = try (newline >> blanklines >> return ()) <|> -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: Parser [Char] ParserState Block +rawHtmlBlock :: Parser [Char] ParserState Blocks rawHtmlBlock = try $ do (_,b) <- htmlTag isBlockTag optional blanklines - return $ RawBlock (Format "html") b + return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: Parser [Char] ParserState Block +rawLaTeXBlock' :: Parser [Char] ParserState Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex - RawBlock (Format "latex") <$> (rawLaTeXBlock <* spaces) + B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: Parser [Char] ParserState Block -para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak - +para :: Parser [Char] ParserState Blocks +para = do + a <- manyTill inline blockBreak + return $ (B.para . trimInlines . mconcat) a -- Tables -- | A table cell spans until a pipe | -tableCell :: Parser [Char] ParserState TableCell +tableCell :: Parser [Char] ParserState Blocks tableCell = do c <- many1 (noneOf "|\n") - content <- parseFromString (many1 inline) c - return $ [ Plain $ normalizeSpaces content ] + content <- trimInlines . mconcat <$> parseFromString (many1 inline) c + return $ B.plain content -- | A table row is made of many table cells -tableRow :: Parser [Char] ParserState [TableCell] +tableRow :: Parser [Char] ParserState [Blocks] tableRow = try $ ( char '|' *> (endBy1 tableCell (optional blankline *> char '|')) <* newline) -- | Many table rows -tableRows :: Parser [Char] ParserState [[TableCell]] +tableRows :: Parser [Char] ParserState [[Blocks]] tableRows = many1 tableRow -- | Table headers are made of cells separated by a tag "|_." -tableHeaders :: Parser [Char] ParserState [TableCell] +tableHeaders :: Parser [Char] ParserState [Blocks] tableHeaders = let separator = (try $ string "|_.") in try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline ) -- | A table with an optional header. Current implementation can -- handle tables with and without header, but will parse cells -- alignment attributes as content. -table :: Parser [Char] ParserState Block +table :: Parser [Char] ParserState Blocks table = try $ do - headers <- option [] tableHeaders + headers <- option mempty tableHeaders rows <- tableRows blanklines let nbOfCols = max (length headers) (length $ head rows) - return $ Table [] - (replicate nbOfCols AlignDefault) - (replicate nbOfCols 0.0) + return $ B.table mempty + (zip (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0)) headers rows @@ -348,8 +351,8 @@ table = try $ do -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: String -- ^ block tag name - -> Parser [Char] ParserState Block -- ^ implicit block - -> Parser [Char] ParserState Block + -> Parser [Char] ParserState Blocks -- ^ implicit block + -> Parser [Char] ParserState Blocks maybeExplicitBlock name blk = try $ do optional $ try $ string name >> attributes >> char '.' >> optional whitespace >> optional endline @@ -363,12 +366,14 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: Parser [Char] ParserState Inline -inline = choice inlineParsers "inline" +inline :: Parser [Char] ParserState Inlines +inline = do + choice inlineParsers "inline" -- | Inline parsers tried in order -inlineParsers :: [Parser [Char] ParserState Inline] -inlineParsers = [ str +inlineParsers :: [Parser [Char] ParserState Inlines] +inlineParsers = [ inlineMarkup + , str , whitespace , endline , code @@ -378,58 +383,57 @@ inlineParsers = [ str , rawLaTeXInline' , note , try $ (char '[' *> inlineMarkup <* char ']') - , inlineMarkup , link , image , mark - , (Str . (:[])) <$> characterReference + , (B.str . (:[])) <$> characterReference , smartPunctuation inline , symbol ] -- | Inline markups -inlineMarkup :: Parser [Char] ParserState Inline -inlineMarkup = choice [ simpleInline (string "??") (Cite []) - , simpleInline (string "**") Strong - , simpleInline (string "__") Emph - , simpleInline (char '*') Strong - , simpleInline (char '_') Emph - , simpleInline (char '+') Emph -- approximates underline - , simpleInline (char '-' <* notFollowedBy (char '-')) Strikeout - , simpleInline (char '^') Superscript - , simpleInline (char '~') Subscript +inlineMarkup :: Parser [Char] ParserState Inlines +inlineMarkup = choice [ simpleInline (string "??") (B.cite []) + , simpleInline (string "**") B.strong + , simpleInline (string "__") B.emph + , simpleInline (char '*') B.strong + , simpleInline (char '_') B.emph + , simpleInline (char '+') B.emph -- approximates underline + , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout + , simpleInline (char '^') B.superscript + , simpleInline (char '~') B.subscript ] -- | Trademark, registered, copyright -mark :: Parser [Char] st Inline +mark :: Parser [Char] st Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: Parser [Char] st Inline +reg :: Parser [Char] st Inlines reg = do oneOf "Rr" char ')' - return $ Str "\174" + return $ B.str "\174" -tm :: Parser [Char] st Inline +tm :: Parser [Char] st Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' - return $ Str "\8482" + return $ B.str "\8482" -copy :: Parser [Char] st Inline +copy :: Parser [Char] st Inlines copy = do oneOf "Cc" char ')' - return $ Str "\169" + return $ B.str "\169" -note :: Parser [Char] ParserState Inline +note :: Parser [Char] ParserState Inlines note = try $ do ref <- (char '[' *> many1 digit <* char ']') notes <- stateNotes <$> getState case lookup ref notes of Nothing -> fail "note not found" - Just raw -> liftM Note $ parseFromString parseBlocks raw + Just raw -> B.note <$> parseFromString parseBlocks raw -- | Special chars markupChars :: [Char] @@ -450,7 +454,7 @@ wordBoundaries = markupChars ++ stringBreakers hyphenedWords :: Parser [Char] ParserState String hyphenedWords = do x <- wordChunk - xs <- many (try $ char '-' >> wordChunk) + xs <- many (try $ char '-' >> wordChunk) return $ intercalate "-" (x:xs) wordChunk :: Parser [Char] ParserState String @@ -462,7 +466,7 @@ wordChunk = try $ do return $ hd:tl -- | Any string -str :: Parser [Char] ParserState Inline +str :: Parser [Char] ParserState Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediatly @@ -472,89 +476,89 @@ str = do acro <- enclosed (char '(') (char ')') anyChar return $ concat [baseStr, " (", acro, ")"] updateLastStrPos - return $ Str fullStr + return $ B.str fullStr -- | Textile allows HTML span infos, we discard them -htmlSpan :: Parser [Char] ParserState Inline -htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) +htmlSpan :: Parser [Char] ParserState Inlines +htmlSpan = try $ B.str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) -- | Some number of space chars -whitespace :: Parser [Char] ParserState Inline -whitespace = many1 spaceChar >> return Space "whitespace" +whitespace :: Parser [Char] ParserState Inlines +whitespace = many1 spaceChar >> return B.space "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: Parser [Char] ParserState Inline +endline :: Parser [Char] ParserState Inlines endline = try $ do newline >> notFollowedBy blankline - return LineBreak + return B.linebreak -rawHtmlInline :: Parser [Char] ParserState Inline -rawHtmlInline = RawInline (Format "html") . snd <$> htmlTag isInlineTag +rawHtmlInline :: Parser [Char] ParserState Inlines +rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: Parser [Char] ParserState Inline +rawLaTeXInline' :: Parser [Char] ParserState Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex - rawLaTeXInline + B.singleton <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: Parser [Char] ParserState Inline +link :: Parser [Char] ParserState Inlines link = linkB <|> linkNoB -linkNoB :: Parser [Char] ParserState Inline +linkNoB :: Parser [Char] ParserState Inlines linkNoB = try $ do - name <- surrounded (char '"') inline + name <- mconcat <$> surrounded (char '"') (withQuoteContext InDoubleQuote inline) char ':' let stopChars = "!.,;:" url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline))) - let name' = if name == [Str "$"] then [Str url] else name - return $ Link name' (url, "") + let name' = if B.toList name == [Str "$"] then B.str url else name + return $ B.link url "" name' -linkB :: Parser [Char] ParserState Inline +linkB :: Parser [Char] ParserState Inlines linkB = try $ do char '[' - name <- surrounded (char '"') inline + name <- mconcat <$> surrounded (char '"') inline char ':' url <- manyTill nonspaceChar (char ']') - let name' = if name == [Str "$"] then [Str url] else name - return $ Link name' (url, "") + let name' = if B.toList name == [Str "$"] then B.str url else name + return $ B.link url "" name' -- | image embedding -image :: Parser [Char] ParserState Inline +image :: Parser [Char] ParserState Inlines image = try $ do char '!' >> notFollowedBy space src <- manyTill anyChar (lookAhead $ oneOf "!(") alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')'))) char '!' - return $ Image [Str alt] (src, alt) + return $ B.image src alt (B.str alt) -escapedInline :: Parser [Char] ParserState Inline +escapedInline :: Parser [Char] ParserState Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: Parser [Char] ParserState Inline -escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) +escapedEqs :: Parser [Char] ParserState Inlines +escapedEqs = B.str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) -- | literal text escaped btw tags -escapedTag :: Parser [Char] ParserState Inline -escapedTag = Str <$> +escapedTag :: Parser [Char] ParserState Inlines +escapedTag = B.str <$> (try $ string "" *> manyTill anyChar (try $ string "")) -- | Any special symbol defined in wordBoundaries -symbol :: Parser [Char] ParserState Inline -symbol = Str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars) +symbol :: Parser [Char] ParserState Inlines +symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars) -- | Inline code -code :: Parser [Char] ParserState Inline +code :: Parser [Char] ParserState Inlines code = code1 <|> code2 -code1 :: Parser [Char] ParserState Inline -code1 = Code nullAttr <$> surrounded (char '@') anyChar +code1 :: Parser [Char] ParserState Inlines +code1 = B.code <$> surrounded (char '@') anyChar -code2 :: Parser [Char] ParserState Inline +code2 :: Parser [Char] ParserState Inlines code2 = do htmlTag (tagOpen (=="tt") null) - Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) + B.code <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes attributes :: Parser [Char] ParserState Attr @@ -581,7 +585,7 @@ styleAttr = do langAttr :: Parser [Char] ParserState (Attr -> Attr) langAttr = do - lang <- try $ enclosed (char '[') (char ']') anyChar + lang <- try $ enclosed (char '[') (char ']') alphaNum return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) -- | Parses material surrounded by a parser. @@ -590,14 +594,43 @@ surrounded :: Parser [Char] st t -- ^ surrounding parser -> Parser [Char] st [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) --- | Inlines are most of the time of the same form + simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser - -> ([Inline] -> Inline) -- ^ Inline constructor - -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly) -simpleInline border construct = surrounded border inlineWithAttribute >>= - return . construct . normalizeSpaces - where inlineWithAttribute = (try $ optional attributes) >> inline + -> (Inlines -> Inlines) -- ^ Inline constructor + -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) +simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct + +ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding parser + -> (Inlines -> Inlines) -- ^ Inline constructor + -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) +ungroupedSimpleInline border construct = try $ do + st <- getState + pos <- getPosition + isWhitespace <- option False (whitespace >> return True) + guard $ (stateQuoteContext st /= NoQuote) + || (sourceColumn pos == 1) + || isWhitespace + body <- surrounded border inlineWithAttribute + lookAhead (notFollowedBy alphaNum) + let result = construct $ mconcat body + return $ if isWhitespace then B.space <> result + else result + where + inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n") + >> (withQuoteContext InSingleQuote inline) + + +groupedSimpleInline :: Parser [Char] ParserState t + -> (Inlines -> Inlines) + -> Parser [Char] ParserState Inlines +groupedSimpleInline border construct = try $ do + char '[' + withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']' + + + -- | Create a singleton list singleton :: a -> [a] singleton x = [x] + -- cgit v1.2.3 From 9b5d474e79c0b508ac0da9943b9bb385671aad85 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Thu, 27 Mar 2014 19:53:32 +0000 Subject: Converted HTML reader to use builder. Fixes #1162. --- src/Text/Pandoc/Readers/HTML.hs | 235 +++++++++++++++++++++------------------- 1 file changed, 126 insertions(+), 109 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d1e4d0024..4fab251bb 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -40,6 +40,7 @@ import Text.HTML.TagSoup import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing @@ -48,6 +49,8 @@ import Data.List ( intercalate ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) import Control.Applicative ( (<$>), (<$), (<*) ) +import Data.Monoid +import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl) isSpace :: Char -> Bool isSpace ' ' = True @@ -66,30 +69,30 @@ readHtml opts inp = where tags = canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do - blocks <- (fixPlains False . concat) <$> manyTill block eof + blocks <- (fixPlains False) . mconcat <$> manyTill block eof meta <- stateMeta <$> getState - return $ Pandoc meta blocks + return $ Pandoc meta (B.toList blocks) type TagParser = Parser [Tag String] ParserState -pBody :: TagParser [Block] +pBody :: TagParser Blocks pBody = pInTags "body" block -pHead :: TagParser [Block] -pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag) - where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces - setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t)) +pHead :: TagParser Blocks +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) + where pTitle = pInTags "title" inline >>= setTitle . trimInlines + setTitle t = mempty <$ (updateState $ B.setMeta "title" t) pMetaTag = do mt <- pSatisfy (~== TagOpen "meta" []) let name = fromAttrib "name" mt if null name - then return [] + then return mempty else do let content = fromAttrib "content" mt updateState $ B.setMeta name (B.text content) - return [] + return mempty -block :: TagParser [Block] +block :: TagParser Blocks block = choice [ pPara , pHeader @@ -105,10 +108,10 @@ block = choice , pRawHtmlBlock ] -pList :: TagParser [Block] +pList :: TagParser Blocks pList = pBulletList <|> pOrderedList <|> pDefinitionList -pBulletList :: TagParser [Block] +pBulletList :: TagParser Blocks pBulletList = try $ do pSatisfy (~== TagOpen "ul" []) let nonItem = pSatisfy (\t -> @@ -118,9 +121,9 @@ pBulletList = try $ do -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul") - return [BulletList $ map (fixPlains True) items] + return $ B.bulletList $ map (fixPlains True) items -pOrderedList :: TagParser [Block] +pOrderedList :: TagParser Blocks pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) let (start, style) = (sta', sty') @@ -146,27 +149,27 @@ pOrderedList = try $ do -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol") - return [OrderedList (start, style, DefaultDelim) $ map (fixPlains True) items] + return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items -pDefinitionList :: TagParser [Block] +pDefinitionList :: TagParser Blocks pDefinitionList = try $ do pSatisfy (~== TagOpen "dl" []) items <- manyTill pDefListItem (pCloses "dl") - return [DefinitionList items] + return $ B.definitionList items -pDefListItem :: TagParser ([Inline],[[Block]]) +pDefListItem :: TagParser (Inlines, [Blocks]) pDefListItem = try $ do let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) && not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl")) terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline) defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) skipMany nonItem - let term = intercalate [LineBreak] terms + let term = foldl1 (\x y -> x <> B.linebreak <> y) terms return (term, map (fixPlains True) defs) -fixPlains :: Bool -> [Block] -> [Block] -fixPlains inList bs = if any isParaish bs - then map plainToPara bs +fixPlains :: Bool -> Blocks -> Blocks +fixPlains inList bs = if any isParaish bs' + then B.fromList $ map plainToPara bs' else bs where isParaish (Para _) = True isParaish (CodeBlock _ _) = True @@ -178,6 +181,7 @@ fixPlains inList bs = if any isParaish bs isParaish _ = False plainToPara (Plain xs) = Para xs plainToPara x = x + bs' = B.toList bs pRawTag :: TagParser String pRawTag = do @@ -187,20 +191,20 @@ pRawTag = do then return [] else return $ renderTags' [tag] -pDiv :: TagParser [Block] +pDiv :: TagParser Blocks pDiv = try $ do getOption readerParseRaw >>= guard TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True) contents <- pInTags "div" block - return [Div (mkAttr attr) contents] + return $ B.divWith (mkAttr attr) contents -pRawHtmlBlock :: TagParser [Block] +pRawHtmlBlock :: TagParser Blocks pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag parseRaw <- getOption readerParseRaw if parseRaw && not (null raw) - then return [RawBlock (Format "html") raw] - else return [] + then return $ B.rawBlock "html" raw + else return mempty pHtmlBlock :: String -> TagParser String pHtmlBlock t = try $ do @@ -208,35 +212,34 @@ pHtmlBlock t = try $ do contents <- manyTill pAnyTag (pSatisfy (~== TagClose t)) return $ renderTags' $ [open] ++ contents ++ [TagClose t] -pHeader :: TagParser [Block] +pHeader :: TagParser Blocks pHeader = try $ do TagOpen tagtype attr <- pSatisfy $ tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) (const True) let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] let level = read (drop 1 tagtype) - contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof) + contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] return $ if bodyTitle - then [] -- skip a representation of the title in the body - else [Header level (ident, classes, keyvals) $ - normalizeSpaces contents] + then mempty -- skip a representation of the title in the body + else B.headerWith (ident, classes, keyvals) level contents -pHrule :: TagParser [Block] +pHrule :: TagParser Blocks pHrule = do pSelfClosing (=="hr") (const True) - return [HorizontalRule] + return B.horizontalRule -pTable :: TagParser [Block] +pTable :: TagParser Blocks pTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank - caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank + caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank -- TODO actually read these and take width information from them widths' <- pColgroup <|> many pCol - head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") + head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th") skipMany pBlank rows <- pOptInTag "tbody" $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") @@ -245,19 +248,21 @@ pTable = try $ do let isSinglePlain [] = True isSinglePlain [Plain _] = True isSinglePlain _ = False - let isSimple = all isSinglePlain $ concat (head':rows) - let cols = length $ if null head' - then head rows - else head' + let lHead = B.toList head' + let lRows = map B.toList rows + let isSimple = all isSinglePlain (lHead:lRows) + let cols = length $ if null lHead + then head lRows + else lHead -- fail if there are colspans or rowspans - guard $ all (\r -> length r == cols) rows + guard $ all (\r -> length r == cols) lRows let aligns = replicate cols AlignLeft let widths = if null widths' then if isSimple then replicate cols 0 else replicate cols (1.0 / fromIntegral cols) else widths' - return [Table caption aligns widths head' rows] + return $ B.table caption (zip aligns widths) [head'] [rows] pCol :: TagParser Double pCol = try $ do @@ -275,31 +280,31 @@ pColgroup = try $ do skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank -pCell :: String -> TagParser [TableCell] +pCell :: String -> TagParser Blocks pCell celltype = try $ do skipMany pBlank res <- pInTags celltype block skipMany pBlank - return [res] + return res -pBlockQuote :: TagParser [Block] +pBlockQuote :: TagParser Blocks pBlockQuote = do contents <- pInTags "blockquote" block - return [BlockQuote $ fixPlains False contents] + return $ B.blockQuote $ fixPlains False contents -pPlain :: TagParser [Block] +pPlain :: TagParser Blocks pPlain = do - contents <- liftM (normalizeSpaces . concat) $ many1 inline - if null contents - then return [] - else return [Plain contents] + contents <- trimInlines . mconcat <$> many1 inline + if B.isNull contents + then return mempty + else return $ B.plain contents -pPara :: TagParser [Block] +pPara :: TagParser Blocks pPara = do - contents <- pInTags "p" inline - return [Para $ normalizeSpaces contents] + contents <- trimInlines <$> pInTags "p" inline + return $ B.para contents -pCodeBlock :: TagParser [Block] +pCodeBlock :: TagParser Blocks pCodeBlock = try $ do TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) contents <- manyTill pAnyTag (pCloses "pre" <|> eof) @@ -312,9 +317,9 @@ pCodeBlock = try $ do let result = case reverse result' of '\n':_ -> init result' _ -> result' - return [CodeBlock (mkAttr attr) result] + return $ B.codeBlockWith (mkAttr attr) result -inline :: TagParser [Inline] +inline :: TagParser Inlines inline = choice [ pTagText , pQ @@ -354,7 +359,7 @@ pSelfClosing f g = do optional $ pSatisfy (tagClose f) return open -pQ :: TagParser [Inline] +pQ :: TagParser Inlines pQ = do quoteContext <- stateQuoteContext `fmap` getState let quoteType = case quoteContext of @@ -363,82 +368,93 @@ pQ = do let innerQuoteContext = if quoteType == SingleQuote then InSingleQuote else InDoubleQuote - withQuoteContext innerQuoteContext $ pInlinesInTags "q" (Quoted quoteType) + let constructor = case quoteType of + SingleQuote -> B.singleQuoted + DoubleQuote -> B.doubleQuoted + withQuoteContext innerQuoteContext $ + pInlinesInTags "q" constructor -pEmph :: TagParser [Inline] -pEmph = pInlinesInTags "em" Emph <|> pInlinesInTags "i" Emph +pEmph :: TagParser Inlines +pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph -pStrong :: TagParser [Inline] -pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong +pStrong :: TagParser Inlines +pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong -pSuperscript :: TagParser [Inline] -pSuperscript = pInlinesInTags "sup" Superscript +pSuperscript :: TagParser Inlines +pSuperscript = pInlinesInTags "sup" B.superscript -pSubscript :: TagParser [Inline] -pSubscript = pInlinesInTags "sub" Subscript +pSubscript :: TagParser Inlines +pSubscript = pInlinesInTags "sub" B.subscript -pStrikeout :: TagParser [Inline] +pStrikeout :: TagParser Inlines pStrikeout = do - pInlinesInTags "s" Strikeout <|> - pInlinesInTags "strike" Strikeout <|> - pInlinesInTags "del" Strikeout <|> + pInlinesInTags "s" B.strikeout <|> + pInlinesInTags "strike" B.strikeout <|> + pInlinesInTags "del" B.strikeout <|> try (do pSatisfy (~== TagOpen "span" [("class","strikeout")]) - contents <- liftM concat $ manyTill inline (pCloses "span") - return [Strikeout contents]) + contents <- mconcat <$> manyTill inline (pCloses "span") + return $ B.strikeout contents) -pLineBreak :: TagParser [Inline] +pLineBreak :: TagParser Inlines pLineBreak = do pSelfClosing (=="br") (const True) - return [LineBreak] + return B.linebreak -pLink :: TagParser [Inline] +pLink :: TagParser Inlines pLink = try $ do tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) let url = fromAttrib "href" tag let title = fromAttrib "title" tag - lab <- liftM concat $ manyTill inline (pCloses "a") - return [Link (normalizeSpaces lab) (escapeURI url, title)] + lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") + return $ B.link (escapeURI url) title lab -pImage :: TagParser [Inline] +pImage :: TagParser Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") let url = fromAttrib "src" tag let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag - return [Image (B.toList $ B.text alt) (escapeURI url, title)] + return $ B.image (escapeURI url) title (B.text alt) -pCode :: TagParser [Inline] +pCode :: TagParser Inlines pCode = try $ do (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) result <- manyTill pAnyTag (pCloses open) - return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result] + return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result -pSpan :: TagParser [Inline] +pSpan :: TagParser Inlines pSpan = try $ do getOption readerParseRaw >>= guard TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) contents <- pInTags "span" inline - return [Span (mkAttr attr) contents] + return $ B.spanWith (mkAttr attr) contents -pRawHtmlInline :: TagParser [Inline] +pRawHtmlInline :: TagParser Inlines pRawHtmlInline = do result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag parseRaw <- getOption readerParseRaw if parseRaw - then return [RawInline (Format "html") $ renderTags' [result]] - else return [] + then return $ B.rawInline "html" $ renderTags' [result] + else return mempty -pInlinesInTags :: String -> ([Inline] -> Inline) - -> TagParser [Inline] +pInlinesInTags :: String -> (Inlines -> Inlines) + -> TagParser Inlines pInlinesInTags tagtype f = do - contents <- pInTags tagtype inline - return [f $ normalizeSpaces contents] - -pInTags :: String -> TagParser [a] - -> TagParser [a] + contents <- B.unMany <$> pInTags tagtype inline + let left = case viewl contents of + EmptyL -> mempty + (a :< _) -> padSpace a + let right = case viewr contents of + EmptyR -> mempty + (_ :> a) -> padSpace a + return (left <> f (trimInlines . B.Many $ contents) <> right) + where padSpace a = if a == Space then B.space else mempty + +pInTags :: (Monoid a) => String -> TagParser a + -> TagParser a pInTags tagtype parser = try $ do pSatisfy (~== TagOpen tagtype []) - liftM concat $ manyTill parser (pCloses tagtype <|> eof) + mconcat <$> manyTill parser (pCloses tagtype <|> eof) pOptInTag :: String -> TagParser a -> TagParser a @@ -461,36 +477,36 @@ pCloses tagtype = try $ do (TagClose "dl") | tagtype == "li" -> return () _ -> mzero -pTagText :: TagParser [Inline] +pTagText :: TagParser Inlines pTagText = try $ do (TagText str) <- pSatisfy isTagText st <- getState case runParser (many pTagContents) st "text" str of Left _ -> fail $ "Could not parse `" ++ str ++ "'" - Right result -> return result + Right result -> return $ mconcat result pBlank :: TagParser () pBlank = try $ do (TagText str) <- pSatisfy isTagText guard $ all isSpace str -pTagContents :: Parser [Char] ParserState Inline +pTagContents :: Parser [Char] ParserState Inlines pTagContents = - Math DisplayMath `fmap` mathDisplay - <|> Math InlineMath `fmap` mathInline + B.displayMath <$> mathDisplay + <|> B.math <$> mathInline <|> pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad -pStr :: Parser [Char] ParserState Inline +pStr :: Parser [Char] ParserState Inlines pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) pos <- getPosition updateState $ \s -> s{ stateLastStrPos = Just pos } - return $ Str result + return $ B.str result isSpecial :: Char -> Bool isSpecial '"' = True @@ -504,13 +520,13 @@ isSpecial '\8220' = True isSpecial '\8221' = True isSpecial _ = False -pSymbol :: Parser [Char] ParserState Inline -pSymbol = satisfy isSpecial >>= return . Str . (:[]) +pSymbol :: Parser [Char] ParserState Inlines +pSymbol = satisfy isSpecial >>= return . B.str . (:[]) isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML -pBad :: Parser [Char] ParserState Inline +pBad :: Parser [Char] ParserState Inlines pBad = do c <- satisfy isBad let c' = case c of @@ -542,10 +558,10 @@ pBad = do '\158' -> '\382' '\159' -> '\376' _ -> '?' - return $ Str [c'] + return $ B.str [c'] -pSpace :: Parser [Char] ParserState Inline -pSpace = many1 (satisfy isSpace) >> return Space +pSpace :: Parser [Char] ParserState Inlines +pSpace = many1 (satisfy isSpace) >> return B.space -- -- Constants @@ -679,3 +695,4 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV) attribsClasses = words $ fromMaybe "" $ lookup "class" attr attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr + -- cgit v1.2.3 From 5a51a67abda59c177f3a6d0f6cba59d41e866287 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Thu, 27 Mar 2014 19:56:47 +0000 Subject: Changed the smart punctuation parser to return Inlines rather than an Inline element and updated files accordingly --- src/Text/Pandoc/Parsing.hs | 43 ++++++++++++++++++------------------- src/Text/Pandoc/Readers/Markdown.hs | 2 +- src/Text/Pandoc/Readers/RST.hs | 2 +- src/Text/Pandoc/Readers/Textile.hs | 4 +--- 4 files changed, 24 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 68d4605ee..a9009eaa2 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -997,17 +997,17 @@ registerHeader (ident,classes,kvs) header' = do failUnlessSmart :: HasReaderOptions st => Parser s st () failUnlessSmart = getOption readerSmart >>= guard -smartPunctuation :: Parser [Char] ParserState Inline - -> Parser [Char] ParserState Inline +smartPunctuation :: Parser [Char] ParserState Inlines + -> Parser [Char] ParserState Inlines smartPunctuation inlineParser = do failUnlessSmart choice [ quoted inlineParser, apostrophe, dash, ellipses ] -apostrophe :: Parser [Char] ParserState Inline -apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019") +apostrophe :: Parser [Char] ParserState Inlines +apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019") -quoted :: Parser [Char] ParserState Inline - -> Parser [Char] ParserState Inline +quoted :: Parser [Char] ParserState Inlines + -> Parser [Char] ParserState Inlines quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser withQuoteContext :: QuoteContext @@ -1022,20 +1022,19 @@ withQuoteContext context parser = do setState newState { stateQuoteContext = oldQuoteContext } return result -singleQuoted :: Parser [Char] ParserState Inline - -> Parser [Char] ParserState Inline +singleQuoted :: Parser [Char] ParserState Inlines + -> Parser [Char] ParserState Inlines singleQuoted inlineParser = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= - return . Quoted SingleQuote . normalizeSpaces + return . B.singleQuoted . mconcat -doubleQuoted :: Parser [Char] ParserState Inline - -> Parser [Char] ParserState Inline +doubleQuoted :: Parser [Char] ParserState Inlines + -> Parser [Char] ParserState Inlines doubleQuoted inlineParser = try $ do doubleQuoteStart - withQuoteContext InDoubleQuote $ do - contents <- manyTill inlineParser doubleQuoteEnd - return . Quoted DoubleQuote . normalizeSpaces $ contents + withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>= + return . B.doubleQuoted . mconcat failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState () failIfInQuoteContext context = do @@ -1079,17 +1078,17 @@ doubleQuoteEnd = do charOrRef "\"\8221\148" return () -ellipses :: Parser [Char] st Inline +ellipses :: Parser [Char] st Inlines ellipses = do try (charOrRef "\8230\133") <|> try (string "..." >> return '…') - return (Str "\8230") + return (B.str "\8230") -dash :: Parser [Char] ParserState Inline +dash :: Parser [Char] ParserState Inlines dash = do oldDashes <- getOption readerOldDashes if oldDashes then emDashOld <|> enDashOld - else Str `fmap` (hyphenDash <|> emDash <|> enDash) + else B.str `fmap` (hyphenDash <|> emDash <|> enDash) -- Two hyphens = en-dash, three = em-dash hyphenDash :: Parser [Char] st String @@ -1107,16 +1106,16 @@ enDash = do try (charOrRef "\8212\151") return "\8211" -enDashOld :: Parser [Char] st Inline +enDashOld :: Parser [Char] st Inlines enDashOld = do try (charOrRef "\8211\150") <|> try (char '-' >> lookAhead (satisfy isDigit) >> return '–') - return (Str "\8211") + return (B.str "\8211") -emDashOld :: Parser [Char] st Inline +emDashOld :: Parser [Char] st Inlines emDashOld = do try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-') - return (Str "\8212") + return (B.str "\8212") -- This is used to prevent exponential blowups for things like: -- a**a*a**a*a**a*a**a*a**a*a**a*a** diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index aa0252266..57e1ca560 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1873,7 +1873,7 @@ smart :: MarkdownParser (F Inlines) smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses]) + choice (map (return <$>) [apostrophe, dash, ellipses]) singleQuoted :: MarkdownParser (F Inlines) singleQuoted = try $ do diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 127eae167..a574f343a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1140,7 +1140,7 @@ smart :: RSTParser Inlines smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (B.singleton <$>) [apostrophe, dash, ellipses]) + choice [apostrophe, dash, ellipses] singleQuoted :: RSTParser Inlines singleQuoted = try $ do diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index ede50c6de..c6f992275 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -306,9 +306,7 @@ rawLaTeXBlock' = do -- | In textile, paragraphs are separated by blank lines. para :: Parser [Char] ParserState Blocks -para = do - a <- manyTill inline blockBreak - return $ (B.para . trimInlines . mconcat) a +para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak -- Tables -- cgit v1.2.3