From 7a649170bef70f33099000277f42362d4ab7ff50 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Sun, 20 Jul 2014 23:41:42 +0100 Subject: Added generalize function which can be used to lift specialised parsers. Monad m => Parsec s st a -> Parsec T s st m a --- src/Text/Pandoc/Parsing.hs | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 28ea2bd2f..ec15adf77 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -109,6 +109,7 @@ module Text.Pandoc.Parsing ( anyLine, askF, asksF, token, + generalize, -- * Re-exports from Text.Pandoc.Parsec Stream, runParser, @@ -1264,3 +1265,6 @@ addWarning mbpos msg = updateState $ \st -> st{ stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : stateWarnings st } + +generalize :: (Monad m) => Parser s st a -> ParserT s st m a +generalize m = mkPT (\ s -> (return $ (return . runIdentity) <$> runIdentity (runParsecT m s))) -- cgit v1.2.3 From e8677bae786190b203b84b7d42c30b0284bbb95c Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Thu, 29 Jan 2015 13:16:40 +0000 Subject: Remove F monad from Org Reader. --- src/Text/Pandoc/Readers/Org.hs | 439 ++++++++++++++++++++--------------------- 1 file changed, 212 insertions(+), 227 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index f16aed48d..97a15576b 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2014 Albert Krewinkel @@ -45,34 +46,48 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Applicative ( Applicative, pure +import Control.Applicative ( pure , (<$>), (<$), (<*>), (<*), (*>) ) import Control.Arrow (first) -import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) +import Control.Monad (guard, mplus, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default -import Data.List (intersperse, isPrefixOf, isSuffixOf) +import Data.List (intersperse, isPrefixOf, isSuffixOf, foldl') import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust) -import Data.Monoid (Monoid, mconcat, mempty, mappend) +import Data.Monoid (mconcat, mempty, mappend) import Network.HTTP (urlEncode) -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") +readOrg opts s = runOrg opts s parseOrg -type OrgParser = Parser [Char] OrgParserState +runOrg :: ReaderOptions -> String -> OrgParser a -> a +runOrg opts inp p = fst res + where + imd = readWithM (retState p) def{ orgStateOptions = opts } (inp ++ "\n\n") + res = runReader imd s + s :: OrgParserState + s = snd $ runReader imd s + +retState :: OrgParser a -> OrgParser (a, OrgParserState) +retState p = do + r <- p + s <- getState + return (r, s) + +type OrgParser a = ParserT [Char] OrgParserState (Reader OrgParserState) a parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks st <- getState - let meta = runF (orgStateMeta' st) st + let meta = orgStateMeta st let removeUnwantedBlocks = dropCommentTrees . filter (/= Null) - return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st) + return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ blocks') -- | Drop COMMENT headers and the document tree below those headers. dropCommentTrees :: [Block] -> [Block] @@ -102,7 +117,7 @@ isHeaderLevelLowerEq n blk = -- Parser State for Org -- -type OrgNoteRecord = (String, F Blocks) +type OrgNoteRecord = (String, Blocks) type OrgNoteTable = [OrgNoteRecord] type OrgBlockAttributes = M.Map String String @@ -121,7 +136,6 @@ data OrgParserState = OrgParserState , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: Meta - , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable } @@ -153,13 +167,13 @@ defaultOrgParserState = OrgParserState , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty , orgStateMeta = nullMeta - , orgStateMeta' = return nullMeta , orgStateNotes' = [] } recordAnchorId :: String -> OrgParser () recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + let as = orgStateAnchorIds s in + s{ orgStateAnchorIds = i : as } addBlockAttribute :: String -> String -> OrgParser () addBlockAttribute key val = updateState $ \s -> @@ -238,30 +252,6 @@ parseFromString parser str' = do -- Adaptions and specializations of parsing utilities -- -newtype F a = F { unF :: Reader OrgParserState a - } deriving (Monad, Applicative, Functor) - -runF :: F a -> OrgParserState -> a -runF = runReader . unF - -askF :: F OrgParserState -askF = F ask - -asksF :: (OrgParserState -> a) -> F a -asksF f = F $ asks f - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = fmap mconcat . sequence - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines - -returnF :: a -> OrgParser (F a) -returnF = return . return - - -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char newline = @@ -280,10 +270,10 @@ blanklines = -- parsing blocks -- -parseBlocks :: OrgParser (F Blocks) +parseBlocks :: OrgParser Blocks parseBlocks = mconcat <$> manyTill block eof -block :: OrgParser (F Blocks) +block :: OrgParser Blocks block = choice [ mempty <$ blanklines , optionalAttributes $ choice [ orgBlock @@ -294,14 +284,14 @@ block = choice [ mempty <$ blanklines , drawer , specialLine , header - , return <$> hline + , hline , list , latexFragment , noteBlock , paraOrPlain ] "block" -optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks) +optionalAttributes :: OrgParser Blocks -> OrgParser Blocks optionalAttributes parser = try $ resetBlockAttributes *> parseBlockAttributes *> parser @@ -321,7 +311,7 @@ parseAndAddAttribute key value = do let key' = map toLower key () <$ addBlockAttribute key' value -lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines)) +lookupInlinesAttr :: String -> OrgParser (Maybe Inlines) lookupInlinesAttr attr = try $ do val <- lookupBlockAttribute attr maybe (return Nothing) @@ -335,20 +325,20 @@ lookupInlinesAttr attr = try $ do type BlockProperties = (Int, String) -- (Indentation, Block-Type) -orgBlock :: OrgParser (F Blocks) +orgBlock :: OrgParser Blocks orgBlock = try $ do blockProp@(_, blkType) <- blockHeaderStart ($ blockProp) $ case blkType of "comment" -> withRaw' (const mempty) - "html" -> withRaw' (return . (B.rawBlock blkType)) - "latex" -> withRaw' (return . (B.rawBlock blkType)) - "ascii" -> withRaw' (return . (B.rawBlock blkType)) - "example" -> withRaw' (return . exampleCode) - "quote" -> withParsed (fmap B.blockQuote) + "html" -> withRaw' (B.rawBlock blkType) + "latex" -> withRaw' (B.rawBlock blkType) + "ascii" -> withRaw' (B.rawBlock blkType) + "example" -> withRaw' exampleCode + "quote" -> withParsed B.blockQuote "verse" -> verseBlock "src" -> codeBlock - _ -> withParsed (fmap $ divWithClass blkType) + _ -> withParsed (divWithClass blkType) blockHeaderStart :: OrgParser (Int, String) blockHeaderStart = try $ (,) <$> indent <*> blockType @@ -356,10 +346,10 @@ blockHeaderStart = try $ (,) <$> indent <*> blockType indent = length <$> many spaceChar blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) -withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withRaw' :: (String -> Blocks) -> BlockProperties -> OrgParser Blocks withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) -withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withParsed :: (Blocks -> Blocks) -> BlockProperties -> OrgParser Blocks withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) ignHeaders :: OrgParser () @@ -368,11 +358,11 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine) divWithClass :: String -> Blocks -> Blocks divWithClass cls = B.divWith ("", [cls], []) -verseBlock :: BlockProperties -> OrgParser (F Blocks) +verseBlock :: BlockProperties -> OrgParser Blocks verseBlock blkProp = try $ do ignHeaders content <- rawBlockContent blkProp - fmap B.para . mconcat . intersperse (pure B.linebreak) + B.para . mconcat . intersperse B.linebreak <$> mapM (parseFromString parseInlines) (lines content) exportsCode :: [(String, String)] -> Bool @@ -389,7 +379,7 @@ followingResultsBlock = *> blankline *> (unlines <$> many1 exampleLine)) -codeBlock :: BlockProperties -> OrgParser (F Blocks) +codeBlock :: BlockProperties -> OrgParser Blocks codeBlock blkProp = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -399,17 +389,15 @@ codeBlock blkProp = do let includeCode = exportsCode kv let includeResults = exportsResults kv let codeBlck = B.codeBlockWith ( id', classes, kv ) content - labelledBlck <- maybe (pure codeBlck) - (labelDiv codeBlck) + labelledBlck <- maybe codeBlck (labelDiv codeBlck) <$> lookupInlinesAttr "caption" - let resultBlck = pure $ maybe mempty (exampleCode) resultsContent + let resultBlck = maybe mempty exampleCode resultsContent return $ (if includeCode then labelledBlck else mempty) <> (if includeResults then resultBlck else mempty) where labelDiv blk value = - B.divWith nullAttr <$> (mappend <$> labelledBlock value - <*> pure blk) - labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) + B.divWith nullAttr (labelledBlock value <> blk) + labelledBlock = B.plain . B.spanWith ("", ["label"], []) rawBlockContent :: BlockProperties -> OrgParser String rawBlockContent (indent, blockType) = try $ @@ -418,7 +406,7 @@ rawBlockContent (indent, blockType) = try $ indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) -parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) +parsedBlockContent :: BlockProperties -> OrgParser Blocks parsedBlockContent blkProps = try $ do raw <- rawBlockContent blkProps parseFromString parseBlocks (raw ++ "\n") @@ -509,9 +497,9 @@ commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs commaEscaped cs = cs -example :: OrgParser (F Blocks) +example :: OrgParser Blocks example = try $ do - return . return . exampleCode =<< unlines <$> many1 exampleLine + return . exampleCode =<< unlines <$> many1 exampleLine exampleCode :: String -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) @@ -520,7 +508,7 @@ exampleLine :: OrgParser String exampleLine = try $ skipSpaces *> string ": " *> anyLine -- Drawers for properties or a logbook -drawer :: OrgParser (F Blocks) +drawer :: OrgParser Blocks drawer = try $ do drawerStart manyTill drawerLine (try drawerEnd) @@ -546,14 +534,12 @@ drawerEnd = try $ -- -- Figures (Image on a line by itself, preceded by name and/or caption) -figure :: OrgParser (F Blocks) +figure :: OrgParser Blocks figure = try $ do (cap, nam) <- nameAndCaption src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline guard (isImageFilename src) - return $ do - cap' <- cap - return $ B.para $ B.image src nam cap' + return $ B.para $ B.image src nam cap where nameAndCaption = do @@ -569,8 +555,8 @@ figure = try $ do -- -- Comments, Options and Metadata -specialLine :: OrgParser (F Blocks) -specialLine = fmap return . try $ metaLine <|> commentLine +specialLine :: OrgParser Blocks +specialLine = try $ metaLine <|> commentLine metaLine :: OrgParser Blocks metaLine = try $ mempty @@ -590,14 +576,14 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser () declarationLine = try $ do key <- metaKey - inlinesF <- metaInlines + inlines <- metaInlines updateState $ \st -> - let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta - in st { orgStateMeta' = orgStateMeta' st <> meta' } + let meta' = B.setMeta key inlines nullMeta + in st { orgStateMeta = orgStateMeta st <> meta' } return () -metaInlines :: OrgParser (F MetaValue) -metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline +metaInlines :: OrgParser MetaValue +metaInlines = (MetaInlines . B.toList) <$> inlinesTillNewline metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") @@ -638,11 +624,11 @@ parseFormat = try $ do -- -- | Headers -header :: OrgParser (F Blocks) +header :: OrgParser Blocks header = try $ do level <- headerStart title <- inlinesTillNewline - return $ B.header level <$> title + return $ B.header level title headerStart :: OrgParser Int headerStart = try $ @@ -666,7 +652,7 @@ hline = try $ do -- Tables -- -data OrgTableRow = OrgContentRow (F [Blocks]) +data OrgTableRow = OrgContentRow [Blocks] | OrgAlignRow [Alignment] | OrgHlineRow @@ -677,13 +663,13 @@ data OrgTable = OrgTable , orgTableRows :: [[Blocks]] } -table :: OrgParser (F Blocks) +table :: OrgParser Blocks table = try $ do lookAhead tableStart do rows <- tableRows - cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption" - return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows + (cptn :: Inlines) <- fromMaybe "" <$> lookupInlinesAttr "caption" + return $ ($ cptn) . orgToPandocTable . normalizeTable . rowsToTable $ rows orgToPandocTable :: OrgTable -> Inlines @@ -699,11 +685,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) -tableContentCell :: OrgParser (F Blocks) +tableContentCell :: OrgParser Blocks tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell + B.plain . trimInlines . mconcat <$> many1Till inline endOfCell endOfCell :: OrgParser Char endOfCell = try $ char '|' <|> lookAhead newline @@ -735,8 +721,8 @@ tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) rowsToTable :: [OrgTableRow] - -> F OrgTable -rowsToTable = foldM (flip rowToContent) zeroTable + -> OrgTable +rowsToTable = foldl' (flip rowToContent) zeroTable where zeroTable = OrgTable 0 mempty mempty mempty normalizeTable :: OrgTable @@ -755,45 +741,43 @@ normalizeTable (OrgTable cols aligns heads lns) = -- line as a header. All other horizontal lines are discarded. rowToContent :: OrgTableRow -> OrgTable - -> F OrgTable + -> OrgTable rowToContent OrgHlineRow t = maybeBodyToHeader t -rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t -rowToContent (OrgContentRow rf) t = do - rs <- rf - setLongestRow rs =<< appendToBody rs t +rowToContent (OrgAlignRow as) t = setLongestRow as . setAligns as $ t +rowToContent (OrgContentRow rf) t = setLongestRow rf . appendToBody rf $ t setLongestRow :: [a] -> OrgTable - -> F OrgTable + -> OrgTable setLongestRow rs t = - return t{ orgTableColumns = max (length rs) (orgTableColumns t) } + t{ orgTableColumns = max (length rs) (orgTableColumns t) } maybeBodyToHeader :: OrgTable - -> F OrgTable + -> OrgTable maybeBodyToHeader t = case t of OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - return t{ orgTableHeader = b , orgTableRows = [] } - _ -> return t + t{ orgTableHeader = b , orgTableRows = [] } + _ -> t appendToBody :: [Blocks] -> OrgTable - -> F OrgTable -appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] } + -> OrgTable +appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] } setAligns :: [Alignment] -> OrgTable - -> F OrgTable -setAligns aligns t = return $ t{ orgTableAlignments = aligns } + -> OrgTable +setAligns aligns t = t{ orgTableAlignments = aligns } -- -- LaTeX fragments -- -latexFragment :: OrgParser (F Blocks) +latexFragment :: OrgParser Blocks latexFragment = try $ do envName <- latexEnvStart content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) - return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) + return $ B.rawBlock "latex" (content `inLatexEnv` envName) where c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" , c @@ -823,7 +807,7 @@ latexEnvName = try $ do -- -- Footnote defintions -- -noteBlock :: OrgParser (F Blocks) +noteBlock :: OrgParser Blocks noteBlock = try $ do ref <- noteMarker <* skipSpaces content <- mconcat <$> blocksTillHeaderOrNote @@ -835,37 +819,37 @@ noteBlock = try $ do <|> () <$ lookAhead headerStart) -- Paragraphs or Plain text -paraOrPlain :: OrgParser (F Blocks) +paraOrPlain :: OrgParser Blocks paraOrPlain = try $ do ils <- parseInlines nl <- option False (newline >> return True) try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >> - return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) + (return $ B.para ils)) + <|> (return $ B.plain ils) -inlinesTillNewline :: OrgParser (F Inlines) -inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline +inlinesTillNewline :: OrgParser Inlines +inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline -- -- list blocks -- -list :: OrgParser (F Blocks) +list :: OrgParser Blocks list = choice [ definitionList, bulletList, orderedList ] "list" -definitionList :: OrgParser (F Blocks) +definitionList :: OrgParser Blocks definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactify'DL . sequence + B.definitionList . compactify'DL <$> many1 (definitionListItem $ bulletListStart' (Just n)) -bulletList :: OrgParser (F Blocks) +bulletList :: OrgParser Blocks bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify' . sequence + B.bulletList . compactify' <$> many1 (listItem (bulletListStart' $ Just n)) -orderedList :: OrgParser (F Blocks) -orderedList = fmap B.orderedList . fmap compactify' . sequence +orderedList :: OrgParser Blocks +orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) genericListStart :: OrgParser String @@ -902,7 +886,7 @@ orderedListStart = genericListStart orderedListMarker where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") definitionListItem :: OrgParser Int - -> OrgParser (F (Inlines, [Blocks])) + -> OrgParser (Inlines, [Blocks]) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") @@ -911,12 +895,12 @@ definitionListItem parseMarkerGetLength = try $ do cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString parseInlines term contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont - return $ (,) <$> term' <*> fmap (:[]) contents' + return (term', [contents']) -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int - -> OrgParser (F Blocks) + -> OrgParser Blocks listItem start = try $ do markerLength <- try start firstLine <- anyLineNewline @@ -942,7 +926,7 @@ anyLineNewline = (++ "\n") <$> anyLine -- inline -- -inline :: OrgParser (F Inlines) +inline :: OrgParser Inlines inline = choice [ whitespace , linebreak @@ -968,31 +952,31 @@ inline = ] <* (guard =<< newlinesCountWithinLimits) "inline" -parseInlines :: OrgParser (F Inlines) -parseInlines = trimInlinesF . mconcat <$> many1 inline +parseInlines :: OrgParser Inlines +parseInlines = trimInlines . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~" -whitespace :: OrgParser (F Inlines) -whitespace = pure B.space <$ skipMany1 spaceChar +whitespace :: OrgParser Inlines +whitespace = B.space <$ skipMany1 spaceChar <* updateLastPreCharPos <* updateLastForbiddenCharPos "whitespace" -linebreak :: OrgParser (F Inlines) -linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline +linebreak :: OrgParser Inlines +linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline -str :: OrgParser (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str :: OrgParser Inlines +str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural -- break. This should reflect the values of the Emacs variable -- @org-element-pagaraph-separate@. -endline :: OrgParser (F Inlines) +endline :: OrgParser Inlines endline = try $ do newline notFollowedBy blankline @@ -1010,77 +994,72 @@ endline = try $ do decEmphasisNewlinesCount guard =<< newlinesCountWithinLimits updateLastPreCharPos - return . return $ B.space + return $ B.space -cite :: OrgParser (F Inlines) +cite :: OrgParser Inlines cite = try $ do guardEnabled Ext_citations (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) <$> cs + return $ flip B.cite (B.text raw) cs -normalCite :: OrgParser (F [Citation]) +normalCite :: OrgParser [Citation] normalCite = try $ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' -citeList :: OrgParser (F [Citation]) -citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) +citeList :: OrgParser [Citation] +citeList = sepBy1 citation (try $ char ';' *> skipSpaces) -citation :: OrgParser (F Citation) +citation :: OrgParser Citation citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } + return $ Citation{ citationId = key + , citationPrefix = B.toList pref + , citationSuffix = B.toList suff + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } where - prefix = trimInlinesF . mconcat <$> + prefix = trimInlines . mconcat <$> manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) skipSpaces - rest <- trimInlinesF . mconcat <$> + rest <- trimInlines . mconcat <$> many (notFollowedBy (oneOf ";]") *> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest + return $ + if hasSpace + then B.space <> rest + else rest -footnote :: OrgParser (F Inlines) +footnote :: OrgParser Inlines footnote = try $ inlineNote <|> referencedNote -inlineNote :: OrgParser (F Inlines) +inlineNote :: OrgParser Inlines inlineNote = try $ do string "[fn:" ref <- many alphaNum char ':' - note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') + note <- B.para . trimInlines . mconcat <$> many1Till inline (char ']') when (not $ null ref) $ addToNotesTable ("fn:" ++ ref, note) - return $ B.note <$> note + return $ B.note note -referencedNote :: OrgParser (F Inlines) +referencedNote :: OrgParser Inlines referencedNote = try $ do ref <- noteMarker - return $ do - notes <- asksF orgStateNotes' + notes <- asks orgStateNotes' + return $ case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" - Just contents -> do - st <- askF - let contents' = runF contents st{ orgStateNotes' = [] } - return $ B.note contents' + Just contents -> B.note contents + Nothing -> B.str $ "[" ++ ref ++ "]" noteMarker :: OrgParser String noteMarker = try $ do @@ -1090,37 +1069,37 @@ noteMarker = try $ do <*> many1Till (noneOf "\n\r\t ") (char ']') ] -linkOrImage :: OrgParser (F Inlines) +linkOrImage :: OrgParser Inlines linkOrImage = explicitOrImageLink <|> selflinkOrImage <|> angleLink <|> plainLink "link or image" -explicitOrImageLink :: OrgParser (F Inlines) +explicitOrImageLink :: OrgParser Inlines explicitOrImageLink = try $ do char '[' - srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget + src <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ do - src <- srcF - if isImageFilename src && isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + alt <- internalLink src title' + return $ + (if isImageFilename src && isImageFilename title + then B.link src "" $ B.image title mempty mempty + else fromMaybe alt (linkToInlines src title')) -selflinkOrImage :: OrgParser (F Inlines) +selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return $ linkToInlinesF src (B.str src) + return $ fromMaybe "" (linkToInlines src (B.str src)) -plainLink :: OrgParser (F Inlines) +plainLink :: OrgParser Inlines plainLink = try $ do (orig, src) <- uri - returnF $ B.link src "" (B.str orig) + return $ B.link src "" (B.str orig) -angleLink :: OrgParser (F Inlines) +angleLink :: OrgParser Inlines angleLink = try $ do char '<' link <- plainLink @@ -1136,26 +1115,31 @@ linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") possiblyEmptyLinkTarget :: OrgParser String possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser (F String) +applyCustomLinkFormat :: String -> OrgParser String applyCustomLinkFormat link = do let (linkType, rest) = break (== ':') link - return $ do - formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter + fmts <- ask + return $ + case M.lookup linkType (orgStateLinkFormatters fmts) of + Just v -> (v (drop 1 rest)) + Nothing -> link -- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind -- of parsing. -linkToInlinesF :: String -> Inlines -> F Inlines -linkToInlinesF s = +linkToInlines :: String -> Inlines -> Maybe Inlines +linkToInlines = \s -> case s of - "" -> pure . B.link "" "" - ('#':_) -> pure . B.link s "" - _ | isImageFilename s -> const . pure $ B.image s "" "" - _ | isFileLink s -> pure . B.link (dropLinkType s) "" - _ | isUri s -> pure . B.link s "" - _ | isAbsoluteFilePath s -> pure . B.link ("file://" ++ s) "" - _ | isRelativeFilePath s -> pure . B.link s "" - _ -> internalLink s + _ | null s -> Just . B.link "" "" + _ | isAnchor s -> Just . B.link s "" + _ | isImageFilename s -> const . Just $ B.image s "" "" + _ | isFileLink s -> Just . B.link (dropLinkType s) "" + _ | isUri s -> Just . B.link s "" + _ | isAbsoluteFilePath s -> Just . B.link ("file://" ++ s) "" + _ | isRelativeFilePath s -> Just . B.link s "" + _ -> const Nothing + +isAnchor :: String -> Bool +isAnchor s = "#" `isPrefixOf` s isFileLink :: String -> Bool isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s) @@ -1184,12 +1168,13 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] -internalLink :: String -> Inlines -> F Inlines +internalLink :: String -> Inlines -> OrgParser Inlines internalLink link title = do - anchorB <- (link `elem`) <$> asksF orgStateAnchorIds - if anchorB - then return $ B.link ('#':link) "" title - else return $ B.emph title + anchorB <- ask + return $ + if link `elem` (orgStateAnchorIds anchorB) + then B.link ('#':link) "" title + else B.emph title -- | Parse an anchor like @<>@ and return an empty span with -- @anchor-id@ set as id. Legal anchors in org-mode are defined through @@ -1197,11 +1182,11 @@ internalLink link title = do -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as -- an anchor. -anchor :: OrgParser (F Inlines) +anchor :: OrgParser Inlines anchor = try $ do anchorId <- parseAnchor recordAnchorId anchorId - returnF $ B.spanWith (solidify anchorId, [], []) mempty + return $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" *> many1 (noneOf "\t\n\r<>\"' ") @@ -1219,7 +1204,7 @@ solidify = map replaceSpecialChar | otherwise = '-' -- | Parses an inline code block and marks it as an babel block. -inlineCodeBlock :: OrgParser (F Inlines) +inlineCodeBlock :: OrgParser Inlines inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar @@ -1227,7 +1212,7 @@ inlineCodeBlock = try $ do inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) - returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode + return $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode enclosedByPair :: Char -- ^ opening char -> Char -- ^ closing char @@ -1235,50 +1220,50 @@ enclosedByPair :: Char -- ^ opening char -> OrgParser [a] enclosedByPair s e p = char s *> many1Till p (char e) -emph :: OrgParser (F Inlines) -emph = fmap B.emph <$> emphasisBetween '/' +emph :: OrgParser Inlines +emph = B.emph <$> emphasisBetween '/' -strong :: OrgParser (F Inlines) -strong = fmap B.strong <$> emphasisBetween '*' +strong :: OrgParser Inlines +strong = B.strong <$> emphasisBetween '*' -strikeout :: OrgParser (F Inlines) -strikeout = fmap B.strikeout <$> emphasisBetween '+' +strikeout :: OrgParser Inlines +strikeout = B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. -underline :: OrgParser (F Inlines) -underline = fmap B.strong <$> emphasisBetween '_' +underline :: OrgParser Inlines +underline = B.strong <$> emphasisBetween '_' -verbatim :: OrgParser (F Inlines) -verbatim = return . B.code <$> verbatimBetween '=' +verbatim :: OrgParser Inlines +verbatim = B.code <$> verbatimBetween '=' -code :: OrgParser (F Inlines) -code = return . B.code <$> verbatimBetween '~' +code :: OrgParser Inlines +code = B.code <$> verbatimBetween '~' -subscript :: OrgParser (F Inlines) -subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) +subscript :: OrgParser Inlines +subscript = B.subscript <$> try (char '_' *> subOrSuperExpr) -superscript :: OrgParser (F Inlines) -superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) +superscript :: OrgParser Inlines +superscript = B.superscript <$> try (char '^' *> subOrSuperExpr) -math :: OrgParser (F Inlines) -math = return . B.math <$> choice [ math1CharBetween '$' +math :: OrgParser Inlines +math = B.math <$> choice [ math1CharBetween '$' , mathStringBetween '$' , rawMathBetween "\\(" "\\)" ] -displayMath :: OrgParser (F Inlines) -displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" +displayMath :: OrgParser Inlines +displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" , rawMathBetween "$$" "$$" ] -symbol :: OrgParser (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +symbol :: OrgParser Inlines +symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) where updatePositions c = do when (c `elem` emphasisPreChars) updateLastPreCharPos when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos return c emphasisBetween :: Char - -> OrgParser (F Inlines) + -> OrgParser Inlines emphasisBetween c = try $ do startEmphasisNewlinesCounting emphasisAllowedNewlines res <- enclosedInlines (emphasisStart c) (emphasisEnd c) @@ -1355,9 +1340,9 @@ mathEnd c = try $ do enclosedInlines :: OrgParser a -> OrgParser b - -> OrgParser (F Inlines) + -> OrgParser Inlines enclosedInlines start end = try $ - trimInlinesF . mconcat <$> enclosed start end inline + trimInlines . mconcat <$> enclosed start end inline enclosedRaw :: OrgParser a -> OrgParser b @@ -1436,7 +1421,7 @@ notAfterForbiddenBorderChar = do return $ lastFBCPos /= Just pos -- | Read a sub- or superscript expression -subOrSuperExpr :: OrgParser (F Inlines) +subOrSuperExpr :: OrgParser Inlines subOrSuperExpr = try $ choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") @@ -1451,10 +1436,10 @@ simpleSubOrSuperString = try $ <*> many1 alphaNum ] -inlineLaTeX :: OrgParser (F Inlines) +inlineLaTeX :: OrgParser Inlines inlineLaTeX = try $ do cmd <- inlineLaTeXCommand - maybe mzero returnF $ + maybe mzero return $ parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd where parseAsMath :: String -> Maybe Inlines -- cgit v1.2.3 From 9d772068278da6004712dd6b012d78f1b283a543 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Fri, 30 Jan 2015 12:31:12 +0000 Subject: Changed parseWithWarnings to the more general returnWarnings parser transformer --- src/Text/Pandoc/Parsing.hs | 11 +++++------ src/Text/Pandoc/Readers/RST.hs | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index ec15adf77..2a1d61b97 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -65,7 +65,7 @@ module Text.Pandoc.Parsing ( anyLine, widthsFromIndices, gridTableWith, readWith, - readWithWarnings, + returnWarnings, readWithM, testStringWith, guardEnabled, @@ -885,11 +885,10 @@ readWith :: Parser [Char] st a -> a readWith p t inp = runIdentity $ readWithM p t inp -readWithWarnings :: Parser [Char] ParserState a - -> ParserState - -> String - -> (a, [String]) -readWithWarnings p = readWith $ do +returnWarnings :: (Stream s m c) + => ParserT s ParserState m a + -> ParserT s ParserState m (a, [String]) +returnWarnings p = do doc <- p warnings <- stateWarnings <$> getState return (doc, warnings) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index b9a77c5d6..4ae9d52ae 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -58,7 +58,7 @@ readRST :: ReaderOptions -- ^ Reader options readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") readRSTWithWarnings :: ReaderOptions -> String -> (Pandoc, [String]) -readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n") +readRSTWithWarnings opts s = (readWith (returnWarnings parseRST)) def{ stateOptions = opts } (s ++ "\n\n") type RSTParser = Parser [Char] ParserState -- cgit v1.2.3 From 2b580600077b615bb66e3bf3b49785a7b8772d09 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Fri, 30 Jan 2015 12:32:12 +0000 Subject: Remove F monad from Parsing --- src/Text/Pandoc/Parsing.hs | 26 ++------------------------ src/Text/Pandoc/Readers/Org.hs | 3 +-- 2 files changed, 3 insertions(+), 26 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 2a1d61b97..8f1d1086d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -104,10 +104,6 @@ module Text.Pandoc.Parsing ( anyLine, applyMacros', Parser, ParserT, - F(..), - runF, - askF, - asksF, token, generalize, -- * Re-exports from Text.Pandoc.Parsec @@ -189,7 +185,7 @@ import Data.Default import qualified Data.Set as Set import Control.Monad.Reader import Control.Monad.Identity -import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative) +import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$)) import Data.Monoid import Data.Maybe (catMaybes) @@ -197,22 +193,6 @@ type Parser t s = Parsec t s type ParserT = ParsecT -newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor) - -runF :: F a -> ParserState -> a -runF = runReader . unF - -askF :: F ParserState -askF = F ask - -asksF :: (ParserState -> a) -> F a -asksF f = F $ asks f - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = liftM mconcat . sequence - -- | Parse any line of text anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] anyLine = do @@ -914,7 +894,6 @@ data ParserState = ParserState stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) stateMeta :: Meta, -- ^ Document metadata - stateMeta' :: F Meta, -- ^ Document metadata stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) stateIdentifiers :: [String], -- ^ List of header identifiers used @@ -1011,7 +990,6 @@ defaultParserState = stateNotes = [], stateNotes' = [], stateMeta = nullMeta, - stateMeta' = return nullMeta, stateHeaderTable = [], stateHeaders = M.empty, stateIdentifiers = [], @@ -1063,7 +1041,7 @@ data QuoteContext type NoteTable = [(String, String)] -type NoteTable' = [(String, F Blocks)] -- used in markdown reader +type NoteTable' = [(String, Blocks)] -- used in markdown reader newtype Key = Key String deriving (Show, Read, Eq, Ord) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 97a15576b..5cb66bfa7 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,8 +37,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>) import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF - , newline, orderedListMarker +import Text.Pandoc.Parsing hiding ( newline, orderedListMarker , parseFromString, blanklines ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) -- cgit v1.2.3 From d4ab579dc3f4b13be0a8ac2024467802a5f9928e Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Fri, 30 Jan 2015 12:32:45 +0000 Subject: Add check to see whether in a footnote to ParserState (to avoid circular footnotes) --- src/Text/Pandoc/Parsing.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 8f1d1086d..592f377c6 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -908,7 +908,8 @@ data ParserState = ParserState stateCaption :: Maybe Inlines, -- ^ Caption in current environment stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context - stateWarnings :: [String] -- ^ Warnings generated by the parser + stateWarnings :: [String], -- ^ Warnings generated by the parser + stateInFootnote :: Bool -- ^ True if in a footnote block. } instance Default ParserState where @@ -1002,7 +1003,8 @@ defaultParserState = stateCaption = Nothing, stateInHtmlBlock = Nothing, stateMarkdownAttribute = False, - stateWarnings = []} + stateWarnings = [], + stateInFootnote = False } -- | Succeed only if the extension is enabled. guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () -- cgit v1.2.3 From dc450d80a124033454f401b3d1f357ec036eab9d Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Fri, 30 Jan 2015 12:33:01 +0000 Subject: Generalise signature of addWarning --- src/Text/Pandoc/Parsing.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 592f377c6..3e9d559dc 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1239,7 +1239,7 @@ applyMacros' target = do else return target -- | Append a warning to the log. -addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState () +addWarning :: (Stream s m c) => Maybe SourcePos -> String -> ParserT s ParserState m () addWarning mbpos msg = updateState $ \st -> st{ stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : -- cgit v1.2.3 From 13fb1d61017ecdf6f4e16f811c39fb2f6ca34c99 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Fri, 30 Jan 2015 12:34:57 +0000 Subject: Remove F Monad from Markdown reader --- src/Text/Pandoc/Readers/Markdown.hs | 619 ++++++++++++++++++------------------ 1 file changed, 307 insertions(+), 312 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 187b479c3..400873fe6 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -57,6 +57,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockT import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad +import Control.Monad.Reader import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) @@ -64,25 +65,36 @@ import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) -type MarkdownParser = Parser [Char] ParserState +type MarkdownParser a = ParserT [Char] ParserState (Reader ParserState) a -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc readMarkdown opts s = - (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + (runMarkdown opts s parseMarkdown) -- | Read markdown from an input string and return a pair of a Pandoc document -- and a list of warnings. readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> (Pandoc, [String]) -readMarkdownWithWarnings opts s = - (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") +readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown) -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines + +retState :: MarkdownParser a -> MarkdownParser (a, ParserState) +retState p = do + r <- p + s <- getState + return (r, s) + +runMarkdown :: ReaderOptions -> String -> MarkdownParser a -> a +runMarkdown opts inp p = fst res + where + imd = readWithM (retState p) def{ stateOptions = opts } (inp ++ "\n\n") + res = runReader imd s + s :: ParserState + s = snd $ runReader imd s -- -- Constants and data structure definitions @@ -119,10 +131,10 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -isNull :: F Inlines -> Bool -isNull ils = B.isNull $ runF ils def +isNull :: Inlines -> Bool +isNull ils = B.isNull ils -spnl :: Parser [Char] st () +spnl :: Monad m => ParserT [Char] st m () spnl = try $ do skipSpaces optional newline @@ -162,9 +174,9 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: MarkdownParser (F Inlines) +inlinesInBalancedBrackets :: MarkdownParser Inlines inlinesInBalancedBrackets = charsInBalancedBrackets >>= - parseFromString (trimInlinesF . mconcat <$> many inline) + parseFromString (trimInlines . mconcat <$> many inline) charsInBalancedBrackets :: MarkdownParser [Char] charsInBalancedBrackets = do @@ -181,16 +193,16 @@ charsInBalancedBrackets = do -- document structure -- -titleLine :: MarkdownParser (F Inlines) +titleLine :: MarkdownParser Inlines titleLine = try $ do char '%' skipSpaces res <- many $ (notFollowedBy newline >> inline) <|> try (endline >> whitespace) newline - return $ trimInlinesF $ mconcat res + return $ trimInlines $ mconcat res -authorsLine :: MarkdownParser (F [Inlines]) +authorsLine :: MarkdownParser [Inlines] authorsLine = try $ do char '%' skipSpaces @@ -199,13 +211,13 @@ authorsLine = try $ do (char ';' <|> try (newline >> notFollowedBy blankline >> spaceChar)) newline - return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors + return $ filter (not . isNull) $ map (trimInlines . mconcat) authors -dateLine :: MarkdownParser (F Inlines) +dateLine :: MarkdownParser Inlines dateLine = try $ do char '%' skipSpaces - trimInlinesF . mconcat <$> manyTill inline newline + trimInlines . mconcat <$> manyTill inline newline titleBlock :: MarkdownParser () titleBlock = pandocTitleBlock <|> mmdTitleBlock @@ -215,20 +227,16 @@ pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') title <- option mempty titleLine - author <- option (return []) authorsLine + author <- option [] authorsLine date <- option mempty dateLine optional blanklines - let meta' = do title' <- title - author' <- author - date' <- date - return $ - (if B.isNull title' then id else B.setMeta "title" title') - . (if null author' then id else B.setMeta "author" author') - . (if B.isNull date' then id else B.setMeta "date" date') - $ nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } - -yamlMetaBlock :: MarkdownParser (F Blocks) + let meta' = (if B.isNull title then id else B.setMeta "title" title) + . (if null author then id else B.setMeta "author" author) + . (if B.isNull date then id else B.setMeta "date" date) + $ nullMeta + updateState $ \st -> st{ stateMeta = stateMeta st <> meta' } + +yamlMetaBlock :: MarkdownParser (Blocks) yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition @@ -241,17 +249,17 @@ yamlMetaBlock = try $ do optional blanklines opts <- stateOptions <$> getState meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> return $ return $ + Right (Yaml.Object hashmap) -> return $ H.foldrWithKey (\k v m -> if ignorable k then m else B.setMeta (T.unpack k) (yamlToMeta opts v) m) nullMeta hashmap - Right Yaml.Null -> return $ return nullMeta + Right Yaml.Null -> return nullMeta Right _ -> do addWarning (Just pos) "YAML header is not an object" - return $ return nullMeta + return nullMeta Left err' -> do case err' of InvalidYaml (Just YamlParseException{ @@ -270,8 +278,8 @@ yamlMetaBlock = try $ do _ -> addWarning (Just pos) $ "Could not parse YAML header: " ++ show err' - return $ return nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + return nullMeta + updateState $ \st -> st{ stateMeta = stateMeta st <> meta' } return mempty -- ignore fields ending with _ @@ -314,8 +322,8 @@ mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block kvPairs <- many1 kvPair blanklines - updateState $ \st -> st{ stateMeta' = stateMeta' st <> - return (Meta $ M.fromList kvPairs) } + updateState $ \st -> st{ stateMeta = stateMeta st <> + (Meta $ M.fromList kvPairs) } kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do @@ -335,11 +343,11 @@ parseMarkdown = do optional titleBlock blocks <- parseBlocks st <- getState - let meta = runF (stateMeta' st) st - let Pandoc _ bs = B.doc $ runF blocks st + let meta = stateMeta st + let Pandoc _ bs = B.doc blocks return $ Pandoc meta bs -referenceKey :: MarkdownParser (F Blocks) +referenceKey :: MarkdownParser (Blocks) referenceKey = try $ do pos <- getPosition skipNonindentSpaces @@ -366,7 +374,7 @@ referenceKey = try $ do Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" Nothing -> return () updateState $ \s -> s { stateKeys = M.insert key target oldkeys } - return $ return mempty + return mempty referenceTitle :: MarkdownParser String referenceTitle = try $ do @@ -386,7 +394,7 @@ quotedTitle c = try $ do -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for -- an abbreviation. -abbrevKey :: MarkdownParser (F Blocks) +abbrevKey :: MarkdownParser (Blocks) abbrevKey = do guardEnabled Ext_abbreviations try $ do @@ -395,7 +403,7 @@ abbrevKey = do char ':' skipMany (satisfy (/= '\n')) blanklines - return $ return mempty + return mempty noteMarker :: MarkdownParser String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') @@ -413,7 +421,7 @@ rawLines = do rest <- many rawLine return $ unlines (first:rest) -noteBlock :: MarkdownParser (F Blocks) +noteBlock :: MarkdownParser (Blocks) noteBlock = try $ do pos <- getPosition skipNonindentSpaces @@ -425,7 +433,7 @@ noteBlock = try $ do rest <- many $ try $ blanklines >> indentSpaces >> rawLines let raw = unlines (first:rest) ++ "\n" optional blanklines - parsed <- parseFromString parseBlocks raw + parsed <- parseFromString (inFootnote parseBlocks) raw let newnote = (ref, parsed) oldnotes <- stateNotes' <$> getState case lookup ref oldnotes of @@ -434,21 +442,29 @@ noteBlock = try $ do updateState $ \s -> s { stateNotes' = newnote : oldnotes } return mempty +inFootnote :: MarkdownParser a -> MarkdownParser a +inFootnote p = do + st <- stateInFootnote <$> getState + updateState (\s -> s { stateInFootnote = True } ) + r <- p + updateState (\s -> s { stateInFootnote = st } ) + return r + -- -- parsing blocks -- -parseBlocks :: MarkdownParser (F Blocks) +parseBlocks :: MarkdownParser (Blocks) parseBlocks = mconcat <$> manyTill block eof -block :: MarkdownParser (F Blocks) +block :: MarkdownParser (Blocks) block = do tr <- getOption readerTrace pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock - , guardEnabled Ext_latex_macros *> (macro >>= return . return) + , guardEnabled Ext_latex_macros *> macro -- note: bulletList needs to be before header because of -- the possibility of empty list items: - , bulletList @@ -471,28 +487,27 @@ block = do , plain ] "block" when tr $ do - st <- getState trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList $ runF res st)) (return ()) + (take 60 $ show $ B.toList $ res)) (return ()) return res -- -- header blocks -- -header :: MarkdownParser (F Blocks) +header :: MarkdownParser (Blocks) header = setextHeader <|> atxHeader "header" -atxHeader :: MarkdownParser (F Blocks) +atxHeader :: MarkdownParser Blocks atxHeader = try $ do level <- many1 (char '#') >>= return . length notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces - text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) + text <- trimInlines . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr' <- registerHeader attr (runF text defaultParserState) - return $ B.headerWith attr' level <$> text + attr' <- registerHeader attr text + return $ B.headerWith attr' level text atxClosing :: MarkdownParser Attr atxClosing = try $ do @@ -519,25 +534,25 @@ mmdHeaderIdentifier = do skipSpaces return (ident,[],[]) -setextHeader :: MarkdownParser (F Blocks) +setextHeader :: MarkdownParser Blocks setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) + text <- trimInlines . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) attr <- setextHeaderEnd underlineChar <- oneOf setextHChars many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr' <- registerHeader attr (runF text defaultParserState) - return $ B.headerWith attr' level <$> text + attr' <- registerHeader attr text + return $ B.headerWith attr' level text -- -- hrule block -- -hrule :: Parser [Char] st (F Blocks) +hrule :: Monad m => ParserT [Char] st m Blocks hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -545,7 +560,7 @@ hrule = try $ do skipMany (spaceChar <|> char start) newline optional blanklines - return $ return B.horizontalRule + return B.horizontalRule -- -- code blocks @@ -554,9 +569,10 @@ hrule = try $ do indentedLine :: MarkdownParser String indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") -blockDelimiter :: (Char -> Bool) +blockDelimiter :: Monad m + => (Char -> Bool) -> Maybe Int - -> Parser [Char] st Int + -> ParserT [Char] st m Int blockDelimiter f len = try $ do c <- lookAhead (satisfy f) case len of @@ -607,7 +623,7 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -codeBlockFenced :: MarkdownParser (F Blocks) +codeBlockFenced :: MarkdownParser Blocks codeBlockFenced = try $ do c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) @@ -619,7 +635,7 @@ codeBlockFenced = try $ do blankline contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ B.codeBlockWith attr $ intercalate "\n" contents -- correctly handle github language identifiers toLanguageId :: String -> String @@ -628,7 +644,7 @@ toLanguageId = map toLower . go go "objective-c" = "objectivec" go x = x -codeBlockIndented :: MarkdownParser (F Blocks) +codeBlockIndented :: MarkdownParser (Blocks) codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -636,15 +652,15 @@ codeBlockIndented = do return $ b ++ l)) optional blanklines classes <- getOption readerIndentedCodeClasses - return $ return $ B.codeBlockWith ("", classes, []) $ + return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: MarkdownParser (F Blocks) +lhsCodeBlock :: MarkdownParser (Blocks) lhsCodeBlock = do guardEnabled Ext_literate_haskell - (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + (B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)) - <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> + <|> (B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> lhsCodeBlockInverseBird) lhsCodeBlockLaTeX :: MarkdownParser String @@ -673,7 +689,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> Parser [Char] st String +birdTrackLine :: Monad m => Char -> ParserT [Char] st m String birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -701,12 +717,12 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: MarkdownParser (F Blocks) +blockQuote :: MarkdownParser (Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - return $ B.blockQuote <$> contents + return $ B.blockQuote contents -- -- list blocks @@ -804,7 +820,7 @@ listContinuationLine = try $ do return $ result ++ "\n" listItem :: MarkdownParser a - -> MarkdownParser (F Blocks) + -> MarkdownParser (Blocks) listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -820,14 +836,14 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: MarkdownParser (F Blocks) +orderedList :: MarkdownParser Blocks orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart unless (style `elem` [DefaultStyle, Decimal, Example] && delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists - items <- fmap sequence $ many1 $ listItem + items <- many1 $ listItem ( try $ do optional newline -- if preceded by Plain block in a list startpos <- sourceColumn <$> getPosition @@ -839,12 +855,12 @@ orderedList = try $ do atMostSpaces (tabStop - (endpos - startpos)) return res ) start' <- option 1 $ guardEnabled Ext_startnum >> return start - return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items + return $ B.orderedListWith (start', style, delim) (compactify' items) -bulletList :: MarkdownParser (F Blocks) +bulletList :: MarkdownParser (Blocks) bulletList = do - items <- fmap sequence $ many1 $ listItem bulletListStart - return $ B.bulletList <$> fmap compactify' items + items <- many1 $ listItem bulletListStart + return $ B.bulletList (compactify' items) -- definition lists @@ -859,14 +875,14 @@ defListMarker = do else mzero return () -definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks])) +definitionListItem :: Bool -> MarkdownParser (Inlines, [Blocks]) definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact - term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' + term <- parseFromString (trimInlines . mconcat <$> many inline) rawLine' contents <- mapM (parseFromString parseBlocks) raw optional blanklines - return $ liftM2 (,) term (sequence contents) + return $ (term, contents) defRawBlock :: Bool -> MarkdownParser String defRawBlock compact = try $ do @@ -889,32 +905,32 @@ defRawBlock compact = try $ do return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" -definitionList :: MarkdownParser (F Blocks) +definitionList :: MarkdownParser (Blocks) definitionList = try $ do lookAhead (anyLine >> optional blankline >> defListMarker) compactDefinitionList <|> normalDefinitionList -compactDefinitionList :: MarkdownParser (F Blocks) +compactDefinitionList :: MarkdownParser (Blocks) compactDefinitionList = do guardEnabled Ext_compact_definition_lists - items <- fmap sequence $ many1 $ definitionListItem True - return $ B.definitionList <$> fmap compactify'DL items + items <- many1 $ definitionListItem True + return $ B.definitionList (compactify'DL items) -normalDefinitionList :: MarkdownParser (F Blocks) +normalDefinitionList :: MarkdownParser (Blocks) normalDefinitionList = do guardEnabled Ext_definition_lists - items <- fmap sequence $ many1 $ definitionListItem False - return $ B.definitionList <$> items + items <- many1 $ definitionListItem False + return $ B.definitionList items -- -- paragraph block -- -para :: MarkdownParser (F Blocks) +para :: MarkdownParser Blocks para = try $ do exts <- getOption readerExtensions - result <- trimInlinesF . mconcat <$> many1 inline - option (B.plain <$> result) + result <- trimInlines . mconcat <$> many1 inline + option (B.plain result) $ try $ do newline (blanklines >> return mempty) @@ -932,17 +948,16 @@ para = try $ do lookAhead (htmlTag (~== TagClose "div")) _ -> mzero return $ do - result' <- result - case B.toList result' of + case B.toList result of [Image alt (src,tit)] | Ext_implicit_figures `Set.member` exts -> -- the fig: at beginning of title indicates a figure - return $ B.para $ B.singleton + B.para $ B.singleton $ Image alt (src,'f':'i':'g':':':tit) - _ -> return $ B.para result' + _ -> B.para result -plain :: MarkdownParser (F Blocks) -plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline +plain :: MarkdownParser (Blocks) +plain = B.plain . trimInlines . mconcat <$> many1 inline -- -- raw html @@ -953,13 +968,13 @@ htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: MarkdownParser (F Blocks) +htmlBlock :: MarkdownParser (Blocks) htmlBlock = do guardEnabled Ext_raw_html try (do (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag (guard (t `elem` ["pre","style","script"]) >> - (return . B.rawBlock "html") <$> rawVerbatimBlock) + (B.rawBlock "html") <$> rawVerbatimBlock) <|> (do guardEnabled Ext_markdown_attribute oldMarkdownAttribute <- stateMarkdownAttribute <$> getState markdownAttribute <- @@ -978,12 +993,12 @@ htmlBlock = do <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)) <|> htmlBlock' -htmlBlock' :: MarkdownParser (F Blocks) +htmlBlock' :: MarkdownParser (Blocks) htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ return $ B.rawBlock "html" first + return $ B.rawBlock "html" first strictHtmlBlock :: MarkdownParser String strictHtmlBlock = htmlInBalanced (not . isInlineTag) @@ -996,17 +1011,17 @@ rawVerbatimBlock = try $ do contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags' [TagClose tag] -rawTeXBlock :: MarkdownParser (F Blocks) +rawTeXBlock :: MarkdownParser (Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) + (generalize rawLaTeXBlock) `sepEndBy1` blankline) <|> (B.rawBlock "context" . concat <$> rawConTeXtEnvironment `sepEndBy1` blankline) spaces - return $ return result + return result -rawHtmlBlocks :: MarkdownParser (F Blocks) +rawHtmlBlocks :: MarkdownParser (Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag -- try to find closing tag @@ -1018,10 +1033,10 @@ rawHtmlBlocks = do contents <- mconcat <$> many (notFollowedBy' closer >> block) result <- (closer >>= \(_, rawcloser) -> return ( - return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> + (B.rawBlock "html" $ stripMarkdownAttribute raw) <> contents <> - return (B.rawBlock "html" rawcloser))) - <|> return (return (B.rawBlock "html" raw) <> contents) + (B.rawBlock "html" rawcloser))) + <|> return ((B.rawBlock "html" raw) <> contents) updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } return result @@ -1036,12 +1051,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s -- line block -- -lineBlock :: MarkdownParser (F Blocks) +lineBlock :: MarkdownParser (Blocks) lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= - mapM (parseFromString (trimInlinesF . mconcat <$> many inline)) - return $ B.para <$> (mconcat $ intersperse (return B.linebreak) lines') + mapM (parseFromString (trimInlines . mconcat <$> many inline)) + return $ B.para (mconcat $ intersperse B.linebreak lines') -- -- Tables @@ -1049,8 +1064,8 @@ lineBlock = try $ do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Char - -> Parser [Char] st (Int, Int) +dashedLine :: Monad m => Char + -> ParserT [Char] st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1059,7 +1074,7 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. simpleTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) + -> MarkdownParser ([Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -1078,8 +1093,8 @@ simpleTableHeader headless = try $ do let rawHeads' = if headless then replicate (length dashes) "" else rawHeads - heads <- fmap sequence - $ mapM (parseFromString (mconcat <$> many plain)) + heads <- + mapM (parseFromString (mconcat <$> many plain)) $ map trim rawHeads' return (heads, aligns, indices) @@ -1121,30 +1136,30 @@ rawTableLine indices = do -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] - -> MarkdownParser (F [Blocks]) + -> MarkdownParser [Blocks] tableLine indices = rawTableLine indices >>= - fmap sequence . mapM (parseFromString (mconcat <$> many plain)) + mapM (parseFromString (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: [Int] - -> MarkdownParser (F [Blocks]) + -> MarkdownParser [Blocks] multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols + mapM (parseFromString (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: MarkdownParser (F Inlines) +tableCaption :: MarkdownParser Inlines tableCaption = try $ do guardEnabled Ext_table_captions skipNonindentSpaces string ":" <|> string "Table:" - trimInlinesF . mconcat <$> many1 inline <* blanklines + trimInlines . mconcat <$> many1 inline <* blanklines -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) simpleTable headless = do (aligns, _widths, heads', lines') <- tableWith (simpleTableHeader headless) tableLine @@ -1158,12 +1173,12 @@ simpleTable headless = do -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). multilineTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) multilineTable headless = tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter multilineTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) + -> MarkdownParser ([Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do unless headless $ tableSep >> notFollowedBy blankline @@ -1185,7 +1200,7 @@ multilineTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else map (unlines . map trim) rawHeadsList - heads <- fmap sequence $ + heads <- mapM (parseFromString (mconcat <$> many plain)) $ map trim rawHeads return (heads, aligns, indices) @@ -1195,7 +1210,7 @@ multilineTableHeader headless = try $ do -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) gridTable headless = tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter @@ -1204,13 +1219,13 @@ gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ trimr line -gridPart :: Char -> Parser [Char] st (Int, Int) +gridPart :: Monad m => Char -> ParserT [Char] st m (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' return (length dashes, length dashes + 1) -gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] +gridDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String @@ -1223,7 +1238,7 @@ gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) + -> MarkdownParser ([Blocks], [Alignment], [Int]) gridTableHeader headless = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -1243,7 +1258,7 @@ gridTableHeader headless = try $ do then replicate (length dashes) "" else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads + heads <- mapM (parseFromString parseBlocks . trim) rawHeads return (heads, aligns, indices) gridTableRawLine :: [Int] -> MarkdownParser [String] @@ -1254,12 +1269,12 @@ gridTableRawLine indices = do -- | Parse row of grid table. gridTableRow :: [Int] - -> MarkdownParser (F [Blocks]) + -> MarkdownParser [Blocks] gridTableRow indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols) + compactify' <$> (mapM (parseFromString parseBlocks) cols) removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -1285,14 +1300,14 @@ pipeBreak = try $ do blankline return (first:rest) -pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable :: MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) pipeTable = try $ do (heads,aligns) <- try ( pipeBreak >>= \als -> - return (return $ replicate (length als) mempty, als)) + return (replicate (length als) mempty, als)) <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> return (row, als) ) - lines' <- sequence <$> many1 pipeTableRow + lines' <- many1 pipeTableRow let widths = replicate (length aligns) 0.0 return $ (aligns, widths, heads, lines') @@ -1302,7 +1317,7 @@ sepPipe = try $ do notFollowedBy blankline -- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: MarkdownParser (F [Blocks]) +pipeTableRow :: MarkdownParser [Blocks] pipeTableRow = do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1314,16 +1329,14 @@ pipeTableRow = do guard $ not (null rest && not openPipe) optional (char '|') blankline - let cells = sequence (first:rest) - return $ do - cells' <- cells - return $ map - (\ils -> + let cells = first:rest + return $ + map (\ils -> case trimInlines ils of ils' | B.isNull ils' -> mempty - | otherwise -> B.plain $ ils') cells' + | otherwise -> B.plain $ ils') cells -pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart :: Monad m => ParserT [Char] st m Alignment pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1338,7 +1351,7 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter -- Succeed only if current line contains a pipe. -scanForPipe :: Parser [Char] st () +scanForPipe :: Monad m => ParserT [Char] st m () scanForPipe = do inp <- getInput case break (\c -> c == '\n' || c == '|') inp of @@ -1348,14 +1361,14 @@ scanForPipe = do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in -- Text.Pandoc.Parsing. -tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int]) - -> ([Int] -> MarkdownParser (F [Blocks])) +tableWith :: MarkdownParser ([Blocks], [Alignment], [Int]) + -> ([Int] -> MarkdownParser [Blocks]) -> MarkdownParser sep -> MarkdownParser end - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser - lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser + lines' <- rowParser indices `sepEndBy1` lineParser footerParser numColumns <- getOption readerColumns let widths = if (indices == []) @@ -1363,7 +1376,7 @@ tableWith headerParser rowParser lineParser footerParser = try $ do else widthsFromIndices numColumns indices return $ (aligns, widths, heads, lines') -table :: MarkdownParser (F Blocks) +table :: MarkdownParser Blocks table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) (aligns, widths, heads, lns) <- @@ -1378,19 +1391,15 @@ table = try $ do (gridTable False <|> gridTable True)) "table" optional blanklines caption <- case frontCaption of - Nothing -> option (return mempty) tableCaption + Nothing -> option mempty tableCaption Just c -> return c - return $ do - caption' <- caption - heads' <- heads - lns' <- lns - return $ B.table caption' (zip aligns widths) heads' lns' + return $ B.table caption (zip aligns widths) heads lns -- -- inline -- -inline :: MarkdownParser (F Inlines) +inline :: MarkdownParser Inlines inline = choice [ whitespace , bareURL , str @@ -1413,7 +1422,7 @@ inline = choice [ whitespace , rawLaTeXInline' , exampleRef , smart - , return . B.singleton <$> charRef + , B.singleton <$> charRef , symbol , ltSign ] "inline" @@ -1424,43 +1433,42 @@ escapedChar' = try $ do (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) <|> oneOf "\\`*_{}[]()>#+-.!~\"" -escapedChar :: MarkdownParser (F Inlines) +escapedChar :: MarkdownParser Inlines escapedChar = do result <- escapedChar' case result of - ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space + ' ' -> return $ B.str "\160" -- "\ " is a nonbreaking space '\n' -> guardEnabled Ext_escaped_line_breaks >> - return (return B.linebreak) -- "\[newline]" is a linebreak - _ -> return $ return $ B.str [result] + return B.linebreak -- "\[newline]" is a linebreak + _ -> return $ B.str [result] -ltSign :: MarkdownParser (F Inlines) +ltSign :: MarkdownParser Inlines ltSign = do guardDisabled Ext_raw_html <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag)) char '<' - return $ return $ B.str "<" + return $ B.str "<" -exampleRef :: MarkdownParser (F Inlines) +exampleRef :: MarkdownParser Inlines exampleRef = try $ do guardEnabled Ext_example_lists char '@' lab <- many1 (alphaNum <|> oneOf "-_") - return $ do - st <- askF - return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + st <- ask + return $ case M.lookup lab (stateExamples st) of + Just n -> B.str (show n) + Nothing -> B.str ('@':lab) -symbol :: MarkdownParser (F Inlines) +symbol :: MarkdownParser Inlines symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ return $ B.str [result] + return $ B.str [result] -- parses inline code, between n `s and n `s -code :: MarkdownParser (F Inlines) +code :: MarkdownParser Inlines code = try $ do starts <- many1 (char '`') skipSpaces @@ -1470,16 +1478,16 @@ code = try $ do notFollowedBy (char '`'))) attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >> optional whitespace >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result + return $ B.codeWith attr $ trim $ concat result -math :: MarkdownParser (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) +math :: MarkdownParser Inlines +math = (B.displayMath <$> (mathDisplay >>= applyMacros')) + <|> (B.math <$> (mathInline >>= applyMacros')) -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. enclosure :: Char - -> MarkdownParser (F Inlines) + -> MarkdownParser Inlines enclosure c = do -- we can't start an enclosure with _ if after a string and -- the intraword_underscores extension is enabled: @@ -1487,13 +1495,13 @@ enclosure c = do <|> guard (c == '*') <|> (guard =<< notAfterString) cs <- many1 (char c) - (return (B.str cs) <>) <$> whitespace + ((B.str cs) <>) <$> whitespace <|> do case length cs of 3 -> three c 2 -> two c mempty 1 -> one c mempty - _ -> return (return $ B.str cs) + _ -> return $ B.str cs ender :: Char -> Int -> MarkdownParser () ender c n = try $ do @@ -1506,74 +1514,74 @@ ender c n = try $ do -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. -- Otherwise, emit ccc then the results. -three :: Char -> MarkdownParser (F Inlines) +three :: Char -> MarkdownParser Inlines three c = do contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) - (ender c 3 >> return ((B.strong . B.emph) <$> contents)) - <|> (ender c 2 >> one c (B.strong <$> contents)) - <|> (ender c 1 >> two c (B.emph <$> contents)) - <|> return (return (B.str [c,c,c]) <> contents) + (ender c 3 >> return ((B.strong . B.emph) contents)) + <|> (ender c 2 >> one c (B.strong contents)) + <|> (ender c 1 >> two c (B.emph contents)) + <|> return ((B.str [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. -two :: Char -> F Inlines -> MarkdownParser (F Inlines) +two :: Char -> Inlines -> MarkdownParser Inlines two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) - (ender c 2 >> return (B.strong <$> (prefix' <> contents))) - <|> return (return (B.str [c,c]) <> (prefix' <> contents)) + (ender c 2 >> return (B.strong (prefix' <> contents))) + <|> return ((B.str [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. -one :: Char -> F Inlines -> MarkdownParser (F Inlines) +one :: Char -> Inlines -> MarkdownParser Inlines one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline) <|> try (string [c,c] >> notFollowedBy (ender c 1) >> two c mempty) ) - (ender c 1 >> return (B.emph <$> (prefix' <> contents))) - <|> return (return (B.str [c]) <> (prefix' <> contents)) + (ender c 1 >> return (B.emph (prefix' <> contents))) + <|> return ((B.str [c]) <> (prefix' <> contents)) -strongOrEmph :: MarkdownParser (F Inlines) +strongOrEmph :: MarkdownParser Inlines strongOrEmph = enclosure '*' <|> enclosure '_' --- | Parses a list of inlines between start and end delimiters. +-- | Parses a list oInlines between start and end delimiters. inlinesBetween :: (Show b) => MarkdownParser a -> MarkdownParser b - -> MarkdownParser (F Inlines) + -> MarkdownParser Inlines inlinesBetween start end = - (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) + (trimInlines . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace <* notFollowedBy' end -strikeout :: MarkdownParser (F Inlines) -strikeout = fmap B.strikeout <$> +strikeout :: MarkdownParser Inlines +strikeout = B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: MarkdownParser (F Inlines) -superscript = fmap B.superscript <$> try (do +superscript :: MarkdownParser Inlines +superscript = B.superscript <$> try (do guardEnabled Ext_superscript char '^' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: MarkdownParser (F Inlines) -subscript = fmap B.subscript <$> try (do +subscript :: MarkdownParser Inlines +subscript = B.subscript <$> try (do guardEnabled Ext_subscript char '~' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: MarkdownParser (F Inlines) -whitespace = spaceChar >> return <$> (lb <|> regsp) "whitespace" +whitespace :: MarkdownParser Inlines +whitespace = spaceChar >> (lb <|> regsp) "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: Parser [Char] st Char +nonEndline :: Monad m => ParserT [Char] st m Char nonEndline = satisfy (/='\n') -str :: MarkdownParser (F Inlines) +str :: MarkdownParser Inlines str = do result <- many1 alphaNum updateLastStrPos @@ -1581,14 +1589,14 @@ str = do isSmart <- getOption readerSmart if isSmart then case likelyAbbrev result of - [] -> return $ return $ B.str result + [] -> return $ B.str result xs -> choice (map (\x -> try (string x >> oneOf " \n" >> lookAhead alphaNum >> - return (return $ B.str + return (B.str $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ return $ B.str result) - else return $ return $ B.str result + <|> (return $ B.str result) + else return $ B.str result -- | if the string matches the beginning of an abbreviation (before -- the first period, return strings that would finish the abbreviation. @@ -1603,7 +1611,7 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: MarkdownParser (F Inlines) +endline :: MarkdownParser Inlines endline = try $ do newline notFollowedBy blankline @@ -1616,18 +1624,18 @@ endline = try $ do notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) notFollowedByHtmlCloser (eof >> return mempty) - <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + <|> (guardEnabled Ext_hard_line_breaks >> return B.linebreak) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) - <|> (return $ return B.space) + <|> (return B.space) -- -- links -- -- a reference label for a link -reference :: MarkdownParser (F Inlines, String) +reference :: MarkdownParser (Inlines, String) reference = do notFollowedBy' (string "[^") -- footnote reference - withRaw $ trimInlinesF <$> inlinesInBalancedBrackets + withRaw $ trimInlines <$> inlinesInBalancedBrackets parenthesizedChars :: MarkdownParser [Char] parenthesizedChars = do @@ -1655,7 +1663,7 @@ source = do linkTitle :: MarkdownParser String linkTitle = quotedTitle '"' <|> quotedTitle '\'' -link :: MarkdownParser (F Inlines) +link :: MarkdownParser Inlines link = try $ do st <- getState guard $ stateAllowLinks st @@ -1665,14 +1673,14 @@ link = try $ do regLink B.link lab <|> referenceLink B.link (lab,raw) regLink :: (String -> String -> Inlines -> Inlines) - -> F Inlines -> MarkdownParser (F Inlines) + -> Inlines -> MarkdownParser Inlines regLink constructor lab = try $ do (src, tit) <- source - return $ constructor src tit <$> lab + return $ constructor src tit lab -- a link like [this][ref] or [this][] or [this] referenceLink :: (String -> String -> Inlines -> Inlines) - -> (F Inlines, String) -> MarkdownParser (F Inlines) + -> (Inlines, String) -> MarkdownParser Inlines referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False (ref,raw') <- option (mempty, "") $ @@ -1685,24 +1693,22 @@ referenceLink constructor (lab, raw) = do fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references - let makeFallback = do - parsedRaw' <- parsedRaw - fallback' <- fallback - return $ B.str "[" <> fallback' <> B.str "]" <> + let makeFallback = + B.str "[" <> fallback <> B.str "]" <> (if sp && not (null raw) then B.space else mempty) <> - parsedRaw' - return $ do - keys <- asksF stateKeys - case M.lookup key keys of - Nothing -> do - headers <- asksF stateHeaders - ref' <- if labIsRef then lab else ref - if implicitHeaderRefs - then case M.lookup ref' headers of - Just ident -> constructor ('#':ident) "" <$> lab - Nothing -> makeFallback - else makeFallback - Just (src,tit) -> constructor src tit <$> lab + parsedRaw + keys <- asks stateKeys + headers <- asks stateHeaders + return $ + case M.lookup key keys of + Nothing -> + let ref' = if labIsRef then lab else ref in + if implicitHeaderRefs + then case M.lookup ref' headers of + Just ident -> constructor ('#':ident) "" lab + Nothing -> makeFallback + else makeFallback + Just (src,tit) -> constructor src tit lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1711,14 +1717,14 @@ dropBrackets = reverse . dropRB . reverse . dropLB dropLB ('[':xs) = xs dropLB xs = xs -bareURL :: MarkdownParser (F Inlines) +bareURL :: MarkdownParser Inlines bareURL = try $ do guardEnabled Ext_autolink_bare_uris (orig, src) <- uri <|> emailAddress notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") - return $ return $ B.link src "" (B.str orig) + return $ B.link src "" (B.str orig) -autoLink :: MarkdownParser (F Inlines) +autoLink :: MarkdownParser Inlines autoLink = try $ do char '<' (orig, src) <- uri <|> emailAddress @@ -1727,9 +1733,9 @@ autoLink = try $ do -- final punctuation. for example: in ``, -- the URI parser will stop before the dashes. extra <- fromEntities <$> manyTill nonspaceChar (char '>') - return $ return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra) + return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra) -image :: MarkdownParser (F Inlines) +image :: MarkdownParser Inlines image = try $ do char '!' (lab,raw) <- reference @@ -1739,38 +1745,33 @@ image = try $ do _ -> B.image src regLink constructor lab <|> referenceLink constructor (lab,raw) -note :: MarkdownParser (F Inlines) +note :: MarkdownParser Inlines note = try $ do guardEnabled Ext_footnotes + (stateInFootnote <$> getState) >>= guard . not ref <- noteMarker - return $ do - notes <- asksF stateNotes' + notes <- asks stateNotes' + return $ case lookup ref notes of - Nothing -> return $ B.str $ "[^" ++ ref ++ "]" - Just contents -> do - st <- askF - -- process the note in a context that doesn't resolve - -- notes, to avoid infinite looping with notes inside - -- notes: - let contents' = runF contents st{ stateNotes' = [] } - return $ B.note contents' - -inlineNote :: MarkdownParser (F Inlines) + Nothing -> B.str $ "[^" ++ ref ++ "]" + Just contents -> B.note contents + +inlineNote :: MarkdownParser Inlines inlineNote = try $ do guardEnabled Ext_inline_notes char '^' contents <- inlinesInBalancedBrackets - return $ B.note . B.para <$> contents + return . B.note . B.para $ contents -rawLaTeXInline' :: MarkdownParser (F Inlines) +rawLaTeXInline' :: MarkdownParser Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s + RawInline _ s <- generalize rawLaTeXInline + return $ B.rawInline "tex" s -- "tex" because it might be context or latex -rawConTeXtEnvironment :: Parser [Char] st String +rawConTeXtEnvironment :: Monad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1779,14 +1780,14 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String +inBrackets :: Monad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -spanHtml :: MarkdownParser (F Inlines) +spanHtml :: MarkdownParser Inlines spanHtml = try $ do guardEnabled Ext_native_spans (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) @@ -1798,10 +1799,10 @@ spanHtml = try $ do Just s | null ident && null classes && map toLower (filter (`notElem` " \t;") s) == "font-variant:small-caps" - -> return $ B.smallcaps <$> contents - _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents + -> return $ B.smallcaps contents + _ -> return $ B.spanWith (ident, classes, keyvals) contents -divHtml :: MarkdownParser (F Blocks) +divHtml :: MarkdownParser Blocks divHtml = try $ do guardEnabled Ext_native_divs (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) @@ -1819,11 +1820,11 @@ divHtml = try $ do let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) <$> contents + return $ B.divWith (ident, classes, keyvals) contents else -- avoid backtracing - return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents + return $ (B.rawBlock "html" (rawtag <> bls)) <> contents -rawHtmlInline :: MarkdownParser (F Inlines) +rawHtmlInline :: MarkdownParser Inlines rawHtmlInline = do guardEnabled Ext_raw_html inHtmlBlock <- stateInHtmlBlock <$> getState @@ -1838,19 +1839,19 @@ rawHtmlInline = do then (\x -> isInlineTag x && not (isCloseBlockTag x)) else not . isTextTag - return $ return $ B.rawInline "html" result + return $ B.rawInline "html" result -- Citations -cite :: MarkdownParser (F Inlines) +cite :: MarkdownParser Inlines cite = do guardEnabled Ext_citations citations <- textualCite <|> do (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) <$> cs + return $ (flip B.cite (B.text raw)) cs return citations -textualCite :: MarkdownParser (F Inlines) +textualCite :: MarkdownParser Inlines textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1864,29 +1865,26 @@ textualCite = try $ do case mbrest of Just (rest, raw) -> return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) - <$> rest + rest Nothing -> (do (cs, raw) <- withRaw $ bareloc first - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs) - <|> return (do st <- askF - return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] $ B.str $ '@':key) + return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) cs) + <|> do st <- ask + return $ case M.lookup key (stateExamples st) of + Just n -> B.str (show n) + _ -> B.cite [first] $ B.str $ '@':key -bareloc :: Citation -> MarkdownParser (F [Citation]) +bareloc :: Citation -> MarkdownParser [Citation] bareloc c = try $ do spnl char '[' suff <- suffix - rest <- option (return []) $ try $ char ';' >> citeList + rest <- option [] $ try $ char ';' >> citeList spnl char ']' - return $ do - suff' <- suff - rest' <- rest - return $ c{ citationSuffix = B.toList suff' } : rest' + return $ c{ citationSuffix = B.toList suff } : rest -normalCite :: MarkdownParser (F [Citation]) +normalCite :: MarkdownParser [Citation] normalCite = try $ do char '[' spnl @@ -1895,60 +1893,57 @@ normalCite = try $ do char ']' return citations -suffix :: MarkdownParser (F Inlines) +suffix :: MarkdownParser Inlines suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl - rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) + rest <- trimInlines . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) return $ if hasSpace - then (B.space <>) <$> rest + then B.space <> rest else rest -prefix :: MarkdownParser (F Inlines) -prefix = trimInlinesF . mconcat <$> +prefix :: MarkdownParser Inlines +prefix = trimInlines . mconcat <$> manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: MarkdownParser (F [Citation]) -citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) +citeList :: MarkdownParser [Citation] +citeList = sepBy1 citation (try $ char ';' >> spnl) -citation :: MarkdownParser (F Citation) +citation :: MarkdownParser (Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - -smart :: MarkdownParser (F Inlines) + return $ Citation{ citationId = key + , citationPrefix = B.toList pref + , citationSuffix = B.toList suff + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + +smart :: MarkdownParser Inlines smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [apostrophe, dash, ellipses]) + choice [apostrophe, dash, ellipses] -singleQuoted :: MarkdownParser (F Inlines) +singleQuoted :: MarkdownParser Inlines singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ - fmap B.singleQuoted . trimInlinesF . mconcat <$> + B.singleQuoted . trimInlines . mconcat <$> many1Till inline singleQuoteEnd -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: MarkdownParser (F Inlines) +doubleQuoted :: MarkdownParser Inlines doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) + (B.doubleQuoted . trimInlines $ contents)) + <|> return ((B.str "\8220") <> contents) -- cgit v1.2.3 From febe5112af6c917525b9fded66e00abc5245ab07 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Fri, 30 Jan 2015 14:08:44 +0000 Subject: HLint changes --- src/Text/Pandoc/Readers/Markdown.hs | 158 +++++++++++++++++------------------- 1 file changed, 76 insertions(+), 82 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 400873fe6..043d7e94c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,7 +31,7 @@ Conversion of markdown-formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Markdown ( readMarkdown, readMarkdownWithWarnings ) where -import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate ) +import Data.List ( transpose, sortBy, intersperse, intercalate, elemIndex) import qualified Data.Map as M import Data.Scientific (coefficient, base10Exponent) import Data.Ord ( comparing ) @@ -72,7 +72,7 @@ readMarkdown :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc readMarkdown opts s = - (runMarkdown opts s parseMarkdown) + runMarkdown opts s parseMarkdown -- | Read markdown from an input string and return a pair of a Pandoc document -- and a list of warnings. @@ -132,7 +132,7 @@ inList = do guard (ctx == ListItemState) isNull :: Inlines -> Bool -isNull ils = B.isNull ils +isNull = B.isNull spnl :: Monad m => ParserT [Char] st m () spnl = try $ do @@ -236,7 +236,7 @@ pandocTitleBlock = try $ do $ nullMeta updateState $ \st -> st{ stateMeta = stateMeta st <> meta' } -yamlMetaBlock :: MarkdownParser (Blocks) +yamlMetaBlock :: MarkdownParser Blocks yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition @@ -284,7 +284,7 @@ yamlMetaBlock = try $ do -- ignore fields ending with _ ignorable :: Text -> Bool -ignorable t = (T.pack "_") `T.isSuffixOf` t +ignorable t = T.pack "_" `T.isSuffixOf` t toMetaValue :: ReaderOptions -> Text -> MetaValue toMetaValue opts x = @@ -294,7 +294,7 @@ toMetaValue opts x = | endsWithNewline x -> MetaBlocks [Para xs] | otherwise -> MetaInlines xs Pandoc _ bs -> MetaBlocks bs - where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t + where endsWithNewline t = T.pack "\n" `T.isSuffixOf` t yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t @@ -347,7 +347,7 @@ parseMarkdown = do let Pandoc _ bs = B.doc blocks return $ Pandoc meta bs -referenceKey :: MarkdownParser (Blocks) +referenceKey :: MarkdownParser Blocks referenceKey = try $ do pos <- getPosition skipNonindentSpaces @@ -394,7 +394,7 @@ quotedTitle c = try $ do -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for -- an abbreviation. -abbrevKey :: MarkdownParser (Blocks) +abbrevKey :: MarkdownParser Blocks abbrevKey = do guardEnabled Ext_abbreviations try $ do @@ -421,7 +421,7 @@ rawLines = do rest <- many rawLine return $ unlines (first:rest) -noteBlock :: MarkdownParser (Blocks) +noteBlock :: MarkdownParser Blocks noteBlock = try $ do pos <- getPosition skipNonindentSpaces @@ -454,10 +454,10 @@ inFootnote p = do -- parsing blocks -- -parseBlocks :: MarkdownParser (Blocks) +parseBlocks :: MarkdownParser Blocks parseBlocks = mconcat <$> manyTill block eof -block :: MarkdownParser (Blocks) +block :: MarkdownParser Blocks block = do tr <- getOption readerTrace pos <- getPosition @@ -486,21 +486,21 @@ block = do , para , plain ] "block" - when tr $ do + when tr $ trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList $ res)) (return ()) + (take 60 . show . B.toList $ res)) (return ()) return res -- -- header blocks -- -header :: MarkdownParser (Blocks) +header :: MarkdownParser Blocks header = setextHeader <|> atxHeader "header" atxHeader :: MarkdownParser Blocks atxHeader = try $ do - level <- many1 (char '#') >>= return . length + level <- length <$> many1 (char '#') notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces @@ -544,7 +544,7 @@ setextHeader = try $ do underlineChar <- oneOf setextHChars many (char underlineChar) blanklines - let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 + let level = (fromMaybe 0 $ elemIndex underlineChar setextHChars) + 1 attr' <- registerHeader attr text return $ B.headerWith attr' level text @@ -567,7 +567,7 @@ hrule = try $ do -- indentedLine :: MarkdownParser String -indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") +indentedLine = indentSpaces >> ((++ "\n") <$> anyLine) blockDelimiter :: Monad m => (Char -> Bool) @@ -577,8 +577,7 @@ blockDelimiter f len = try $ do c <- lookAhead (satisfy f) case len of Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> many (char c) >>= - return . (+ 3) . length + Nothing -> count 3 (char c) >> ((+ 3) . length <$> many (char c)) attributes :: MarkdownParser Attr attributes = try $ do @@ -644,7 +643,7 @@ toLanguageId = map toLower . go go "objective-c" = "objectivec" go x = x -codeBlockIndented :: MarkdownParser (Blocks) +codeBlockIndented :: MarkdownParser Blocks codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -655,7 +654,7 @@ codeBlockIndented = do return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: MarkdownParser (Blocks) +lhsCodeBlock :: MarkdownParser Blocks lhsCodeBlock = do guardEnabled Ext_literate_haskell (B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> @@ -717,11 +716,11 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: MarkdownParser (Blocks) +blockQuote :: MarkdownParser Blocks blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" + contents <- parseFromString parseBlocks $ intercalate "\n" raw ++ "\n\n" return $ B.blockQuote contents -- @@ -765,7 +764,7 @@ anyOrderedListStart = try $ do return res listStart :: MarkdownParser () -listStart = bulletListStart <|> (anyOrderedListStart >> return ()) +listStart = bulletListStart <|> void anyOrderedListStart listLine :: MarkdownParser String listLine = try $ do @@ -820,7 +819,7 @@ listContinuationLine = try $ do return $ result ++ "\n" listItem :: MarkdownParser a - -> MarkdownParser (Blocks) + -> MarkdownParser Blocks listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -857,7 +856,7 @@ orderedList = try $ do start' <- option 1 $ guardEnabled Ext_startnum >> return start return $ B.orderedListWith (start', style, delim) (compactify' items) -bulletList :: MarkdownParser (Blocks) +bulletList :: MarkdownParser Blocks bulletList = do items <- many1 $ listItem bulletListStart return $ B.bulletList (compactify' items) @@ -882,7 +881,7 @@ definitionListItem compact = try $ do term <- parseFromString (trimInlines . mconcat <$> many inline) rawLine' contents <- mapM (parseFromString parseBlocks) raw optional blanklines - return $ (term, contents) + return (term, contents) defRawBlock :: Bool -> MarkdownParser String defRawBlock compact = try $ do @@ -905,18 +904,18 @@ defRawBlock compact = try $ do return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" -definitionList :: MarkdownParser (Blocks) +definitionList :: MarkdownParser Blocks definitionList = try $ do lookAhead (anyLine >> optional blankline >> defListMarker) compactDefinitionList <|> normalDefinitionList -compactDefinitionList :: MarkdownParser (Blocks) +compactDefinitionList :: MarkdownParser Blocks compactDefinitionList = do guardEnabled Ext_compact_definition_lists items <- many1 $ definitionListItem True return $ B.definitionList (compactify'DL items) -normalDefinitionList :: MarkdownParser (Blocks) +normalDefinitionList :: MarkdownParser Blocks normalDefinitionList = do guardEnabled Ext_definition_lists items <- many1 $ definitionListItem False @@ -947,7 +946,7 @@ para = try $ do Just "div" -> () <$ lookAhead (htmlTag (~== TagClose "div")) _ -> mzero - return $ do + return $ case B.toList result of [Image alt (src,tit)] | Ext_implicit_figures `Set.member` exts -> @@ -956,7 +955,7 @@ para = try $ do $ Image alt (src,'f':'i':'g':':':tit) _ -> B.para result -plain :: MarkdownParser (Blocks) +plain :: MarkdownParser Blocks plain = B.plain . trimInlines . mconcat <$> many1 inline -- @@ -968,13 +967,13 @@ htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: MarkdownParser (Blocks) +htmlBlock :: MarkdownParser Blocks htmlBlock = do guardEnabled Ext_raw_html try (do (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag (guard (t `elem` ["pre","style","script"]) >> - (B.rawBlock "html") <$> rawVerbatimBlock) + B.rawBlock "html" <$> rawVerbatimBlock) <|> (do guardEnabled Ext_markdown_attribute oldMarkdownAttribute <- stateMarkdownAttribute <$> getState markdownAttribute <- @@ -993,7 +992,7 @@ htmlBlock = do <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)) <|> htmlBlock' -htmlBlock' :: MarkdownParser (Blocks) +htmlBlock' :: MarkdownParser Blocks htmlBlock' = try $ do first <- htmlElement skipMany spaceChar @@ -1005,23 +1004,23 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag) rawVerbatimBlock :: MarkdownParser String rawVerbatimBlock = try $ do - (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem - ["pre", "style", "script"]) - (const True)) + (TagOpen tag _, open) <- + htmlTag (tagOpen (`elem` ["pre", "style", "script"]) + (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags' [TagClose tag] -rawTeXBlock :: MarkdownParser (Blocks) +rawTeXBlock :: MarkdownParser Blocks rawTeXBlock = do guardEnabled Ext_raw_tex result <- (B.rawBlock "latex" . concat <$> - (generalize rawLaTeXBlock) `sepEndBy1` blankline) + generalize rawLaTeXBlock `sepEndBy1` blankline) <|> (B.rawBlock "context" . concat <$> rawConTeXtEnvironment `sepEndBy1` blankline) spaces return result -rawHtmlBlocks :: MarkdownParser (Blocks) +rawHtmlBlocks :: MarkdownParser Blocks rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag -- try to find closing tag @@ -1036,7 +1035,7 @@ rawHtmlBlocks = do (B.rawBlock "html" $ stripMarkdownAttribute raw) <> contents <> (B.rawBlock "html" rawcloser))) - <|> return ((B.rawBlock "html" raw) <> contents) + <|> return (B.rawBlock "html" raw <> contents) updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } return result @@ -1051,7 +1050,7 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s -- line block -- -lineBlock :: MarkdownParser (Blocks) +lineBlock :: MarkdownParser Blocks lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= @@ -1069,7 +1068,7 @@ dashedLine :: Monad m => Char dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) + return (length dashes, length $ dashes ++ sp) -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. @@ -1094,8 +1093,7 @@ simpleTableHeader headless = try $ do then replicate (length dashes) "" else rawHeads heads <- - mapM (parseFromString (mconcat <$> many plain)) - $ map trim rawHeads' + mapM (parseFromString (mconcat <$> many plain) . trim) rawHeads' return (heads, aligns, indices) -- Returns an alignment type for a table, based on a list of strings @@ -1247,9 +1245,7 @@ gridTableHeader headless = try $ do else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () + unless headless (void $ gridTableSep '=') let lines' = map snd dashes let indices = scanl (+) 0 lines' let aligns = replicate (length lines') AlignDefault @@ -1274,7 +1270,7 @@ gridTableRow indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - compactify' <$> (mapM (parseFromString parseBlocks) cols) + compactify' <$> mapM (parseFromString parseBlocks) cols removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -1309,7 +1305,7 @@ pipeTable = try $ do return (row, als) ) lines' <- many1 pipeTableRow let widths = replicate (length aligns) 0.0 - return $ (aligns, widths, heads, lines') + return (aligns, widths, heads, lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1334,7 +1330,7 @@ pipeTableRow = do map (\ils -> case trimInlines ils of ils' | B.isNull ils' -> mempty - | otherwise -> B.plain $ ils') cells + | otherwise -> B.plain ils') cells pipeTableHeaderPart :: Monad m => ParserT [Char] st m Alignment pipeTableHeaderPart = try $ do @@ -1371,10 +1367,10 @@ tableWith headerParser rowParser lineParser footerParser = try $ do lines' <- rowParser indices `sepEndBy1` lineParser footerParser numColumns <- getOption readerColumns - let widths = if (indices == []) - then replicate (length aligns) 0.0 - else widthsFromIndices numColumns indices - return $ (aligns, widths, heads, lines') + let widths = case indices of + [] -> replicate (length aligns) 0.0 + _ -> widthsFromIndices numColumns indices + return (aligns, widths, heads, lines') table :: MarkdownParser Blocks table = try $ do @@ -1495,8 +1491,8 @@ enclosure c = do <|> guard (c == '*') <|> (guard =<< notAfterString) cs <- many1 (char c) - ((B.str cs) <>) <$> whitespace - <|> do + (B.str cs <>) <$> whitespace + <|> case length cs of 3 -> three c 2 -> two c mempty @@ -1520,7 +1516,7 @@ three c = do (ender c 3 >> return ((B.strong . B.emph) contents)) <|> (ender c 2 >> one c (B.strong contents)) <|> (ender c 1 >> two c (B.emph contents)) - <|> return ((B.str [c,c,c]) <> contents) + <|> return (B.str [c,c,c] <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. @@ -1528,7 +1524,7 @@ two :: Char -> Inlines -> MarkdownParser Inlines two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) (ender c 2 >> return (B.strong (prefix' <> contents))) - <|> return ((B.str [c,c]) <> (prefix' <> contents)) + <|> return (B.str [c,c] <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. @@ -1539,7 +1535,7 @@ one c prefix' = do notFollowedBy (ender c 1) >> two c mempty) ) (ender c 1 >> return (B.emph (prefix' <> contents))) - <|> return ((B.str [c]) <> (prefix' <> contents)) + <|> return (B.str [c] <> (prefix' <> contents)) strongOrEmph :: MarkdownParser Inlines strongOrEmph = enclosure '*' <|> enclosure '_' @@ -1593,8 +1589,8 @@ str = do xs -> choice (map (\x -> try (string x >> oneOf " \n" >> lookAhead alphaNum >> - return (B.str - $ result ++ spacesToNbr x ++ "\160"))) xs) + return (B.str $ + result ++ spacesToNbr x ++ "\160"))) xs) <|> (return $ B.str result) else return $ B.str result @@ -1626,7 +1622,7 @@ endline = try $ do (eof >> return mempty) <|> (guardEnabled Ext_hard_line_breaks >> return B.linebreak) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) - <|> (return B.space) + <|> return B.space -- -- links @@ -1822,7 +1818,7 @@ divHtml = try $ do let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) contents else -- avoid backtracing - return $ (B.rawBlock "html" (rawtag <> bls)) <> contents + return $ B.rawBlock "html" (rawtag <> bls) <> contents rawHtmlInline :: MarkdownParser Inlines rawHtmlInline = do @@ -1846,10 +1842,8 @@ rawHtmlInline = do cite :: MarkdownParser Inlines cite = do guardEnabled Ext_citations - citations <- textualCite - <|> do (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) cs - return citations + textualCite <|> do (cs, raw) <- withRaw normalCite + return $ B.cite cs (B.text raw) textualCite :: MarkdownParser Inlines textualCite = try $ do @@ -1868,7 +1862,7 @@ textualCite = try $ do rest Nothing -> (do (cs, raw) <- withRaw $ bareloc first - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) cs) + return $ B.cite cs (B.text $ '@':key ++ " " ++ raw)) <|> do st <- ask return $ case M.lookup key (stateExamples st) of Just n -> B.str (show n) @@ -1909,20 +1903,20 @@ prefix = trimInlines . mconcat <$> citeList :: MarkdownParser [Citation] citeList = sepBy1 citation (try $ char ';' >> spnl) -citation :: MarkdownParser (Citation) +citation :: MarkdownParser Citation citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ Citation{ citationId = key - , citationPrefix = B.toList pref - , citationSuffix = B.toList suff - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } + return Citation{ citationId = key + , citationPrefix = B.toList pref + , citationSuffix = B.toList suff + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } smart :: MarkdownParser Inlines smart = do @@ -1944,6 +1938,6 @@ doubleQuoted :: MarkdownParser Inlines doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return + (withQuoteContext InDoubleQuote doubleQuoteEnd >> return (B.doubleQuoted . trimInlines $ contents)) - <|> return ((B.str "\8220") <> contents) + <|> return (B.str "\8220" <> contents) -- cgit v1.2.3 From 9cd0bdb41a2c14e0f28e5ab179b0da73a0b8ba78 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Mon, 2 Feb 2015 10:38:30 +0000 Subject: Factor out "returnState" into Parsing module --- src/Text/Pandoc/Parsing.hs | 5 +++++ src/Text/Pandoc/Readers/Markdown.hs | 9 +-------- src/Text/Pandoc/Readers/Org.hs | 8 +------- 3 files changed, 7 insertions(+), 15 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 3e9d559dc..aebdcae4c 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -66,6 +66,7 @@ module Text.Pandoc.Parsing ( anyLine, gridTableWith, readWith, returnWarnings, + returnState, readWithM, testStringWith, guardEnabled, @@ -873,6 +874,10 @@ returnWarnings p = do warnings <- stateWarnings <$> getState return (doc, warnings) +-- | Return the final internal state with the result of a parser +returnState :: (Stream s m c) => ParsecT s st m a -> ParsecT s st m (a, st) +returnState p = (,) <$> p <*> getState + -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a, Stream [Char] Identity Char) => ParserT [Char] ParserState Identity a diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 043d7e94c..92cf9a22e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -81,17 +81,10 @@ readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options -> (Pandoc, [String]) readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown) - -retState :: MarkdownParser a -> MarkdownParser (a, ParserState) -retState p = do - r <- p - s <- getState - return (r, s) - runMarkdown :: ReaderOptions -> String -> MarkdownParser a -> a runMarkdown opts inp p = fst res where - imd = readWithM (retState p) def{ stateOptions = opts } (inp ++ "\n\n") + imd = readWithM (returnState p) def{ stateOptions = opts } (inp ++ "\n\n") res = runReader imd s s :: ParserState s = snd $ runReader imd s diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5cb66bfa7..fd58956d0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -67,17 +67,11 @@ readOrg opts s = runOrg opts s parseOrg runOrg :: ReaderOptions -> String -> OrgParser a -> a runOrg opts inp p = fst res where - imd = readWithM (retState p) def{ orgStateOptions = opts } (inp ++ "\n\n") + imd = readWithM (returnState p) def{ orgStateOptions = opts } (inp ++ "\n\n") res = runReader imd s s :: OrgParserState s = snd $ runReader imd s -retState :: OrgParser a -> OrgParser (a, OrgParserState) -retState p = do - r <- p - s <- getState - return (r, s) - type OrgParser a = ParserT [Char] OrgParserState (Reader OrgParserState) a parseOrg :: OrgParser Pandoc -- cgit v1.2.3 From bf8667660d027f2aac7256e25b904170302d440f Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Mon, 19 Jan 2015 12:00:29 +0000 Subject: Remove landmine from ImageSize --- src/Text/Pandoc/ImageSize.hs | 67 ++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 30 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 68b34dcf3..963057b6f 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-warn-type-defaults #-} {- Copyright (C) 2011-2014 John MacFarlane @@ -40,6 +41,10 @@ import Data.Binary import Data.Binary.Get import Text.Pandoc.Shared (safeRead) import qualified Data.Map as M +import Text.Pandoc.Compat.Except +import Text.Pandoc.Error +import Control.Monad.Trans +import Data.Maybe (fromMaybe) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl @@ -64,7 +69,7 @@ imageType img = case B.take 4 img of "%!PS" | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" -> return Eps - _ -> fail "Unknown image type" + _ -> (hush . Left) "Unknown image type" imageSize :: ByteString -> Maybe ImageSize imageSize img = do @@ -114,7 +119,7 @@ pngSize img = do ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) - _ -> fail "PNG parse error" + _ -> (hush . Left) "PNG parse error" let (dpix, dpiy) = findpHYs rest'' return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } @@ -143,7 +148,7 @@ gifSize img = do dpiX = 72, dpiY = 72 } - _ -> fail "GIF parse error" + _ -> (hush . Left) "GIF parse error" jpegSize :: ByteString -> Maybe ImageSize jpegSize img = do @@ -174,36 +179,37 @@ findJfifSize bs = do Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of [h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2) - _ -> fail "JPEG parse error" + _ -> (hush . Left) "JPEG parse error" Just (_,bs'') -> do case map fromIntegral $ unpack $ B.take 2 bs'' of [c1,c2] -> do let len = shift c1 8 + c2 -- skip variables findJfifSize $ B.drop len bs'' - _ -> fail "JPEG parse error" - Nothing -> fail "Did not find length record" + _ -> (hush . Left) "JPEG parse error" + Nothing -> (hush . Left) "Did not find length record" exifSize :: ByteString -> Maybe ImageSize -exifSize bs = runGet (Just <$> exifHeader bl) bl +exifSize bs = hush . runGet header $ bl where bl = BL.fromChunks [bs] + header = runExceptT $ exifHeader bl -- NOTE: It would be nicer to do -- runGet ((Just <$> exifHeader) <|> return Nothing) -- which would prevent pandoc from raising an error when an exif header can't -- be parsed. But we only get an Alternative instance for Get in binary 0.6, -- and binary 0.5 ships with ghc 7.6. -exifHeader :: BL.ByteString -> Get ImageSize +exifHeader :: BL.ByteString -> ExceptT String Get ImageSize exifHeader hdr = do - _app1DataSize <- getWord16be - exifHdr <- getWord32be - unless (exifHdr == 0x45786966) $ fail "Did not find exif header" - zeros <- getWord16be - unless (zeros == 0) $ fail "Expected zeros after exif header" + _app1DataSize <- lift getWord16be + exifHdr <- lift getWord32be + unless (exifHdr == 0x45786966) $ throwError "Did not find exif header" + zeros <- lift getWord16be + unless (zeros == 0) $ throwError "Expected zeros after exif header" -- beginning of tiff header -- we read whole thing to use -- in getting data from offsets: let tiffHeader = BL.drop 8 hdr - byteAlign <- getWord16be + byteAlign <- lift getWord16be let bigEndian = byteAlign == 0x4d4d let (getWord16, getWord32, getWord64) = if bigEndian @@ -213,17 +219,17 @@ exifHeader hdr = do num <- getWord32 den <- getWord32 return $ fromIntegral num / fromIntegral den - tagmark <- getWord16 - unless (tagmark == 0x002a) $ fail "Failed alignment sanity check" - ifdOffset <- getWord32 - skip (fromIntegral ifdOffset - 8) -- skip to IDF - numentries <- getWord16 - let ifdEntry = do - tag <- getWord16 >>= \t -> - maybe (return UnknownTagType) return - (M.lookup t tagTypeTable) - dataFormat <- getWord16 - numComponents <- getWord32 + tagmark <- lift getWord16 + unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check" + ifdOffset <- lift getWord32 + lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF + numentries <- lift getWord16 + let ifdEntry :: ExceptT String Get (TagType, DataFormat) + ifdEntry = do + tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable + <$> lift getWord16 + dataFormat <- lift getWord16 + numComponents <- lift getWord32 (fmt, bytesPerComponent) <- case dataFormat of 1 -> return (UnsignedByte . runGet getWord8, 1) @@ -238,9 +244,10 @@ exifHeader hdr = do 10 -> return (SignedRational . runGet getRational, 8) 11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4) 12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8) - _ -> fail $ "Unknown data format " ++ show dataFormat + _ -> throwError $ "Unknown data format " ++ show dataFormat let totalBytes = fromIntegral $ numComponents * bytesPerComponent - payload <- if totalBytes <= 4 -- data is right here + payload <- lift $ + if totalBytes <= 4 -- data is right here then fmt <$> (getLazyByteString (fromIntegral totalBytes) <* skip (4 - totalBytes)) @@ -252,9 +259,9 @@ exifHeader hdr = do entries <- sequence $ replicate (fromIntegral numentries) ifdEntry subentries <- case lookup ExifOffset entries of Just (UnsignedLong offset) -> do - pos <- bytesRead - skip (fromIntegral offset - (fromIntegral pos - 8)) - numsubentries <- getWord16 + pos <- lift bytesRead + lift $ skip (fromIntegral offset - (fromIntegral pos - 8)) + numsubentries <- lift getWord16 sequence $ replicate (fromIntegral numsubentries) ifdEntry _ -> return [] -- cgit v1.2.3 From ef981492fde284ceaedf0fd7e40416326c1f2d13 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Mon, 19 Jan 2015 12:46:27 +0000 Subject: Remove partial function from Pretty --- src/Text/Pandoc/Pretty.hs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 2f2656086..9a97dfc21 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -250,6 +250,11 @@ renderDoc :: (IsString a, Monoid a) => Doc -> DocState a renderDoc = renderList . toList . unDoc +data IsBlock = IsBlock Int [String] + +-- This would be nicer with a pattern synonym +-- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..) + renderList :: (IsString a, Monoid a) => [D] -> DocState a renderList [] = return () @@ -323,11 +328,11 @@ renderList (BreakingSpace : xs) = do outp 1 " " renderList xs' -renderList (b1@Block{} : b2@Block{} : xs) = - renderList (mergeBlocks False b1 b2 : xs) +renderList (Block i1 s1 : Block i2 s2 : xs) = + renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs) -renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) = - renderList (mergeBlocks True b1 b2 : xs) +renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) = + renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs) renderList (Block width lns : xs) = do st <- get @@ -339,15 +344,14 @@ renderList (Block width lns : xs) = do modify $ \s -> s{ prefix = oldPref } renderList xs -mergeBlocks :: Bool -> D -> D -> D -mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) = +mergeBlocks :: Bool -> IsBlock -> IsBlock -> D +mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) = Block (w1 + w2 + if addSpace then 1 else 0) $ zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties) where empties = replicate (abs $ length lns1 - length lns2) "" pad n s = s ++ replicate (n - realLength s) ' ' sp "" = "" sp xs = if addSpace then (' ' : xs) else xs -mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!" blockToDoc :: Int -> [String] -> Doc blockToDoc _ lns = text $ intercalate "\n" lns -- cgit v1.2.3 From 8381ac3b02e2dd818bc44dc31707efe222ec40c9 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 12:55:04 +0000 Subject: Add Text.Pandoc.Error module with PandocError type --- pandoc.cabal | 1 + src/Text/Pandoc/Error.hs | 39 +++++++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Parsing.hs | 19 ++++++------------- 3 files changed, 46 insertions(+), 13 deletions(-) create mode 100644 src/Text/Pandoc/Error.hs (limited to 'src/Text/Pandoc') diff --git a/pandoc.cabal b/pandoc.cabal index 16106f896..dd31927c7 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -289,6 +289,7 @@ Library Text.Pandoc.Pretty, Text.Pandoc.Shared, Text.Pandoc.MediaBag, + Text.Pandoc.Error, Text.Pandoc.Readers.HTML, Text.Pandoc.Readers.LaTeX, Text.Pandoc.Readers.Markdown, diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs new file mode 100644 index 000000000..d4172f7ca --- /dev/null +++ b/src/Text/Pandoc/Error.hs @@ -0,0 +1,39 @@ +module Text.Pandoc.Error (PandocError(..), handleError,hush, mapLeft) where + +import Text.Parsec.Error +import Text.Parsec.Pos hiding (Line) +import Text.Pandoc.Compat.Except + +type Input = String + +data PandocError = ParseFailure String + | ParsecError Input ParseError + deriving (Show) + + +instance Error PandocError where + strMsg = ParseFailure + + +mapLeft :: (a -> b) -> Either a c -> Either b c +mapLeft f (Left x) = Left (f x) +mapLeft _ (Right x) = Right x + +hush :: Either a b -> Maybe b +hush (Left _) = Nothing +hush (Right x) = Just x + +handleError :: Either PandocError a -> a +handleError (Right r) = r +handleError (Left err) = + case err of + ParseFailure string -> error string + ParsecError input err' -> + let errPos = errorPos err' + errLine = sourceLine errPos + errColumn = sourceColumn errPos + theline = (lines input ++ [""]) !! (errLine - 1) + in error $ "\nError at " ++ show err' ++ "\n" ++ + theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ + "^" + diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index aebdcae4c..c18aa331f 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -190,6 +190,8 @@ import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$)) import Data.Monoid import Data.Maybe (catMaybes) +import Text.Pandoc.Error + type Parser t s = Parsec t s type ParserT = ParsecT @@ -845,25 +847,16 @@ readWithM :: (Monad m, Functor m) => ParserT [Char] st m a -- ^ parser -> st -- ^ initial state -> String -- ^ input - -> m a + -> m (Either PandocError a) readWithM parser state input = - handleError <$> (runParserT parser state "source" input) - where - handleError (Left err') = - let errPos = errorPos err' - errLine = sourceLine errPos - errColumn = sourceColumn errPos - theline = (lines input ++ [""]) !! (errLine - 1) - in error $ "\nError at " ++ show err' ++ "\n" ++ - theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ - "^" - handleError (Right result) = result + mapLeft (ParsecError input) <$> runParserT parser state "source" input + -- | Parse a string with a given parser and state readWith :: Parser [Char] st a -> st -> String - -> a + -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp returnWarnings :: (Stream s m c) -- cgit v1.2.3 From dcb4951aadfba86dbf2b7a71978f8a2af0994a91 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:02:26 +0000 Subject: Change return type of DocBook reader --- src/Text/Pandoc/Readers/DocBook.hs | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 663960a87..f82158ab4 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -15,6 +15,9 @@ import Control.Applicative ((<$>)) import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Compat.Except +import Data.Default {- @@ -497,7 +500,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] ?asciidoc-br? - line break from asciidoc docbook output -} -type DB = State DBState +type DB = ExceptT PandocError (State DBState) data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType @@ -507,16 +510,18 @@ data DBState = DBState{ dbSectionLevel :: Int , dbFigureTitle :: Inlines } deriving Show -readDocBook :: ReaderOptions -> String -> Pandoc -readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp') - DBState{ dbSectionLevel = 0 - , dbQuoteType = DoubleQuote - , dbMeta = mempty - , dbAcceptsMeta = False - , dbBook = False - , dbFigureTitle = mempty - } +instance Default DBState where + def = DBState{ dbSectionLevel = 0 + , dbQuoteType = DoubleQuote + , dbMeta = mempty + , dbAcceptsMeta = False + , dbBook = False + , dbFigureTitle = mempty } + + +readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc +readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs + where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp' inp' = handleInstructions inp -- We treat specially (issue #1236), converting it -- cgit v1.2.3 From 1b12340859e6c4be286bc14036f34f653274632a Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:02:44 +0000 Subject: Change return type of Docx reader --- src/Text/Pandoc/Readers/Docx.hs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index d4680cb7e..900d0a1bf 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -96,14 +96,17 @@ import Control.Applicative ((<$>)) import Data.Sequence (ViewL(..), viewl) import qualified Data.Sequence as Seq (null) +import Text.Pandoc.Error +import Text.Pandoc.Compat.Except + readDocx :: ReaderOptions -> B.ByteString - -> (Pandoc, MediaBag) + -> Either PandocError (Pandoc, MediaBag) readDocx opts bytes = case archiveToDocx (toArchive bytes) of - Right docx -> (Pandoc meta blks, mediaBag) where - (meta, blks, mediaBag) = (docxToOutput opts docx) - Left _ -> error $ "couldn't parse docx file" + Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag)) + <$> (docxToOutput opts docx) + Left _ -> Left (ParseFailure "couldn't parse docx file") data DState = DState { docxAnchorMap :: M.Map String String , docxMediaBag :: MediaBag @@ -122,10 +125,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions instance Default DEnv where def = DEnv def False -type DocxContext = ReaderT DEnv (State DState) +type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState)) -evalDocxContext :: DocxContext a -> DEnv -> DState -> a -evalDocxContext ctx env st = evalState (runReaderT ctx env) st +evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a +evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx -- This is empty, but we put it in for future-proofing. spansToKeep :: [String] @@ -545,7 +548,7 @@ bodyToOutput (Body bps) = do blks', mediaBag) -docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) +docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag) docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def -- cgit v1.2.3 From b935ef6de58c1b3365012b7fa8ec5c0a0b30d13b Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:02:59 +0000 Subject: Change return type of EPUB reader --- src/Text/Pandoc/Readers/EPUB.hs | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index b061d8683..338540533 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -35,18 +35,20 @@ import Control.DeepSeq.Generics (deepseq, NFData) import Debug.Trace (trace) +import Text.Pandoc.Error + type Items = M.Map String (FilePath, MimeType) -readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag) +readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag) readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes) -runEPUB :: Except String a -> a -runEPUB = either error id . runExcept +runEPUB :: Except PandocError a -> Either PandocError a +runEPUB = runExcept -- Note that internal reference are aggresively normalised so that all ids -- are of the form "filename#id" -- -archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) +archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) archiveToEPUB os archive = do -- root is path to folder with manifest file in (root, content) <- getManifest archive @@ -64,19 +66,20 @@ archiveToEPUB os archive = do return $ (ast, mediaBag) where os' = os {readerParseRaw = True} - parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc + parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do when (readerTrace os) (traceM path) doc <- mimeToReader mime r path let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc - mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc + mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do fname <- findEntryByPathE (root path) archive - return $ fixInternalReferences path . + html <- either throwError return . readHtml os' . UTF8.toStringLazy $ fromEntry fname + return $ fixInternalReferences path html mimeToReader s _ path | s `elem` imageMimes = return $ imageToPandoc path | otherwise = return $ mempty @@ -114,7 +117,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"] type CoverImage = FilePath -parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items) +parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items) parseManifest content = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest @@ -130,7 +133,7 @@ parseManifest content = do mime <- findAttrE (emptyName "media-type") e return (uid, (href, mime)) -parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)] +parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do spine <- findElementE (dfName "spine") e let itemRefs = findChildren (dfName "itemref") spine @@ -141,7 +144,7 @@ parseSpine is e = do guard linear findAttr (emptyName "idref") ref -parseMeta :: MonadError String m => Element -> m Meta +parseMeta :: MonadError PandocError m => Element -> m Meta parseMeta content = do meta <- findElementE (dfName "metadata") content let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True @@ -159,7 +162,7 @@ renameMeta :: String -> String renameMeta "creator" = "author" renameMeta s = s -getManifest :: MonadError String m => Archive -> m (String, Element) +getManifest :: MonadError PandocError m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" "container.xml") archive docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry @@ -266,18 +269,18 @@ emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: MonadError String m => QName -> Element -> m String +findAttrE :: MonadError PandocError m => QName -> Element -> m String findAttrE q e = mkE "findAttr" $ findAttr q e -findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry +findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry findEntryByPathE (normalise -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: MonadError String m => String -> m Element +parseXMLDocE :: MonadError PandocError m => String -> m Element parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc -findElementE :: MonadError String m => QName -> Element -> m Element +findElementE :: MonadError PandocError m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x -mkE :: MonadError String m => String -> Maybe a -> m a -mkE s = maybe (throwError s) return +mkE :: MonadError PandocError m => String -> Maybe a -> m a +mkE s = maybe (throwError . ParseFailure $ s) return -- cgit v1.2.3 From b9e04825cf993cebc7f48d00a9faf057c3443578 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:03:12 +0000 Subject: Change return type of HTML reader --- src/Text/Pandoc/Readers/HTML.hs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 02ff07e73..b6338aeff 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, +ViewPatterns#-} {- Copyright (C) 2006-2014 John MacFarlane @@ -62,15 +63,18 @@ import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) +import Text.Pandoc.Error + +import Text.Parsec.Error + -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readHtml opts inp = - case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of - Left err' -> error $ "\nError at " ++ show err' - Right result -> result + mapLeft (ParseFailure . getError) . flip runReader def $ + runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -78,6 +82,9 @@ readHtml opts inp = meta <- stateMeta . parserState <$> getState bs' <- replaceNotes (B.toList blocks) return $ Pandoc meta bs' + getError (errorMessages -> ms) = case ms of + [] -> "" + (m:_) -> messageString m replaceNotes :: [Block] -> TagParser [Block] replaceNotes = walkM replaceNotes' -- cgit v1.2.3 From 9f6a92d664aa89eac4641e80cf2b9b10b6360e7f Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:03:28 +0000 Subject: Change return type of Haddock reader --- src/Text/Pandoc/Readers/Haddock.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index c03382c17..aa2534afc 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -26,15 +26,17 @@ import Documentation.Haddock.Parser import Documentation.Haddock.Types import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse - -> Pandoc + -> Either PandocError Pandoc readHaddock opts = #if MIN_VERSION_haddock_library(1,2,0) - B.doc . docHToBlocks . trace' . _doc . parseParas + Right . B.doc . docHToBlocks . trace' . _doc . parseParas #else - B.doc . docHToBlocks . trace' . parseParas + Right . B.doc . docHToBlocks . trace' . parseParas #endif where trace' x = if readerTrace opts then trace (show x) x -- cgit v1.2.3 From b8acb9f541a460504ad4356beed82cbaf3374a27 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:03:45 +0000 Subject: Change return type of LaTeX reader --- src/Text/Pandoc/Readers/LaTeX.hs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 942b9f3b3..633388db8 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -57,11 +57,12 @@ import qualified Data.Map as M import qualified Control.Exception as E import System.FilePath (takeExtension, addExtension) import Text.Pandoc.Highlighting (fromListingsLanguage) +import Text.Pandoc.Error -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } parseLaTeX :: LP Pandoc @@ -851,12 +852,8 @@ rawEnv name = do type IncludeParser = ParserT [Char] [String] IO String -- | Replace "include" commands with file contents. -handleIncludes :: String -> IO String -handleIncludes s = do - res <- runParserT includeParser' [] "input" s - case res of - Right s' -> return s' - Left e -> error $ show e +handleIncludes :: String -> IO (Either PandocError String) +handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s includeParser' :: IncludeParser includeParser' = -- cgit v1.2.3 From acefbe99a95cd5bd8434237fdb0a81c1b63ed06d Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:04:08 +0000 Subject: Change return type of Markdown reader --- src/Text/Pandoc/Readers/Markdown.hs | 58 +++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 25 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 92cf9a22e..b43dda3a1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,5 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2006-2014 John MacFarlane @@ -64,13 +65,14 @@ import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error type MarkdownParser a = ParserT [Char] ParserState (Reader ParserState) a -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMarkdown opts s = runMarkdown opts s parseMarkdown @@ -78,16 +80,17 @@ readMarkdown opts s = -- and a list of warnings. readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> (Pandoc, [String]) + -> Either PandocError (Pandoc, [String]) readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown) -runMarkdown :: ReaderOptions -> String -> MarkdownParser a -> a -runMarkdown opts inp p = fst res +runMarkdown :: forall a . ReaderOptions -> String -> MarkdownParser a -> Either PandocError a +runMarkdown opts inp p = fst <$> res where imd = readWithM (returnState p) def{ stateOptions = opts } (inp ++ "\n\n") + res :: Either PandocError (a, ParserState) res = runReader imd s s :: ParserState - s = snd $ runReader imd s + s = either def snd res -- -- Constants and data structure definitions @@ -246,8 +249,9 @@ yamlMetaBlock = try $ do H.foldrWithKey (\k v m -> if ignorable k then m - else B.setMeta (T.unpack k) - (yamlToMeta opts v) m) + else case yamlToMeta opts v of + Left _ -> m + Right v' -> B.setMeta (T.unpack k) v' m) nullMeta hashmap Right Yaml.Null -> return nullMeta Right _ -> do @@ -279,33 +283,37 @@ yamlMetaBlock = try $ do ignorable :: Text -> Bool ignorable t = T.pack "_" `T.isSuffixOf` t -toMetaValue :: ReaderOptions -> Text -> MetaValue -toMetaValue opts x = - case readMarkdown opts (T.unpack x) of - Pandoc _ [Plain xs] -> MetaInlines xs - Pandoc _ [Para xs] +toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue +toMetaValue opts x = toMeta <$> readMarkdown opts (T.unpack x) + where + toMeta p = + case p of + Pandoc _ [Plain xs] -> MetaInlines xs + Pandoc _ [Para xs] | endsWithNewline x -> MetaBlocks [Para xs] | otherwise -> MetaInlines xs - Pandoc _ bs -> MetaBlocks bs - where endsWithNewline t = T.pack "\n" `T.isSuffixOf` t + Pandoc _ bs -> MetaBlocks bs + endsWithNewline t = T.pack "\n" `T.isSuffixOf` t -yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue +yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t yamlToMeta _ (Yaml.Number n) -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = MetaString $ show + | base10Exponent n >= 0 = return $ MetaString $ show $ coefficient n * (10 ^ base10Exponent n) - | otherwise = MetaString $ show n -yamlToMeta _ (Yaml.Bool b) = MetaBool b -yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts) - $ V.toList xs -yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m -> + | otherwise = return $ MetaString $ show n +yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b +yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts) + (V.toList xs) +yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m -> if ignorable k then m - else M.insert (T.unpack k) - (yamlToMeta opts v) m) - M.empty o -yamlToMeta _ _ = MetaString "" + else (do + v' <- yamlToMeta opts v + m' <- m + return (M.insert (T.unpack k) v' m'))) + (return M.empty) o +yamlToMeta _ _ = return $ MetaString "" stopLine :: MarkdownParser () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () -- cgit v1.2.3 From 41d9527533e21716e1268da3e4e37a0c75b7ff34 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:04:24 +0000 Subject: Change return type of Mediawiki reader --- src/Text/Pandoc/Readers/MediaWiki.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index e43b8a86c..0bb6dd436 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -58,21 +58,21 @@ import Data.Maybe (fromMaybe) import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMediaWiki opts s = - case runParser parseMediaWiki MWState{ mwOptions = opts + readWith parseMediaWiki MWState{ mwOptions = opts , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 , mwCategoryLinks = [] , mwHeaderMap = M.empty , mwIdentifierList = [] } - "source" (s ++ "\n") of - Left err' -> error $ "\nError:\n" ++ show err' - Right result -> result + (s ++ "\n") data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int -- cgit v1.2.3 From ef2a8107e2cfca9cf0b8be63f07d86197b53bc74 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:04:37 +0000 Subject: Change return type of Native reader --- src/Text/Pandoc/Readers/Native.hs | 40 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index f4dfa62c1..fc6b3362a 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -3,7 +3,7 @@ Copyright (C) 2011-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; Either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -33,6 +33,9 @@ module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Error +import Control.Applicative + -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, -- an inline list, or an inline. Thus, for example, @@ -44,33 +47,18 @@ import Text.Pandoc.Shared (safeRead) -- > Pandoc nullMeta [Plain [Str "hi"]] -- readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readNative s = - case safeRead s of - Just d -> d - Nothing -> Pandoc nullMeta $ readBlocks s + -> Either PandocError Pandoc +readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) -readBlocks :: String -> [Block] -readBlocks s = - case safeRead s of - Just d -> d - Nothing -> [readBlock s] +readBlocks :: String -> Either PandocError [Block] +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) -readBlock :: String -> Block -readBlock s = - case safeRead s of - Just d -> d - Nothing -> Plain $ readInlines s +readBlock :: String -> Either PandocError Block +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) -readInlines :: String -> [Inline] -readInlines s = - case safeRead s of - Just d -> d - Nothing -> [readInline s] +readInlines :: String -> Either PandocError [Inline] +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) -readInline :: String -> Inline -readInline s = - case safeRead s of - Just d -> d - Nothing -> error "Cannot parse document" +readInline :: String -> Either PandocError Inline +readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s) -- cgit v1.2.3 From db6baab2171cd1866e3f4e46ecfedfe51a26ec06 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:05:05 +0000 Subject: Change return type of OPML reader --- src/Text/Pandoc/Readers/OPML.hs | 48 ++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 20 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 35d01e877..19ddba36b 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleContexts #-} module Text.Pandoc.Readers.OPML ( readOPML ) where import Data.Char (toUpper) import Text.Pandoc.Options @@ -11,8 +12,11 @@ import Data.Generics import Data.Monoid import Control.Monad.State import Control.Applicative ((<$>), (<$)) +import Data.Default +import Text.Pandoc.Compat.Except +import Text.Pandoc.Error -type OPML = State OPMLState +type OPML = ExceptT PandocError (State OPMLState) data OPMLState = OPMLState{ opmlSectionLevel :: Int @@ -21,17 +25,19 @@ data OPMLState = OPMLState{ , opmlDocDate :: Inlines } deriving Show -readOPML :: ReaderOptions -> String -> Pandoc +instance Default OPMLState where + def = OPMLState{ opmlSectionLevel = 0 + , opmlDocTitle = mempty + , opmlDocAuthors = [] + , opmlDocDate = mempty + } + +readOPML :: ReaderOptions -> String -> Either PandocError Pandoc readOPML _ inp = setTitle (opmlDocTitle st') - $ setAuthors (opmlDocAuthors st') - $ setDate (opmlDocDate st') - $ doc $ mconcat bs - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) - OPMLState{ opmlSectionLevel = 0 - , opmlDocTitle = mempty - , opmlDocAuthors = [] - , opmlDocDate = mempty - } + . setAuthors (opmlDocAuthors st') + . setDate (opmlDocDate st') + . doc . mconcat <$> bs + where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp) -- normalize input, consolidating adjacent Text and CRef elements normalizeTree :: [Content] -> [Content] @@ -58,14 +64,16 @@ attrValue attr elt = Just z -> z Nothing -> "" -asHtml :: String -> Inlines -asHtml s = case readHtml def s of - Pandoc _ [Plain ils] -> fromList ils - _ -> mempty +exceptT :: Either PandocError a -> OPML a +exceptT = either throwError return + +asHtml :: String -> OPML Inlines +asHtml s = (\(Pandoc _ bs) -> case bs of + [Plain ils] -> fromList ils + _ -> mempty) <$> exceptT (readHtml def s) -asMarkdown :: String -> Blocks -asMarkdown s = fromList bs - where Pandoc _ bs = readMarkdown def s +asMarkdown :: String -> OPML Blocks +asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s) getBlocks :: Element -> OPML Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -82,8 +90,8 @@ parseBlock (Elem e) = "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e - where sect n = do let headerText = asHtml $ attrValue "text" e - let noteBlocks = asMarkdown $ attrValue "_note" e + where sect n = do headerText <- asHtml $ attrValue "text" e + noteBlocks <- asMarkdown $ attrValue "_note" e modify $ \st -> st{ opmlSectionLevel = n } bs <- getBlocks e modify $ \st -> st{ opmlSectionLevel = n - 1 } -- cgit v1.2.3 From 4c910493458052e9a0a91a5ea948ab1b7457c9be Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:05:15 +0000 Subject: Change return type of Org reader --- src/Text/Pandoc/Readers/Org.hs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index fd58956d0..9d2c355ee 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -58,19 +58,21 @@ import Data.Maybe (fromMaybe, isJust) import Data.Monoid (mconcat, mempty, mappend) import Network.HTTP (urlEncode) +import Text.Pandoc.Error + -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readOrg opts s = runOrg opts s parseOrg -runOrg :: ReaderOptions -> String -> OrgParser a -> a -runOrg opts inp p = fst res +runOrg :: ReaderOptions -> String -> OrgParser a -> Either PandocError a +runOrg opts inp p = fst <$> res where imd = readWithM (returnState p) def{ orgStateOptions = opts } (inp ++ "\n\n") res = runReader imd s s :: OrgParserState - s = snd $ runReader imd s + s = either def snd res type OrgParser a = ParserT [Char] OrgParserState (Reader OrgParserState) a -- cgit v1.2.3 From 2b7073860bf943d9fd41a5d29cd9821c09c4b643 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:05:29 +0000 Subject: Change return type of RST reader --- src/Text/Pandoc/Readers/RST.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4ae9d52ae..a8112bc81 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -51,13 +51,15 @@ import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) import Data.Char (toLower, isHexDigit, isSpace) +import Text.Pandoc.Error + -- | Parse reStructuredText string and return Pandoc document. readRST :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") -readRSTWithWarnings :: ReaderOptions -> String -> (Pandoc, [String]) +readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) readRSTWithWarnings opts s = (readWith (returnWarnings parseRST)) def{ stateOptions = opts } (s ++ "\n\n") type RSTParser = Parser [Char] ParserState -- cgit v1.2.3 From f61db382905883b7c500b7b3542d95c3b7d0502c Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:05:49 +0000 Subject: Change return type of TWiki reader --- src/Text/Pandoc/Readers/TWiki.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 9f5738478..07b414431 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -48,17 +48,18 @@ import Data.Maybe (fromMaybe) import Text.HTML.TagSoup import Data.Char (isAlphaNum) import qualified Data.Foldable as F +import Text.Pandoc.Error -- | Read twiki from an input string and return a Pandoc document. readTWiki :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readTWiki opts s = (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n") readTWikiWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> (Pandoc, [String]) + -> Either PandocError (Pandoc, [String]) readTWikiWithWarnings opts s = (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") where parseTWikiWithWarnings = do -- cgit v1.2.3 From 6de6eae737e0efd7bad0c742d078e1071692de45 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:06:09 +0000 Subject: Change return type of Textile reader --- src/Text/Pandoc/Readers/Textile.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 63ab80eb9..4565b26a1 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -68,11 +68,12 @@ import Text.Printf import Control.Applicative ((<$>), (*>), (<*), (<$)) import Data.Monoid import Debug.Trace (trace) +import Text.Pandoc.Error -- | Parse a Textile text and return a Pandoc document. readTextile :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readTextile opts s = (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") -- cgit v1.2.3 From b02b5b9d5e64f5de959e156179789e66c3629396 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 13:06:22 +0000 Subject: Change return type of Txt2Tags reader --- src/Text/Pandoc/Readers/Txt2Tags.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 834d18c5c..304d6d4c5 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -48,6 +48,7 @@ import Data.Monoid (Monoid, mconcat, mempty, mappend) import Control.Monad (void, guard, when) import Data.Default import Control.Monad.Reader (Reader, runReader, asks) +import Text.Pandoc.Error import Data.Time.LocalTime (getZonedTime) import Text.Pandoc.Compat.Directory(getModificationTime) @@ -83,12 +84,12 @@ getT2TMeta inps out = do return $ T2TMeta curDate curMtime (intercalate ", " inps) out -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc +readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document -readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc +readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc readTxt2TagsNoMacros = readTxt2Tags def parseT2T :: T2T Pandoc -- cgit v1.2.3 From 615aa94c3d44fa8a6f483247d30519ff4b258e91 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 18:40:36 +0000 Subject: Make safeRead safe. Fixes #1801 --- src/Text/Pandoc/Shared.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index bc960fd38..90d0941c1 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -113,7 +113,7 @@ import System.FilePath ( (), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import qualified Control.Exception as E -import Control.Monad (msum, unless) +import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.Compat.Locale (defaultTimeLocale) import Data.Time @@ -883,11 +883,11 @@ collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories -- Safe read -- -safeRead :: (Monad m, Read a) => String -> m a +safeRead :: (MonadPlus m, Read a) => String -> m a safeRead s = case reads s of (d,x):_ | all isSpace x -> return d - _ -> fail $ "Could not read `" ++ s ++ "'" + _ -> mzero -- -- Temp directory -- cgit v1.2.3 From 48f442f4770c774534b3696e6dd696da45395874 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 21:00:46 +0000 Subject: Update haddocks and copyright notices --- src/Text/Pandoc/Error.hs | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index d4172f7ca..70c333bbf 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -1,3 +1,33 @@ +{- +Copyright (C) 2006-2015 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Error + Copyright : Copyright (C) 2006-2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +This module provides a standard way to deal with possible errors encounted +during parsing. + +-} module Text.Pandoc.Error (PandocError(..), handleError,hush, mapLeft) where import Text.Parsec.Error @@ -6,7 +36,9 @@ import Text.Pandoc.Compat.Except type Input = String -data PandocError = ParseFailure String +data PandocError = -- | Generic parse failure + ParseFailure String + -- | Error thrown by a Parsec parser | ParsecError Input ParseError deriving (Show) @@ -14,7 +46,6 @@ data PandocError = ParseFailure String instance Error PandocError where strMsg = ParseFailure - mapLeft :: (a -> b) -> Either a c -> Either b c mapLeft f (Left x) = Left (f x) mapLeft _ (Right x) = Right x @@ -23,6 +54,7 @@ hush :: Either a b -> Maybe b hush (Left _) = Nothing hush (Right x) = Just x +-- | An unsafe method to handle `PandocError`s. handleError :: Either PandocError a -> a handleError (Right r) = r handleError (Left err) = -- cgit v1.2.3 From ad39bc7009e320b3afb91a5683521eb1eccf0ef7 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Wed, 18 Feb 2015 21:05:47 +0000 Subject: Move utility error functions to Text.Pandoc.Shared --- src/Text/Pandoc.hs | 2 +- src/Text/Pandoc/Error.hs | 9 +-------- src/Text/Pandoc/ImageSize.hs | 3 +-- src/Text/Pandoc/Readers/HTML.hs | 2 +- src/Text/Pandoc/Shared.hs | 10 ++++++++++ 5 files changed, 14 insertions(+), 12 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 79ca4a6b7..89f61089b 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -164,7 +164,7 @@ import Text.Pandoc.Writers.Haddock import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options -import Text.Pandoc.Shared (safeRead, warn) +import Text.Pandoc.Shared (safeRead, warn, mapLeft) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Error import Data.Aeson diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 70c333bbf..73d1e8f08 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -28,7 +28,7 @@ This module provides a standard way to deal with possible errors encounted during parsing. -} -module Text.Pandoc.Error (PandocError(..), handleError,hush, mapLeft) where +module Text.Pandoc.Error (PandocError(..), handleError) where import Text.Parsec.Error import Text.Parsec.Pos hiding (Line) @@ -46,13 +46,6 @@ data PandocError = -- | Generic parse failure instance Error PandocError where strMsg = ParseFailure -mapLeft :: (a -> b) -> Either a c -> Either b c -mapLeft f (Left x) = Left (f x) -mapLeft _ (Right x) = Right x - -hush :: Either a b -> Maybe b -hush (Left _) = Nothing -hush (Right x) = Just x -- | An unsafe method to handle `PandocError`s. handleError :: Either PandocError a -> a diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 963057b6f..8f0a991ba 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -39,10 +39,9 @@ import Control.Monad import Data.Bits import Data.Binary import Data.Binary.Get -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, hush) import qualified Data.Map as M import Text.Pandoc.Compat.Except -import Text.Pandoc.Error import Control.Monad.Trans import Data.Maybe (fromMaybe) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index b6338aeff..59f71589e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -44,7 +44,7 @@ import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, renderTags' - , escapeURI, safeRead ) + , escapeURI, safeRead, mapLeft ) import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) , Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans)) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 90d0941c1..e0460c66e 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -85,6 +85,8 @@ module Text.Pandoc.Shared ( -- * Error handling err, warn, + mapLeft, + hush, -- * Safe read safeRead, -- * Temp directory @@ -855,6 +857,14 @@ warn msg = do name <- getProgName UTF8.hPutStrLn stderr $ name ++ ": " ++ msg +mapLeft :: (a -> b) -> Either a c -> Either b c +mapLeft f (Left x) = Left (f x) +mapLeft _ (Right x) = Right x + +hush :: Either a b -> Maybe b +hush (Left _) = Nothing +hush (Right x) = Just x + -- | Remove intermediate "." and ".." directories from a path. -- -- > collapseFilePath "./foo" == "foo" -- cgit v1.2.3