diff options
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 55 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 671 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 474 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 2 |
4 files changed, 627 insertions, 575 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 33120e55d..5c27d3e6d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -65,8 +65,7 @@ module Text.Pandoc.Parsing ( anyLine, widthsFromIndices, gridTableWith, readWith, - returnWarnings, - returnState, + readWithWarnings, readWithM, testStringWith, guardEnabled, @@ -105,8 +104,11 @@ module Text.Pandoc.Parsing ( anyLine, applyMacros', Parser, ParserT, + F(..), + runF, + askF, + asksF, token, - generalize, -- * Re-exports from Text.Pandoc.Parsec Stream, runParser, @@ -187,7 +189,7 @@ import Data.Default import qualified Data.Set as Set import Control.Monad.Reader import Control.Monad.Identity -import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$)) +import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative) import Data.Monoid import Data.Maybe (catMaybes) @@ -197,6 +199,22 @@ type Parser t s = Parsec t s type ParserT = ParsecT +newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor) + +runF :: F a -> ParserState -> a +runF = runReader . unF + +askF :: F ParserState +askF = F ask + +asksF :: (ParserState -> a) -> F a +asksF f = F $ asks f + +instance Monoid a => Monoid (F a) where + mempty = return mempty + mappend = liftM2 mappend + mconcat = liftM mconcat . sequence + -- | Parse any line of text anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] anyLine = do @@ -860,18 +878,15 @@ readWith :: Parser [Char] st a -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp -returnWarnings :: (Stream s m c) - => ParserT s ParserState m a - -> ParserT s ParserState m (a, [String]) -returnWarnings p = do +readWithWarnings :: Parser [Char] ParserState a + -> ParserState + -> String + -> Either PandocError (a, [String]) +readWithWarnings p = readWith $ do doc <- p warnings <- stateWarnings <$> getState return (doc, warnings) --- | Return the final internal state with the result of a parser -returnState :: (Stream s m c) => ParsecT s st m a -> ParsecT s st m (a, st) -returnState p = (,) <$> p <*> getState - -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a, Stream [Char] Identity Char) => ParserT [Char] ParserState Identity a @@ -893,6 +908,7 @@ data ParserState = ParserState stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) stateMeta :: Meta, -- ^ Document metadata + stateMeta' :: F Meta, -- ^ Document metadata stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) stateIdentifiers :: [String], -- ^ List of header identifiers used @@ -907,8 +923,7 @@ data ParserState = ParserState stateCaption :: Maybe Inlines, -- ^ Caption in current environment stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context - stateWarnings :: [String], -- ^ Warnings generated by the parser - stateInFootnote :: Bool -- ^ True if in a footnote block. + stateWarnings :: [String] -- ^ Warnings generated by the parser } instance Default ParserState where @@ -990,6 +1005,7 @@ defaultParserState = stateNotes = [], stateNotes' = [], stateMeta = nullMeta, + stateMeta' = return nullMeta, stateHeaderTable = [], stateHeaders = M.empty, stateIdentifiers = [], @@ -1002,8 +1018,7 @@ defaultParserState = stateCaption = Nothing, stateInHtmlBlock = Nothing, stateMarkdownAttribute = False, - stateWarnings = [], - stateInFootnote = False } + stateWarnings = []} -- | Succeed only if the extension is enabled. guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () @@ -1042,7 +1057,7 @@ data QuoteContext type NoteTable = [(String, String)] -type NoteTable' = [(String, Blocks)] -- used in markdown reader +type NoteTable' = [(String, F Blocks)] -- used in markdown reader newtype Key = Key String deriving (Show, Read, Eq, Ord) @@ -1238,15 +1253,11 @@ applyMacros' target = do else return target -- | Append a warning to the log. -addWarning :: (Stream s m c) => Maybe SourcePos -> String -> ParserT s ParserState m () +addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState () addWarning mbpos msg = updateState $ \st -> st{ stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : stateWarnings st } - -generalize :: (Monad m) => Parser s st a -> ParserT s st m a -generalize m = mkPT (\ s -> (return $ (return . runIdentity) <$> runIdentity (runParsecT m s))) - infixr 5 <+?> (<+?>) :: (Monoid a, Monad m) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 17270b741..656e4ec66 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -32,7 +32,7 @@ Conversion of markdown-formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Markdown ( readMarkdown, readMarkdownWithWarnings ) where -import Data.List ( transpose, sortBy, intersperse, intercalate, elemIndex) +import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate ) import qualified Data.Map as M import Data.Scientific (coefficient, base10Exponent) import Data.Ord ( comparing ) @@ -58,7 +58,6 @@ import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockT import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>)) import Control.Monad -import Control.Monad.Reader import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) @@ -67,30 +66,25 @@ import Text.Printf (printf) import Debug.Trace (trace) import Text.Pandoc.Error -type MarkdownParser a = ParserT [Char] ParserState (Reader ParserState) a +type MarkdownParser = Parser [Char] ParserState -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Either PandocError Pandoc readMarkdown opts s = - runMarkdown opts s parseMarkdown + (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") -- | Read markdown from an input string and return a pair of a Pandoc document -- and a list of warnings. readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Either PandocError (Pandoc, [String]) -readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown) + -> Either PandocError (Pandoc, [String]) +readMarkdownWithWarnings opts s = + (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") -runMarkdown :: forall a . ReaderOptions -> String -> MarkdownParser a -> Either PandocError a -runMarkdown opts inp p = fst <$> res - where - imd = readWithM (returnState p) def{ stateOptions = opts } (inp ++ "\n\n") - res :: Either PandocError (a, ParserState) - res = runReader imd s - s :: ParserState - s = either def snd res +trimInlinesF :: F Inlines -> F Inlines +trimInlinesF = liftM trimInlines -- -- Constants and data structure definitions @@ -127,10 +121,10 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -isNull :: Inlines -> Bool -isNull = B.isNull +isNull :: F Inlines -> Bool +isNull ils = B.isNull $ runF ils def -spnl :: Monad m => ParserT [Char] st m () +spnl :: Parser [Char] st () spnl = try $ do skipSpaces optional newline @@ -170,9 +164,9 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: MarkdownParser Inlines +inlinesInBalancedBrackets :: MarkdownParser (F Inlines) inlinesInBalancedBrackets = charsInBalancedBrackets >>= - parseFromString (trimInlines . mconcat <$> many inline) + parseFromString (trimInlinesF . mconcat <$> many inline) charsInBalancedBrackets :: MarkdownParser [Char] charsInBalancedBrackets = do @@ -189,16 +183,16 @@ charsInBalancedBrackets = do -- document structure -- -titleLine :: MarkdownParser Inlines +titleLine :: MarkdownParser (F Inlines) titleLine = try $ do char '%' skipSpaces res <- many $ (notFollowedBy newline >> inline) <|> try (endline >> whitespace) newline - return $ trimInlines $ mconcat res + return $ trimInlinesF $ mconcat res -authorsLine :: MarkdownParser [Inlines] +authorsLine :: MarkdownParser (F [Inlines]) authorsLine = try $ do char '%' skipSpaces @@ -207,13 +201,13 @@ authorsLine = try $ do (char ';' <|> try (newline >> notFollowedBy blankline >> spaceChar)) newline - return $ filter (not . isNull) $ map (trimInlines . mconcat) authors + return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors -dateLine :: MarkdownParser Inlines +dateLine :: MarkdownParser (F Inlines) dateLine = try $ do char '%' skipSpaces - trimInlines . mconcat <$> manyTill inline newline + trimInlinesF . mconcat <$> manyTill inline newline titleBlock :: MarkdownParser () titleBlock = pandocTitleBlock <|> mmdTitleBlock @@ -223,16 +217,20 @@ pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') title <- option mempty titleLine - author <- option [] authorsLine + author <- option (return []) authorsLine date <- option mempty dateLine optional blanklines - let meta' = (if B.isNull title then id else B.setMeta "title" title) - . (if null author then id else B.setMeta "author" author) - . (if B.isNull date then id else B.setMeta "date" date) - $ nullMeta - updateState $ \st -> st{ stateMeta = stateMeta st <> meta' } - -yamlMetaBlock :: MarkdownParser Blocks + let meta' = do title' <- title + author' <- author + date' <- date + return $ + (if B.isNull title' then id else B.setMeta "title" title') + . (if null author' then id else B.setMeta "author" author') + . (if B.isNull date' then id else B.setMeta "date" date') + $ nullMeta + updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + +yamlMetaBlock :: MarkdownParser (F Blocks) yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition @@ -245,7 +243,7 @@ yamlMetaBlock = try $ do optional blanklines opts <- stateOptions <$> getState meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> return $ + Right (Yaml.Object hashmap) -> return $ return $ H.foldrWithKey (\k v m -> if ignorable k then m @@ -253,10 +251,10 @@ yamlMetaBlock = try $ do Left _ -> m Right v' -> B.setMeta (T.unpack k) v' m) nullMeta hashmap - Right Yaml.Null -> return nullMeta + Right Yaml.Null -> return $ return nullMeta Right _ -> do addWarning (Just pos) "YAML header is not an object" - return nullMeta + return $ return nullMeta Left err' -> do case err' of InvalidYaml (Just YamlParseException{ @@ -275,13 +273,13 @@ yamlMetaBlock = try $ do _ -> addWarning (Just pos) $ "Could not parse YAML header: " ++ show err' - return nullMeta - updateState $ \st -> st{ stateMeta = stateMeta st <> meta' } + return $ return nullMeta + updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } return mempty -- ignore fields ending with _ ignorable :: Text -> Bool -ignorable t = T.pack "_" `T.isSuffixOf` t +ignorable t = (T.pack "_") `T.isSuffixOf` t toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) @@ -328,8 +326,8 @@ mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block kvPairs <- many1 kvPair blanklines - updateState $ \st -> st{ stateMeta = stateMeta st <> - (Meta $ M.fromList kvPairs) } + updateState $ \st -> st{ stateMeta' = stateMeta' st <> + return (Meta $ M.fromList kvPairs) } kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do @@ -353,11 +351,11 @@ parseMarkdown = do optional titleBlock blocks <- parseBlocks st <- getState - let meta = stateMeta st - let Pandoc _ bs = B.doc blocks + let meta = runF (stateMeta' st) st + let Pandoc _ bs = B.doc $ runF blocks st return $ Pandoc meta bs -referenceKey :: MarkdownParser Blocks +referenceKey :: MarkdownParser (F Blocks) referenceKey = try $ do pos <- getPosition skipNonindentSpaces @@ -384,7 +382,7 @@ referenceKey = try $ do Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" Nothing -> return () updateState $ \s -> s { stateKeys = M.insert key target oldkeys } - return mempty + return $ return mempty referenceTitle :: MarkdownParser String referenceTitle = try $ do @@ -404,7 +402,7 @@ quotedTitle c = try $ do -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for -- an abbreviation. -abbrevKey :: MarkdownParser Blocks +abbrevKey :: MarkdownParser (F Blocks) abbrevKey = do guardEnabled Ext_abbreviations try $ do @@ -413,7 +411,7 @@ abbrevKey = do char ':' skipMany (satisfy (/= '\n')) blanklines - return mempty + return $ return mempty noteMarker :: MarkdownParser String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') @@ -431,7 +429,7 @@ rawLines = do rest <- many rawLine return $ unlines (first:rest) -noteBlock :: MarkdownParser Blocks +noteBlock :: MarkdownParser (F Blocks) noteBlock = try $ do pos <- getPosition skipNonindentSpaces @@ -443,7 +441,7 @@ noteBlock = try $ do rest <- many $ try $ blanklines >> indentSpaces >> rawLines let raw = unlines (first:rest) ++ "\n" optional blanklines - parsed <- parseFromString (inFootnote parseBlocks) raw + parsed <- parseFromString parseBlocks raw let newnote = (ref, parsed) oldnotes <- stateNotes' <$> getState case lookup ref oldnotes of @@ -452,29 +450,21 @@ noteBlock = try $ do updateState $ \s -> s { stateNotes' = newnote : oldnotes } return mempty -inFootnote :: MarkdownParser a -> MarkdownParser a -inFootnote p = do - st <- stateInFootnote <$> getState - updateState (\s -> s { stateInFootnote = True } ) - r <- p - updateState (\s -> s { stateInFootnote = st } ) - return r - -- -- parsing blocks -- -parseBlocks :: MarkdownParser Blocks +parseBlocks :: MarkdownParser (F Blocks) parseBlocks = mconcat <$> manyTill block eof -block :: MarkdownParser Blocks +block :: MarkdownParser (F Blocks) block = do tr <- getOption readerTrace pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock - , guardEnabled Ext_latex_macros *> macro + , guardEnabled Ext_latex_macros *> (macro >>= return . return) -- note: bulletList needs to be before header because of -- the possibility of empty list items: - , bulletList @@ -496,28 +486,29 @@ block = do , para , plain ] <?> "block" - when tr $ + when tr $ do + st <- getState trace (printf "line %d: %s" (sourceLine pos) - (take 60 . show . B.toList $ res)) (return ()) + (take 60 $ show $ B.toList $ runF res st)) (return ()) return res -- -- header blocks -- -header :: MarkdownParser Blocks +header :: MarkdownParser (F Blocks) header = setextHeader <|> atxHeader <?> "header" -atxHeader :: MarkdownParser Blocks +atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do - level <- length <$> many1 (char '#') + level <- many1 (char '#') >>= return . length notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces - text <- trimInlines . mconcat <$> many (notFollowedBy atxClosing >> inline) + text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr' <- registerHeader attr text - return $ B.headerWith attr' level text + attr' <- registerHeader attr (runF text defaultParserState) + return $ B.headerWith attr' level <$> text atxClosing :: MarkdownParser Attr atxClosing = try $ do @@ -544,25 +535,25 @@ mmdHeaderIdentifier = do skipSpaces return (ident,[],[]) -setextHeader :: MarkdownParser Blocks +setextHeader :: MarkdownParser (F Blocks) setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- trimInlines . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) + text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) attr <- setextHeaderEnd underlineChar <- oneOf setextHChars many (char underlineChar) blanklines - let level = (fromMaybe 0 $ elemIndex underlineChar setextHChars) + 1 - attr' <- registerHeader attr text - return $ B.headerWith attr' level text + let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 + attr' <- registerHeader attr (runF text defaultParserState) + return $ B.headerWith attr' level <$> text -- -- hrule block -- -hrule :: Monad m => ParserT [Char] st m Blocks +hrule :: Parser [Char] st (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -570,24 +561,24 @@ hrule = try $ do skipMany (spaceChar <|> char start) newline optional blanklines - return B.horizontalRule + return $ return B.horizontalRule -- -- code blocks -- indentedLine :: MarkdownParser String -indentedLine = indentSpaces >> ((++ "\n") <$> anyLine) +indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") -blockDelimiter :: Monad m - => (Char -> Bool) +blockDelimiter :: (Char -> Bool) -> Maybe Int - -> ParserT [Char] st m Int + -> Parser [Char] st Int blockDelimiter f len = try $ do c <- lookAhead (satisfy f) case len of Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> ((+ 3) . length <$> many (char c)) + Nothing -> count 3 (char c) >> many (char c) >>= + return . (+ 3) . length attributes :: MarkdownParser Attr attributes = try $ do @@ -632,7 +623,7 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -codeBlockFenced :: MarkdownParser Blocks +codeBlockFenced :: MarkdownParser (F Blocks) codeBlockFenced = try $ do c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) @@ -644,7 +635,7 @@ codeBlockFenced = try $ do blankline contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines - return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ return $ B.codeBlockWith attr $ intercalate "\n" contents -- correctly handle github language identifiers toLanguageId :: String -> String @@ -653,7 +644,7 @@ toLanguageId = map toLower . go go "objective-c" = "objectivec" go x = x -codeBlockIndented :: MarkdownParser Blocks +codeBlockIndented :: MarkdownParser (F Blocks) codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -661,15 +652,15 @@ codeBlockIndented = do return $ b ++ l)) optional blanklines classes <- getOption readerIndentedCodeClasses - return $ B.codeBlockWith ("", classes, []) $ + return $ return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: MarkdownParser Blocks +lhsCodeBlock :: MarkdownParser (F Blocks) lhsCodeBlock = do guardEnabled Ext_literate_haskell - (B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)) - <|> (B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> + <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> lhsCodeBlockInverseBird) lhsCodeBlockLaTeX :: MarkdownParser String @@ -698,7 +689,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Monad m => Char -> ParserT [Char] st m String +birdTrackLine :: Char -> Parser [Char] st String birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -726,12 +717,12 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: MarkdownParser Blocks +blockQuote :: MarkdownParser (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ intercalate "\n" raw ++ "\n\n" - return $ B.blockQuote contents + contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" + return $ B.blockQuote <$> contents -- -- list blocks @@ -774,7 +765,7 @@ anyOrderedListStart = try $ do return res listStart :: MarkdownParser () -listStart = bulletListStart <|> void anyOrderedListStart +listStart = bulletListStart <|> (anyOrderedListStart >> return ()) listLine :: MarkdownParser String listLine = try $ do @@ -829,7 +820,7 @@ listContinuationLine = try $ do return $ result ++ "\n" listItem :: MarkdownParser a - -> MarkdownParser Blocks + -> MarkdownParser (F Blocks) listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -845,14 +836,14 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: MarkdownParser Blocks +orderedList :: MarkdownParser (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart unless (style `elem` [DefaultStyle, Decimal, Example] && delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists - items <- many1 $ listItem + items <- fmap sequence $ many1 $ listItem ( try $ do optional newline -- if preceded by Plain block in a list startpos <- sourceColumn <$> getPosition @@ -864,12 +855,12 @@ orderedList = try $ do atMostSpaces (tabStop - (endpos - startpos)) return res ) start' <- option 1 $ guardEnabled Ext_startnum >> return start - return $ B.orderedListWith (start', style, delim) (compactify' items) + return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items -bulletList :: MarkdownParser Blocks +bulletList :: MarkdownParser (F Blocks) bulletList = do - items <- many1 $ listItem bulletListStart - return $ B.bulletList (compactify' items) + items <- fmap sequence $ many1 $ listItem bulletListStart + return $ B.bulletList <$> fmap compactify' items -- definition lists @@ -884,14 +875,14 @@ defListMarker = do else mzero return () -definitionListItem :: Bool -> MarkdownParser (Inlines, [Blocks]) +definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks])) definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact - term <- parseFromString (trimInlines . mconcat <$> many inline) rawLine' + term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' contents <- mapM (parseFromString parseBlocks) raw optional blanklines - return (term, contents) + return $ liftM2 (,) term (sequence contents) defRawBlock :: Bool -> MarkdownParser String defRawBlock compact = try $ do @@ -914,34 +905,35 @@ defRawBlock compact = try $ do return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" -definitionList :: MarkdownParser Blocks +definitionList :: MarkdownParser (F Blocks) definitionList = try $ do - lookAhead (anyLine >> optional (blankline >> notFollowedBy table) >> + lookAhead (anyLine >> + optional (blankline >> notFollowedBy (table >> return ())) >> -- don't capture table caption as def list! defListMarker) compactDefinitionList <|> normalDefinitionList -compactDefinitionList :: MarkdownParser Blocks +compactDefinitionList :: MarkdownParser (F Blocks) compactDefinitionList = do guardEnabled Ext_compact_definition_lists - items <- many1 $ definitionListItem True - return $ B.definitionList (compactify'DL items) + items <- fmap sequence $ many1 $ definitionListItem True + return $ B.definitionList <$> fmap compactify'DL items -normalDefinitionList :: MarkdownParser Blocks +normalDefinitionList :: MarkdownParser (F Blocks) normalDefinitionList = do guardEnabled Ext_definition_lists - items <- many1 $ definitionListItem False - return $ B.definitionList items + items <- fmap sequence $ many1 $ definitionListItem False + return $ B.definitionList <$> items -- -- paragraph block -- -para :: MarkdownParser Blocks +para :: MarkdownParser (F Blocks) para = try $ do exts <- getOption readerExtensions - result <- trimInlines . mconcat <$> many1 inline - option (B.plain result) + result <- trimInlinesF . mconcat <$> many1 inline + option (B.plain <$> result) $ try $ do newline (blanklines >> return mempty) @@ -958,17 +950,18 @@ para = try $ do Just "div" -> () <$ lookAhead (htmlTag (~== TagClose "div")) _ -> mzero - return $ - case B.toList result of + return $ do + result' <- result + case B.toList result' of [Image alt (src,tit)] | Ext_implicit_figures `Set.member` exts -> -- the fig: at beginning of title indicates a figure - B.para $ B.singleton + return $ B.para $ B.singleton $ Image alt (src,'f':'i':'g':':':tit) - _ -> B.para result + _ -> return $ B.para result' -plain :: MarkdownParser Blocks -plain = B.plain . trimInlines . mconcat <$> many1 inline +plain :: MarkdownParser (F Blocks) +plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline -- -- raw html @@ -979,13 +972,13 @@ htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: MarkdownParser Blocks +htmlBlock :: MarkdownParser (F Blocks) htmlBlock = do guardEnabled Ext_raw_html try (do (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag (guard (t `elem` ["pre","style","script"]) >> - B.rawBlock "html" <$> rawVerbatimBlock) + (return . B.rawBlock "html") <$> rawVerbatimBlock) <|> (do guardEnabled Ext_markdown_attribute oldMarkdownAttribute <- stateMarkdownAttribute <$> getState markdownAttribute <- @@ -1004,35 +997,35 @@ htmlBlock = do <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)) <|> htmlBlock' -htmlBlock' :: MarkdownParser Blocks +htmlBlock' :: MarkdownParser (F Blocks) htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ B.rawBlock "html" first + return $ return $ B.rawBlock "html" first strictHtmlBlock :: MarkdownParser String strictHtmlBlock = htmlInBalanced (not . isInlineTag) rawVerbatimBlock :: MarkdownParser String rawVerbatimBlock = try $ do - (TagOpen tag _, open) <- - htmlTag (tagOpen (`elem` ["pre", "style", "script"]) - (const True)) + (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem + ["pre", "style", "script"]) + (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags' [TagClose tag] -rawTeXBlock :: MarkdownParser Blocks +rawTeXBlock :: MarkdownParser (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex result <- (B.rawBlock "latex" . concat <$> - generalize rawLaTeXBlock `sepEndBy1` blankline) + rawLaTeXBlock `sepEndBy1` blankline) <|> (B.rawBlock "context" . concat <$> rawConTeXtEnvironment `sepEndBy1` blankline) spaces - return result + return $ return result -rawHtmlBlocks :: MarkdownParser Blocks +rawHtmlBlocks :: MarkdownParser (F Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag -- try to find closing tag @@ -1044,10 +1037,10 @@ rawHtmlBlocks = do contents <- mconcat <$> many (notFollowedBy' closer >> block) result <- (closer >>= \(_, rawcloser) -> return ( - (B.rawBlock "html" $ stripMarkdownAttribute raw) <> + return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> contents <> - (B.rawBlock "html" rawcloser))) - <|> return (B.rawBlock "html" raw <> contents) + return (B.rawBlock "html" rawcloser))) + <|> return (return (B.rawBlock "html" raw) <> contents) updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } return result @@ -1062,12 +1055,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s -- line block -- -lineBlock :: MarkdownParser Blocks +lineBlock :: MarkdownParser (F Blocks) lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= - mapM (parseFromString (trimInlines . mconcat <$> many inline)) - return $ B.para (mconcat $ intersperse B.linebreak lines') + mapM (parseFromString (trimInlinesF . mconcat <$> many inline)) + return $ B.para <$> (mconcat $ intersperse (return B.linebreak) lines') -- -- Tables @@ -1075,8 +1068,8 @@ lineBlock = try $ do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Monad m => Char - -> ParserT [Char] st m (Int, Int) +dashedLine :: Char + -> Parser [Char] st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1087,7 +1080,7 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. simpleTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser ([Blocks], [Alignment], [Int]) + -> MarkdownParser (F [Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -1106,8 +1099,9 @@ simpleTableHeader headless = try $ do let rawHeads' = if headless then replicate (length dashes) "" else rawHeads - heads <- - mapM (parseFromString (mconcat <$> many plain) . trim) rawHeads' + heads <- fmap sequence + $ mapM (parseFromString (mconcat <$> many plain)) + $ map trim rawHeads' return (heads, aligns, indices) -- Returns an alignment type for a table, based on a list of strings @@ -1148,30 +1142,30 @@ rawTableLine indices = do -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] - -> MarkdownParser [Blocks] + -> MarkdownParser (F [Blocks]) tableLine indices = rawTableLine indices >>= - mapM (parseFromString (mconcat <$> many plain)) + fmap sequence . mapM (parseFromString (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: [Int] - -> MarkdownParser [Blocks] + -> MarkdownParser (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - mapM (parseFromString (mconcat <$> many plain)) cols + fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: MarkdownParser Inlines +tableCaption :: MarkdownParser (F Inlines) tableCaption = try $ do guardEnabled Ext_table_captions skipNonindentSpaces string ":" <|> string "Table:" - trimInlines . mconcat <$> many1 inline <* blanklines + trimInlinesF . mconcat <$> many1 inline <* blanklines -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) simpleTable headless = do (aligns, _widths, heads', lines') <- tableWith (simpleTableHeader headless) tableLine @@ -1185,12 +1179,12 @@ simpleTable headless = do -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). multilineTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) multilineTable headless = tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter multilineTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser ([Blocks], [Alignment], [Int]) + -> MarkdownParser (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do unless headless $ tableSep >> notFollowedBy blankline @@ -1212,7 +1206,7 @@ multilineTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else map (unlines . map trim) rawHeadsList - heads <- + heads <- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) $ map trim rawHeads return (heads, aligns, indices) @@ -1222,7 +1216,7 @@ multilineTableHeader headless = try $ do -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) gridTable headless = tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter @@ -1231,14 +1225,14 @@ gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ trimr line -gridPart :: Monad m => Char -> ParserT [Char] st m (Int, Int) +gridPart :: Char -> Parser [Char] st (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' let lengthDashes = length dashes return (lengthDashes, lengthDashes + 1) -gridDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] +gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String @@ -1251,7 +1245,7 @@ gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser ([Blocks], [Alignment], [Int]) + -> MarkdownParser (F [Blocks], [Alignment], [Int]) gridTableHeader headless = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -1260,7 +1254,9 @@ gridTableHeader headless = try $ do else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - unless headless (void $ gridTableSep '=') + if headless + then return () + else gridTableSep '=' >> return () let lines' = map snd dashes let indices = scanl (+) 0 lines' let aligns = replicate (length lines') AlignDefault @@ -1269,7 +1265,7 @@ gridTableHeader headless = try $ do then replicate (length dashes) "" else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- mapM (parseFromString parseBlocks . trim) rawHeads + heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads return (heads, aligns, indices) gridTableRawLine :: [Int] -> MarkdownParser [String] @@ -1280,12 +1276,12 @@ gridTableRawLine indices = do -- | Parse row of grid table. gridTableRow :: [Int] - -> MarkdownParser [Blocks] + -> MarkdownParser (F [Blocks]) gridTableRow indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - compactify' <$> mapM (parseFromString parseBlocks) cols + fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols) removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -1311,12 +1307,12 @@ pipeBreak = try $ do blankline return (first:rest) -pipeTable :: MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) +pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak - lines' <- many pipeTableRow + lines' <- sequence <$> many pipeTableRow let widths = replicate (length aligns) 0.0 - return (aligns, widths, heads, lines') + return $ (aligns, widths, heads, lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1324,7 +1320,7 @@ sepPipe = try $ do notFollowedBy blankline -- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: MarkdownParser [Blocks] +pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1336,14 +1332,16 @@ pipeTableRow = do guard $ not (null rest && not openPipe) optional (char '|') blankline - let cells = first:rest - return $ - map (\ils -> + let cells = sequence (first:rest) + return $ do + cells' <- cells + return $ map + (\ils -> case trimInlines ils of ils' | B.isNull ils' -> mempty - | otherwise -> B.plain ils') cells + | otherwise -> B.plain $ ils') cells' -pipeTableHeaderPart :: Monad m => ParserT [Char] st m Alignment +pipeTableHeaderPart :: Parser [Char] st Alignment pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1358,7 +1356,7 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter -- Succeed only if current line contains a pipe. -scanForPipe :: Monad m => ParserT [Char] st m () +scanForPipe :: Parser [Char] st () scanForPipe = do inp <- getInput case break (\c -> c == '\n' || c == '|') inp of @@ -1368,22 +1366,22 @@ scanForPipe = do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in -- Text.Pandoc.Parsing. -tableWith :: MarkdownParser ([Blocks], [Alignment], [Int]) - -> ([Int] -> MarkdownParser [Blocks]) +tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int]) + -> ([Int] -> MarkdownParser (F [Blocks])) -> MarkdownParser sep -> MarkdownParser end - -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy1` lineParser + lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser footerParser numColumns <- getOption readerColumns - let widths = case indices of - [] -> replicate (length aligns) 0.0 - _ -> widthsFromIndices numColumns indices - return (aligns, widths, heads, lines') + let widths = if (indices == []) + then replicate (length aligns) 0.0 + else widthsFromIndices numColumns indices + return $ (aligns, widths, heads, lines') -table :: MarkdownParser Blocks +table :: MarkdownParser (F Blocks) table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) (aligns, widths, heads, lns) <- @@ -1398,15 +1396,19 @@ table = try $ do (gridTable False <|> gridTable True)) <?> "table" optional blanklines caption <- case frontCaption of - Nothing -> option mempty tableCaption + Nothing -> option (return mempty) tableCaption Just c -> return c - return $ B.table caption (zip aligns widths) heads lns + return $ do + caption' <- caption + heads' <- heads + lns' <- lns + return $ B.table caption' (zip aligns widths) heads' lns' -- -- inline -- -inline :: MarkdownParser Inlines +inline :: MarkdownParser (F Inlines) inline = choice [ whitespace , bareURL , str @@ -1429,7 +1431,7 @@ inline = choice [ whitespace , rawLaTeXInline' , exampleRef , smart - , B.singleton <$> charRef + , return . B.singleton <$> charRef , symbol , ltSign ] <?> "inline" @@ -1440,42 +1442,43 @@ escapedChar' = try $ do (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) <|> oneOf "\\`*_{}[]()>#+-.!~\"" -escapedChar :: MarkdownParser Inlines +escapedChar :: MarkdownParser (F Inlines) escapedChar = do result <- escapedChar' case result of - ' ' -> return $ B.str "\160" -- "\ " is a nonbreaking space + ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space '\n' -> guardEnabled Ext_escaped_line_breaks >> - return B.linebreak -- "\[newline]" is a linebreak - _ -> return $ B.str [result] + return (return B.linebreak) -- "\[newline]" is a linebreak + _ -> return $ return $ B.str [result] -ltSign :: MarkdownParser Inlines +ltSign :: MarkdownParser (F Inlines) ltSign = do guardDisabled Ext_raw_html <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag)) char '<' - return $ B.str "<" + return $ return $ B.str "<" -exampleRef :: MarkdownParser Inlines +exampleRef :: MarkdownParser (F Inlines) exampleRef = try $ do guardEnabled Ext_example_lists char '@' lab <- many1 (alphaNum <|> oneOf "-_") - st <- ask - return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + return $ do + st <- askF + return $ case M.lookup lab (stateExamples st) of + Just n -> B.str (show n) + Nothing -> B.str ('@':lab) -symbol :: MarkdownParser Inlines +symbol :: MarkdownParser (F Inlines) symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ B.str [result] + return $ return $ B.str [result] -- parses inline code, between n `s and n `s -code :: MarkdownParser Inlines +code :: MarkdownParser (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces @@ -1485,17 +1488,18 @@ code = try $ do notFollowedBy (char '`'))) attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >> optional whitespace >> attributes) - return $ B.codeWith attr $ trim $ concat result + return $ return $ B.codeWith attr $ trim $ concat result -math :: MarkdownParser Inlines -math = (B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> ((B.math <$> (mathInline >>= applyMacros')) <+?> - ((getOption readerSmart >>= guard) *> apostrophe <* notFollowedBy space)) +math :: MarkdownParser (F Inlines) +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) + <|> (return . B.math <$> (mathInline >>= applyMacros')) <+?> + ((getOption readerSmart >>= guard) *> (return <$> apostrophe) + <* notFollowedBy space) -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. enclosure :: Char - -> MarkdownParser Inlines + -> MarkdownParser (F Inlines) enclosure c = do -- we can't start an enclosure with _ if after a string and -- the intraword_underscores extension is enabled: @@ -1503,13 +1507,13 @@ enclosure c = do <|> guard (c == '*') <|> (guard =<< notAfterString) cs <- many1 (char c) - (B.str cs <>) <$> whitespace - <|> + (return (B.str cs) <>) <$> whitespace + <|> do case length cs of 3 -> three c 2 -> two c mempty 1 -> one c mempty - _ -> return $ B.str cs + _ -> return (return $ B.str cs) ender :: Char -> Int -> MarkdownParser () ender c n = try $ do @@ -1522,74 +1526,74 @@ ender c n = try $ do -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. -- Otherwise, emit ccc then the results. -three :: Char -> MarkdownParser Inlines +three :: Char -> MarkdownParser (F Inlines) three c = do contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) - (ender c 3 >> return ((B.strong . B.emph) contents)) - <|> (ender c 2 >> one c (B.strong contents)) - <|> (ender c 1 >> two c (B.emph contents)) - <|> return (B.str [c,c,c] <> contents) + (ender c 3 >> return ((B.strong . B.emph) <$> contents)) + <|> (ender c 2 >> one c (B.strong <$> contents)) + <|> (ender c 1 >> two c (B.emph <$> contents)) + <|> return (return (B.str [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. -two :: Char -> Inlines -> MarkdownParser Inlines +two :: Char -> F Inlines -> MarkdownParser (F Inlines) two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) - (ender c 2 >> return (B.strong (prefix' <> contents))) - <|> return (B.str [c,c] <> (prefix' <> contents)) + (ender c 2 >> return (B.strong <$> (prefix' <> contents))) + <|> return (return (B.str [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. -one :: Char -> Inlines -> MarkdownParser Inlines +one :: Char -> F Inlines -> MarkdownParser (F Inlines) one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline) <|> try (string [c,c] >> notFollowedBy (ender c 1) >> two c mempty) ) - (ender c 1 >> return (B.emph (prefix' <> contents))) - <|> return (B.str [c] <> (prefix' <> contents)) + (ender c 1 >> return (B.emph <$> (prefix' <> contents))) + <|> return (return (B.str [c]) <> (prefix' <> contents)) -strongOrEmph :: MarkdownParser Inlines +strongOrEmph :: MarkdownParser (F Inlines) strongOrEmph = enclosure '*' <|> enclosure '_' --- | Parses a list oInlines between start and end delimiters. +-- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) => MarkdownParser a -> MarkdownParser b - -> MarkdownParser Inlines + -> MarkdownParser (F Inlines) inlinesBetween start end = - (trimInlines . mconcat) <$> try (start >> many1Till inner end) + (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace <* notFollowedBy' end -strikeout :: MarkdownParser Inlines -strikeout = B.strikeout <$> +strikeout :: MarkdownParser (F Inlines) +strikeout = fmap B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: MarkdownParser Inlines -superscript = B.superscript <$> try (do +superscript :: MarkdownParser (F Inlines) +superscript = fmap B.superscript <$> try (do guardEnabled Ext_superscript char '^' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: MarkdownParser Inlines -subscript = B.subscript <$> try (do +subscript :: MarkdownParser (F Inlines) +subscript = fmap B.subscript <$> try (do guardEnabled Ext_subscript char '~' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: MarkdownParser Inlines -whitespace = spaceChar >> (lb <|> regsp) <?> "whitespace" +whitespace :: MarkdownParser (F Inlines) +whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: Monad m => ParserT [Char] st m Char +nonEndline :: Parser [Char] st Char nonEndline = satisfy (/='\n') -str :: MarkdownParser Inlines +str :: MarkdownParser (F Inlines) str = do result <- many1 alphaNum updateLastStrPos @@ -1597,14 +1601,14 @@ str = do isSmart <- getOption readerSmart if isSmart then case likelyAbbrev result of - [] -> return $ B.str result + [] -> return $ return $ B.str result xs -> choice (map (\x -> try (string x >> oneOf " \n" >> lookAhead alphaNum >> - return (B.str $ - result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ B.str result) - else return $ B.str result + return (return $ B.str + $ result ++ spacesToNbr x ++ "\160"))) xs) + <|> (return $ return $ B.str result) + else return $ return $ B.str result -- | if the string matches the beginning of an abbreviation (before -- the first period, return strings that would finish the abbreviation. @@ -1619,7 +1623,7 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: MarkdownParser Inlines +endline :: MarkdownParser (F Inlines) endline = try $ do newline notFollowedBy blankline @@ -1632,18 +1636,18 @@ endline = try $ do notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) notFollowedByHtmlCloser (eof >> return mempty) - <|> (guardEnabled Ext_hard_line_breaks >> return B.linebreak) + <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) - <|> return B.space + <|> (return $ return B.space) -- -- links -- -- a reference label for a link -reference :: MarkdownParser (Inlines, String) +reference :: MarkdownParser (F Inlines, String) reference = do notFollowedBy' (string "[^") -- footnote reference - withRaw $ trimInlines <$> inlinesInBalancedBrackets + withRaw $ trimInlinesF <$> inlinesInBalancedBrackets parenthesizedChars :: MarkdownParser [Char] parenthesizedChars = do @@ -1671,7 +1675,7 @@ source = do linkTitle :: MarkdownParser String linkTitle = quotedTitle '"' <|> quotedTitle '\'' -link :: MarkdownParser Inlines +link :: MarkdownParser (F Inlines) link = try $ do st <- getState guard $ stateAllowLinks st @@ -1681,14 +1685,14 @@ link = try $ do regLink B.link lab <|> referenceLink B.link (lab,raw) regLink :: (String -> String -> Inlines -> Inlines) - -> Inlines -> MarkdownParser Inlines + -> F Inlines -> MarkdownParser (F Inlines) regLink constructor lab = try $ do (src, tit) <- source - return $ constructor src tit lab + return $ constructor src tit <$> lab -- a link like [this][ref] or [this][] or [this] referenceLink :: (String -> String -> Inlines -> Inlines) - -> (Inlines, String) -> MarkdownParser Inlines + -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False (ref,raw') <- option (mempty, "") $ @@ -1702,22 +1706,24 @@ referenceLink constructor (lab, raw) = do fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references - let makeFallback = - B.str "[" <> fallback <> B.str "]" <> + let makeFallback = do + parsedRaw' <- parsedRaw + fallback' <- fallback + return $ B.str "[" <> fallback' <> B.str "]" <> (if sp && not (null raw) then B.space else mempty) <> - parsedRaw - keys <- asks stateKeys - headers <- asks stateHeaders - return $ - case M.lookup key keys of - Nothing -> - let ref' = if labIsRef then lab else ref in - if implicitHeaderRefs - then case M.lookup ref' headers of - Just ident -> constructor ('#':ident) "" lab - Nothing -> makeFallback - else makeFallback - Just (src,tit) -> constructor src tit lab + parsedRaw' + return $ do + keys <- asksF stateKeys + case M.lookup key keys of + Nothing -> do + headers <- asksF stateHeaders + ref' <- if labIsRef then lab else ref + if implicitHeaderRefs + then case M.lookup ref' headers of + Just ident -> constructor ('#':ident) "" <$> lab + Nothing -> makeFallback + else makeFallback + Just (src,tit) -> constructor src tit <$> lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1726,14 +1732,14 @@ dropBrackets = reverse . dropRB . reverse . dropLB dropLB ('[':xs) = xs dropLB xs = xs -bareURL :: MarkdownParser Inlines +bareURL :: MarkdownParser (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris (orig, src) <- uri <|> emailAddress notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") - return $ B.link src "" (B.str orig) + return $ return $ B.link src "" (B.str orig) -autoLink :: MarkdownParser Inlines +autoLink :: MarkdownParser (F Inlines) autoLink = try $ do char '<' (orig, src) <- uri <|> emailAddress @@ -1742,9 +1748,9 @@ autoLink = try $ do -- final punctuation. for example: in `<http://hi---there>`, -- the URI parser will stop before the dashes. extra <- fromEntities <$> manyTill nonspaceChar (char '>') - return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra) + return $ return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra) -image :: MarkdownParser Inlines +image :: MarkdownParser (F Inlines) image = try $ do char '!' (lab,raw) <- reference @@ -1754,33 +1760,38 @@ image = try $ do _ -> B.image src regLink constructor lab <|> referenceLink constructor (lab,raw) -note :: MarkdownParser Inlines +note :: MarkdownParser (F Inlines) note = try $ do guardEnabled Ext_footnotes - (stateInFootnote <$> getState) >>= guard . not ref <- noteMarker - notes <- asks stateNotes' - return $ + return $ do + notes <- asksF stateNotes' case lookup ref notes of - Nothing -> B.str $ "[^" ++ ref ++ "]" - Just contents -> B.note contents - -inlineNote :: MarkdownParser Inlines + Nothing -> return $ B.str $ "[^" ++ ref ++ "]" + Just contents -> do + st <- askF + -- process the note in a context that doesn't resolve + -- notes, to avoid infinite looping with notes inside + -- notes: + let contents' = runF contents st{ stateNotes' = [] } + return $ B.note contents' + +inlineNote :: MarkdownParser (F Inlines) inlineNote = try $ do guardEnabled Ext_inline_notes char '^' contents <- inlinesInBalancedBrackets - return . B.note . B.para $ contents + return $ B.note . B.para <$> contents -rawLaTeXInline' :: MarkdownParser Inlines +rawLaTeXInline' :: MarkdownParser (F Inlines) rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env - RawInline _ s <- generalize rawLaTeXInline - return $ B.rawInline "tex" s + RawInline _ s <- rawLaTeXInline + return $ return $ B.rawInline "tex" s -- "tex" because it might be context or latex -rawConTeXtEnvironment :: Monad m => ParserT [Char] st m String +rawConTeXtEnvironment :: Parser [Char] st String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1789,14 +1800,14 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: Monad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String +inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -spanHtml :: MarkdownParser Inlines +spanHtml :: MarkdownParser (F Inlines) spanHtml = try $ do guardEnabled Ext_native_spans (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) @@ -1808,10 +1819,10 @@ spanHtml = try $ do Just s | null ident && null classes && map toLower (filter (`notElem` " \t;") s) == "font-variant:small-caps" - -> return $ B.smallcaps contents - _ -> return $ B.spanWith (ident, classes, keyvals) contents + -> return $ B.smallcaps <$> contents + _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents -divHtml :: MarkdownParser Blocks +divHtml :: MarkdownParser (F Blocks) divHtml = try $ do guardEnabled Ext_native_divs (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) @@ -1829,11 +1840,11 @@ divHtml = try $ do let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) contents + return $ B.divWith (ident, classes, keyvals) <$> contents else -- avoid backtracing - return $ B.rawBlock "html" (rawtag <> bls) <> contents + return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents -rawHtmlInline :: MarkdownParser Inlines +rawHtmlInline :: MarkdownParser (F Inlines) rawHtmlInline = do guardEnabled Ext_raw_html inHtmlBlock <- stateInHtmlBlock <$> getState @@ -1848,17 +1859,19 @@ rawHtmlInline = do then (\x -> isInlineTag x && not (isCloseBlockTag x)) else not . isTextTag - return $ B.rawInline "html" result + return $ return $ B.rawInline "html" result -- Citations -cite :: MarkdownParser Inlines +cite :: MarkdownParser (F Inlines) cite = do guardEnabled Ext_citations - textualCite <|> do (cs, raw) <- withRaw normalCite - return $ B.cite cs (B.text raw) + citations <- textualCite + <|> do (cs, raw) <- withRaw normalCite + return $ (flip B.cite (B.text raw)) <$> cs + return citations -textualCite :: MarkdownParser Inlines +textualCite :: MarkdownParser (F Inlines) textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1872,26 +1885,29 @@ textualCite = try $ do case mbrest of Just (rest, raw) -> return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) - rest + <$> rest Nothing -> (do (cs, raw) <- withRaw $ bareloc first - return $ B.cite cs (B.text $ '@':key ++ " " ++ raw)) - <|> do st <- ask - return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] $ B.str $ '@':key + return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs) + <|> return (do st <- askF + return $ case M.lookup key (stateExamples st) of + Just n -> B.str (show n) + _ -> B.cite [first] $ B.str $ '@':key) -bareloc :: Citation -> MarkdownParser [Citation] +bareloc :: Citation -> MarkdownParser (F [Citation]) bareloc c = try $ do spnl char '[' suff <- suffix - rest <- option [] $ try $ char ';' >> citeList + rest <- option (return []) $ try $ char ';' >> citeList spnl char ']' - return $ c{ citationSuffix = B.toList suff } : rest + return $ do + suff' <- suff + rest' <- rest + return $ c{ citationSuffix = B.toList suff' } : rest' -normalCite :: MarkdownParser [Citation] +normalCite :: MarkdownParser (F [Citation]) normalCite = try $ do char '[' spnl @@ -1900,57 +1916,60 @@ normalCite = try $ do char ']' return citations -suffix :: MarkdownParser Inlines +suffix :: MarkdownParser (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl - rest <- trimInlines . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) + rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) return $ if hasSpace - then B.space <> rest + then (B.space <>) <$> rest else rest -prefix :: MarkdownParser Inlines -prefix = trimInlines . mconcat <$> +prefix :: MarkdownParser (F Inlines) +prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: MarkdownParser [Citation] -citeList = sepBy1 citation (try $ char ';' >> spnl) +citeList :: MarkdownParser (F [Citation]) +citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) -citation :: MarkdownParser Citation +citation :: MarkdownParser (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return Citation{ citationId = key - , citationPrefix = B.toList pref - , citationSuffix = B.toList suff - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - -smart :: MarkdownParser Inlines + return $ do + x <- pref + y <- suff + return $ Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + +smart :: MarkdownParser (F Inlines) smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice [apostrophe, dash, ellipses] + choice (map (return <$>) [apostrophe, dash, ellipses]) -singleQuoted :: MarkdownParser Inlines +singleQuoted :: MarkdownParser (F Inlines) singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ - B.singleQuoted . trimInlines . mconcat <$> + fmap B.singleQuoted . trimInlinesF . mconcat <$> many1Till inline singleQuoteEnd -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: MarkdownParser Inlines +doubleQuoted :: MarkdownParser (F Inlines) doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote doubleQuoteEnd >> return - (B.doubleQuoted . trimInlines $ contents)) - <|> return (B.str "\8220" <> contents) + (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return + (fmap B.doubleQuoted . trimInlinesF $ contents)) + <|> (return $ return (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index fc63cc11e..ad9dc3ee8 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,9 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-} {- Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> @@ -39,7 +36,8 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>) import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding ( newline, orderedListMarker +import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF + , newline, orderedListMarker , parseFromString, blanklines ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) @@ -47,17 +45,17 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Applicative ( pure +import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>) ) import Control.Arrow (first) -import Control.Monad (guard, mplus, mzero, when) -import Control.Monad.Reader (Reader, runReader, asks, local) +import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) +import Control.Monad.Reader (Reader, runReader, ask, asks, local) import Data.Char (isAlphaNum, toLower) import Data.Default -import Data.List (intersperse, isPrefixOf, isSuffixOf, foldl') +import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust) -import Data.Monoid (mconcat, mempty, mappend) +import Data.Monoid (Monoid, mconcat, mempty, mappend) import Network.HTTP (urlEncode) import Text.Pandoc.Error @@ -66,28 +64,19 @@ import Text.Pandoc.Error readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Either PandocError Pandoc -readOrg opts s = runOrg opts s parseOrg +readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") -data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext - , finalState :: OrgParserState } +data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) -runOrg :: ReaderOptions -> String -> OrgParser a -> Either PandocError a -runOrg opts inp p = fst <$> res - where - imd = readWithM (returnState p) def{ orgStateOptions = opts } (inp ++ "\n\n") - res = runReader imd def { finalState = s } - s :: OrgParserState - s = either def snd res - parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks st <- getState - let meta = orgStateMeta st + let meta = runF (orgStateMeta' st) st let removeUnwantedBlocks = dropCommentTrees . filter (/= Null) - return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ blocks') + return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st) -- | Drop COMMENT headers and the document tree below those headers. dropCommentTrees :: [Block] -> [Block] @@ -117,7 +106,7 @@ isHeaderLevelLowerEq n blk = -- Parser State for Org -- -type OrgNoteRecord = (String, Blocks) +type OrgNoteRecord = (String, F Blocks) type OrgNoteTable = [OrgNoteRecord] type OrgBlockAttributes = M.Map String String @@ -136,11 +125,12 @@ data OrgParserState = OrgParserState , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: Meta + , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable } instance Default OrgParserLocal where - def = OrgParserLocal NoQuote def + def = OrgParserLocal NoQuote instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions @@ -174,13 +164,13 @@ defaultOrgParserState = OrgParserState , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty , orgStateMeta = nullMeta + , orgStateMeta' = return nullMeta , orgStateNotes' = [] } recordAnchorId :: String -> OrgParser () recordAnchorId i = updateState $ \s -> - let as = orgStateAnchorIds s in - s{ orgStateAnchorIds = i : as } + s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } addBlockAttribute :: String -> String -> OrgParser () addBlockAttribute key val = updateState $ \s -> @@ -259,6 +249,30 @@ parseFromString parser str' = do -- Adaptions and specializations of parsing utilities -- +newtype F a = F { unF :: Reader OrgParserState a + } deriving (Monad, Applicative, Functor) + +runF :: F a -> OrgParserState -> a +runF = runReader . unF + +askF :: F OrgParserState +askF = F ask + +asksF :: (OrgParserState -> a) -> F a +asksF f = F $ asks f + +instance Monoid a => Monoid (F a) where + mempty = return mempty + mappend = liftM2 mappend + mconcat = fmap mconcat . sequence + +trimInlinesF :: F Inlines -> F Inlines +trimInlinesF = liftM trimInlines + +returnF :: a -> OrgParser (F a) +returnF = return . return + + -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char newline = @@ -277,10 +291,10 @@ blanklines = -- parsing blocks -- -parseBlocks :: OrgParser Blocks +parseBlocks :: OrgParser (F Blocks) parseBlocks = mconcat <$> manyTill block eof -block :: OrgParser Blocks +block :: OrgParser (F Blocks) block = choice [ mempty <$ blanklines , optionalAttributes $ choice [ orgBlock @@ -291,14 +305,14 @@ block = choice [ mempty <$ blanklines , drawer , specialLine , header - , hline + , return <$> hline , list , latexFragment , noteBlock , paraOrPlain ] <?> "block" -optionalAttributes :: OrgParser Blocks -> OrgParser Blocks +optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks) optionalAttributes parser = try $ resetBlockAttributes *> parseBlockAttributes *> parser @@ -318,7 +332,7 @@ parseAndAddAttribute key value = do let key' = map toLower key () <$ addBlockAttribute key' value -lookupInlinesAttr :: String -> OrgParser (Maybe Inlines) +lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines)) lookupInlinesAttr attr = try $ do val <- lookupBlockAttribute attr maybe (return Nothing) @@ -332,20 +346,20 @@ lookupInlinesAttr attr = try $ do type BlockProperties = (Int, String) -- (Indentation, Block-Type) -orgBlock :: OrgParser Blocks +orgBlock :: OrgParser (F Blocks) orgBlock = try $ do blockProp@(_, blkType) <- blockHeaderStart ($ blockProp) $ case blkType of "comment" -> withRaw' (const mempty) - "html" -> withRaw' (B.rawBlock blkType) - "latex" -> withRaw' (B.rawBlock blkType) - "ascii" -> withRaw' (B.rawBlock blkType) - "example" -> withRaw' exampleCode - "quote" -> withParsed B.blockQuote + "html" -> withRaw' (return . (B.rawBlock blkType)) + "latex" -> withRaw' (return . (B.rawBlock blkType)) + "ascii" -> withRaw' (return . (B.rawBlock blkType)) + "example" -> withRaw' (return . exampleCode) + "quote" -> withParsed (fmap B.blockQuote) "verse" -> verseBlock "src" -> codeBlock - _ -> withParsed (divWithClass blkType) + _ -> withParsed (fmap $ divWithClass blkType) blockHeaderStart :: OrgParser (Int, String) blockHeaderStart = try $ (,) <$> indent <*> blockType @@ -353,10 +367,10 @@ blockHeaderStart = try $ (,) <$> indent <*> blockType indent = length <$> many spaceChar blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) -withRaw' :: (String -> Blocks) -> BlockProperties -> OrgParser Blocks +withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) -withParsed :: (Blocks -> Blocks) -> BlockProperties -> OrgParser Blocks +withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) ignHeaders :: OrgParser () @@ -365,11 +379,11 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine) divWithClass :: String -> Blocks -> Blocks divWithClass cls = B.divWith ("", [cls], []) -verseBlock :: BlockProperties -> OrgParser Blocks +verseBlock :: BlockProperties -> OrgParser (F Blocks) verseBlock blkProp = try $ do ignHeaders content <- rawBlockContent blkProp - B.para . mconcat . intersperse B.linebreak + fmap B.para . mconcat . intersperse (pure B.linebreak) <$> mapM (parseFromString parseInlines) (lines content) exportsCode :: [(String, String)] -> Bool @@ -386,7 +400,7 @@ followingResultsBlock = *> blankline *> (unlines <$> many1 exampleLine)) -codeBlock :: BlockProperties -> OrgParser Blocks +codeBlock :: BlockProperties -> OrgParser (F Blocks) codeBlock blkProp = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -396,15 +410,17 @@ codeBlock blkProp = do let includeCode = exportsCode kv let includeResults = exportsResults kv let codeBlck = B.codeBlockWith ( id', classes, kv ) content - labelledBlck <- maybe codeBlck (labelDiv codeBlck) + labelledBlck <- maybe (pure codeBlck) + (labelDiv codeBlck) <$> lookupInlinesAttr "caption" - let resultBlck = maybe mempty exampleCode resultsContent + let resultBlck = pure $ maybe mempty (exampleCode) resultsContent return $ (if includeCode then labelledBlck else mempty) <> (if includeResults then resultBlck else mempty) where labelDiv blk value = - B.divWith nullAttr (labelledBlock value <> blk) - labelledBlock = B.plain . B.spanWith ("", ["label"], []) + B.divWith nullAttr <$> (mappend <$> labelledBlock value + <*> pure blk) + labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) rawBlockContent :: BlockProperties -> OrgParser String rawBlockContent (indent, blockType) = try $ @@ -413,7 +429,7 @@ rawBlockContent (indent, blockType) = try $ indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) -parsedBlockContent :: BlockProperties -> OrgParser Blocks +parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) parsedBlockContent blkProps = try $ do raw <- rawBlockContent blkProps parseFromString parseBlocks (raw ++ "\n") @@ -504,9 +520,9 @@ commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs commaEscaped cs = cs -example :: OrgParser Blocks +example :: OrgParser (F Blocks) example = try $ do - return . exampleCode =<< unlines <$> many1 exampleLine + return . return . exampleCode =<< unlines <$> many1 exampleLine exampleCode :: String -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) @@ -515,7 +531,7 @@ exampleLine :: OrgParser String exampleLine = try $ skipSpaces *> string ": " *> anyLine -- Drawers for properties or a logbook -drawer :: OrgParser Blocks +drawer :: OrgParser (F Blocks) drawer = try $ do drawerStart manyTill drawerLine (try drawerEnd) @@ -541,12 +557,14 @@ drawerEnd = try $ -- -- Figures (Image on a line by itself, preceded by name and/or caption) -figure :: OrgParser Blocks +figure :: OrgParser (F Blocks) figure = try $ do (cap, nam) <- nameAndCaption src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline guard (isImageFilename src) - return $ B.para $ B.image src nam cap + return $ do + cap' <- cap + return $ B.para $ B.image src nam cap' where nameAndCaption = do @@ -562,8 +580,8 @@ figure = try $ do -- -- Comments, Options and Metadata -specialLine :: OrgParser Blocks -specialLine = try $ metaLine <|> commentLine +specialLine :: OrgParser (F Blocks) +specialLine = fmap return . try $ metaLine <|> commentLine metaLine :: OrgParser Blocks metaLine = try $ mempty @@ -583,14 +601,14 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser () declarationLine = try $ do key <- metaKey - inlines <- metaInlines + inlinesF <- metaInlines updateState $ \st -> - let meta' = B.setMeta key inlines nullMeta - in st { orgStateMeta = orgStateMeta st <> meta' } + let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta + in st { orgStateMeta' = orgStateMeta' st <> meta' } return () -metaInlines :: OrgParser MetaValue -metaInlines = (MetaInlines . B.toList) <$> inlinesTillNewline +metaInlines :: OrgParser (F MetaValue) +metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") @@ -631,11 +649,11 @@ parseFormat = try $ do -- -- | Headers -header :: OrgParser Blocks +header :: OrgParser (F Blocks) header = try $ do level <- headerStart title <- inlinesTillNewline - return $ B.header level title + return $ B.header level <$> title headerStart :: OrgParser Int headerStart = try $ @@ -659,7 +677,7 @@ hline = try $ do -- Tables -- -data OrgTableRow = OrgContentRow [Blocks] +data OrgTableRow = OrgContentRow (F [Blocks]) | OrgAlignRow [Alignment] | OrgHlineRow @@ -670,13 +688,13 @@ data OrgTable = OrgTable , orgTableRows :: [[Blocks]] } -table :: OrgParser Blocks +table :: OrgParser (F Blocks) table = try $ do lookAhead tableStart do rows <- tableRows - (cptn :: Inlines) <- fromMaybe "" <$> lookupInlinesAttr "caption" - return $ ($ cptn) . orgToPandocTable . normalizeTable . rowsToTable $ rows + cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption" + return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows orgToPandocTable :: OrgTable -> Inlines @@ -692,11 +710,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) -tableContentCell :: OrgParser Blocks +tableContentCell :: OrgParser (F Blocks) tableContentCell = try $ - B.plain . trimInlines . mconcat <$> many1Till inline endOfCell + fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell endOfCell :: OrgParser Char endOfCell = try $ char '|' <|> lookAhead newline @@ -728,8 +746,8 @@ tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) rowsToTable :: [OrgTableRow] - -> OrgTable -rowsToTable = foldl' (flip rowToContent) zeroTable + -> F OrgTable +rowsToTable = foldM (flip rowToContent) zeroTable where zeroTable = OrgTable 0 mempty mempty mempty normalizeTable :: OrgTable @@ -748,43 +766,45 @@ normalizeTable (OrgTable cols aligns heads lns) = -- line as a header. All other horizontal lines are discarded. rowToContent :: OrgTableRow -> OrgTable - -> OrgTable + -> F OrgTable rowToContent OrgHlineRow t = maybeBodyToHeader t -rowToContent (OrgAlignRow as) t = setLongestRow as . setAligns as $ t -rowToContent (OrgContentRow rf) t = setLongestRow rf . appendToBody rf $ t +rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t +rowToContent (OrgContentRow rf) t = do + rs <- rf + setLongestRow rs =<< appendToBody rs t setLongestRow :: [a] -> OrgTable - -> OrgTable + -> F OrgTable setLongestRow rs t = - t{ orgTableColumns = max (length rs) (orgTableColumns t) } + return t{ orgTableColumns = max (length rs) (orgTableColumns t) } maybeBodyToHeader :: OrgTable - -> OrgTable + -> F OrgTable maybeBodyToHeader t = case t of OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - t{ orgTableHeader = b , orgTableRows = [] } - _ -> t + return t{ orgTableHeader = b , orgTableRows = [] } + _ -> return t appendToBody :: [Blocks] -> OrgTable - -> OrgTable -appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] } + -> F OrgTable +appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] } setAligns :: [Alignment] -> OrgTable - -> OrgTable -setAligns aligns t = t{ orgTableAlignments = aligns } + -> F OrgTable +setAligns aligns t = return $ t{ orgTableAlignments = aligns } -- -- LaTeX fragments -- -latexFragment :: OrgParser Blocks +latexFragment :: OrgParser (F Blocks) latexFragment = try $ do envName <- latexEnvStart content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) - return $ B.rawBlock "latex" (content `inLatexEnv` envName) + return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) where c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" , c @@ -814,7 +834,7 @@ latexEnvName = try $ do -- -- Footnote defintions -- -noteBlock :: OrgParser Blocks +noteBlock :: OrgParser (F Blocks) noteBlock = try $ do ref <- noteMarker <* skipSpaces content <- mconcat <$> blocksTillHeaderOrNote @@ -826,37 +846,37 @@ noteBlock = try $ do <|> () <$ lookAhead headerStart) -- Paragraphs or Plain text -paraOrPlain :: OrgParser Blocks +paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ do ils <- parseInlines nl <- option False (newline >> return True) try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >> - (return $ B.para ils)) - <|> (return $ B.plain ils) + return (B.para <$> ils)) + <|> (return (B.plain <$> ils)) -inlinesTillNewline :: OrgParser Inlines -inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline +inlinesTillNewline :: OrgParser (F Inlines) +inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline -- -- list blocks -- -list :: OrgParser Blocks +list :: OrgParser (F Blocks) list = choice [ definitionList, bulletList, orderedList ] <?> "list" -definitionList :: OrgParser Blocks +definitionList :: OrgParser (F Blocks) definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - B.definitionList . compactify'DL + fmap B.definitionList . fmap compactify'DL . sequence <$> many1 (definitionListItem $ bulletListStart' (Just n)) -bulletList :: OrgParser Blocks +bulletList :: OrgParser (F Blocks) bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - B.bulletList . compactify' + fmap B.bulletList . fmap compactify' . sequence <$> many1 (listItem (bulletListStart' $ Just n)) -orderedList :: OrgParser Blocks -orderedList = B.orderedList . compactify' +orderedList :: OrgParser (F Blocks) +orderedList = fmap B.orderedList . fmap compactify' . sequence <$> many1 (listItem orderedListStart) genericListStart :: OrgParser String @@ -893,7 +913,7 @@ orderedListStart = genericListStart orderedListMarker where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") definitionListItem :: OrgParser Int - -> OrgParser (Inlines, [Blocks]) + -> OrgParser (F (Inlines, [Blocks])) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") @@ -902,12 +922,12 @@ definitionListItem parseMarkerGetLength = try $ do cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString parseInlines term contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont - return (term', [contents']) + return $ (,) <$> term' <*> fmap (:[]) contents' -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int - -> OrgParser Blocks + -> OrgParser (F Blocks) listItem start = try $ do markerLength <- try start firstLine <- anyLineNewline @@ -933,7 +953,7 @@ anyLineNewline = (++ "\n") <$> anyLine -- inline -- -inline :: OrgParser Inlines +inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak @@ -960,31 +980,31 @@ inline = ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" -parseInlines :: OrgParser Inlines -parseInlines = trimInlines . mconcat <$> many1 inline +parseInlines :: OrgParser (F Inlines) +parseInlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~" -whitespace :: OrgParser Inlines -whitespace = B.space <$ skipMany1 spaceChar +whitespace :: OrgParser (F Inlines) +whitespace = pure B.space <$ skipMany1 spaceChar <* updateLastPreCharPos <* updateLastForbiddenCharPos <?> "whitespace" -linebreak :: OrgParser Inlines -linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline +linebreak :: OrgParser (F Inlines) +linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline -str :: OrgParser Inlines -str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str :: OrgParser (F Inlines) +str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural -- break. This should reflect the values of the Emacs variable -- @org-element-pagaraph-separate@. -endline :: OrgParser Inlines +endline :: OrgParser (F Inlines) endline = try $ do newline notFollowedBy blankline @@ -1002,72 +1022,77 @@ endline = try $ do decEmphasisNewlinesCount guard =<< newlinesCountWithinLimits updateLastPreCharPos - return $ B.space + return . return $ B.space -cite :: OrgParser Inlines +cite :: OrgParser (F Inlines) cite = try $ do guardEnabled Ext_citations (cs, raw) <- withRaw normalCite - return $ flip B.cite (B.text raw) cs + return $ (flip B.cite (B.text raw)) <$> cs -normalCite :: OrgParser [Citation] +normalCite :: OrgParser (F [Citation]) normalCite = try $ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' -citeList :: OrgParser [Citation] -citeList = sepBy1 citation (try $ char ';' *> skipSpaces) +citeList :: OrgParser (F [Citation]) +citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) -citation :: OrgParser Citation +citation :: OrgParser (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ Citation{ citationId = key - , citationPrefix = B.toList pref - , citationSuffix = B.toList suff - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } + return $ do + x <- pref + y <- suff + return $ Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } where - prefix = trimInlines . mconcat <$> + prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) skipSpaces - rest <- trimInlines . mconcat <$> + rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") *> inline) - return $ - if hasSpace - then B.space <> rest - else rest + return $ if hasSpace + then (B.space <>) <$> rest + else rest -footnote :: OrgParser Inlines +footnote :: OrgParser (F Inlines) footnote = try $ inlineNote <|> referencedNote -inlineNote :: OrgParser Inlines +inlineNote :: OrgParser (F Inlines) inlineNote = try $ do string "[fn:" ref <- many alphaNum char ':' - note <- B.para . trimInlines . mconcat <$> many1Till inline (char ']') + note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') when (not $ null ref) $ addToNotesTable ("fn:" ++ ref, note) - return $ B.note note + return $ B.note <$> note -referencedNote :: OrgParser Inlines +referencedNote :: OrgParser (F Inlines) referencedNote = try $ do ref <- noteMarker - notes <- asks (orgStateNotes' . finalState) - return $ + return $ do + notes <- asksF orgStateNotes' case lookup ref notes of - Just contents -> B.note contents - Nothing -> B.str $ "[" ++ ref ++ "]" + Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Just contents -> do + st <- askF + let contents' = runF contents st{ orgStateNotes' = [] } + return $ B.note contents' noteMarker :: OrgParser String noteMarker = try $ do @@ -1077,37 +1102,37 @@ noteMarker = try $ do <*> many1Till (noneOf "\n\r\t ") (char ']') ] -linkOrImage :: OrgParser Inlines +linkOrImage :: OrgParser (F Inlines) linkOrImage = explicitOrImageLink <|> selflinkOrImage <|> angleLink <|> plainLink <?> "link or image" -explicitOrImageLink :: OrgParser Inlines +explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do char '[' - src <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget + srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - alt <- internalLink src title' - return $ - (if isImageFilename title - then B.link src "" $ B.image title mempty mempty - else fromMaybe alt (linkToInlines src title')) + return $ do + src <- srcF + if isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' -selflinkOrImage :: OrgParser Inlines +selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return $ fromMaybe "" (linkToInlines src (B.str src)) + return $ linkToInlinesF src (B.str src) -plainLink :: OrgParser Inlines +plainLink :: OrgParser (F Inlines) plainLink = try $ do (orig, src) <- uri - return $ B.link src "" (B.str orig) + returnF $ B.link src "" (B.str orig) -angleLink :: OrgParser Inlines +angleLink :: OrgParser (F Inlines) angleLink = try $ do char '<' link <- plainLink @@ -1123,31 +1148,26 @@ linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") possiblyEmptyLinkTarget :: OrgParser String possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser String +applyCustomLinkFormat :: String -> OrgParser (F String) applyCustomLinkFormat link = do let (linkType, rest) = break (== ':') link - fmts <- asks finalState - return $ - case M.lookup linkType (orgStateLinkFormatters fmts) of - Just v -> (v (drop 1 rest)) - Nothing -> link + return $ do + formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters + return $ maybe link ($ drop 1 rest) formatter -- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind -- of parsing. -linkToInlines :: String -> Inlines -> Maybe Inlines -linkToInlines = \s -> +linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF s = case s of - _ | null s -> Just . B.link "" "" - _ | isAnchor s -> Just . B.link s "" - _ | isImageFilename s -> const . Just $ B.image s "" "" - _ | isFileLink s -> Just . B.link (dropLinkType s) "" - _ | isUri s -> Just . B.link s "" - _ | isAbsoluteFilePath s -> Just . B.link ("file://" ++ s) "" - _ | isRelativeFilePath s -> Just . B.link s "" - _ -> const Nothing - -isAnchor :: String -> Bool -isAnchor s = "#" `isPrefixOf` s + "" -> pure . B.link "" "" + ('#':_) -> pure . B.link s "" + _ | isImageFilename s -> const . pure $ B.image s "" "" + _ | isFileLink s -> pure . B.link (dropLinkType s) "" + _ | isUri s -> pure . B.link s "" + _ | isAbsoluteFilePath s -> pure . B.link ("file://" ++ s) "" + _ | isRelativeFilePath s -> pure . B.link s "" + _ -> internalLink s isFileLink :: String -> Bool isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s) @@ -1176,13 +1196,12 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] -internalLink :: String -> Inlines -> OrgParser Inlines +internalLink :: String -> Inlines -> F Inlines internalLink link title = do - anchorB <- asks finalState - return $ - if link `elem` (orgStateAnchorIds anchorB) - then B.link ('#':link) "" title - else B.emph title + anchorB <- (link `elem`) <$> asksF orgStateAnchorIds + if anchorB + then return $ B.link ('#':link) "" title + else return $ B.emph title -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with -- @anchor-id@ set as id. Legal anchors in org-mode are defined through @@ -1190,11 +1209,11 @@ internalLink link title = do -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as -- an anchor. -anchor :: OrgParser Inlines +anchor :: OrgParser (F Inlines) anchor = try $ do anchorId <- parseAnchor recordAnchorId anchorId - return $ B.spanWith (solidify anchorId, [], []) mempty + returnF $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" *> many1 (noneOf "\t\n\r<>\"' ") @@ -1212,7 +1231,7 @@ solidify = map replaceSpecialChar | otherwise = '-' -- | Parses an inline code block and marks it as an babel block. -inlineCodeBlock :: OrgParser Inlines +inlineCodeBlock :: OrgParser (F Inlines) inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar @@ -1220,7 +1239,7 @@ inlineCodeBlock = try $ do inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) - return $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode + returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode enclosedByPair :: Char -- ^ opening char -> Char -- ^ closing char @@ -1228,51 +1247,54 @@ enclosedByPair :: Char -- ^ opening char -> OrgParser [a] enclosedByPair s e p = char s *> many1Till p (char e) -emph :: OrgParser Inlines -emph = B.emph <$> emphasisBetween '/' +emph :: OrgParser (F Inlines) +emph = fmap B.emph <$> emphasisBetween '/' -strong :: OrgParser Inlines -strong = B.strong <$> emphasisBetween '*' +strong :: OrgParser (F Inlines) +strong = fmap B.strong <$> emphasisBetween '*' -strikeout :: OrgParser Inlines -strikeout = B.strikeout <$> emphasisBetween '+' +strikeout :: OrgParser (F Inlines) +strikeout = fmap B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. -underline :: OrgParser Inlines -underline = B.strong <$> emphasisBetween '_' +underline :: OrgParser (F Inlines) +underline = fmap B.strong <$> emphasisBetween '_' -verbatim :: OrgParser Inlines -verbatim = B.code <$> verbatimBetween '=' +verbatim :: OrgParser (F Inlines) +verbatim = return . B.code <$> verbatimBetween '=' -code :: OrgParser Inlines -code = B.code <$> verbatimBetween '~' +code :: OrgParser (F Inlines) +code = return . B.code <$> verbatimBetween '~' -subscript :: OrgParser Inlines -subscript = B.subscript <$> try (char '_' *> subOrSuperExpr) +subscript :: OrgParser (F Inlines) +subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) -superscript :: OrgParser Inlines -superscript = B.superscript <$> try (char '^' *> subOrSuperExpr) +superscript :: OrgParser (F Inlines) +superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) -math :: OrgParser Inlines -math = B.math <$> choice [ math1CharBetween '$' +math :: OrgParser (F Inlines) +math = return . B.math <$> choice [ math1CharBetween '$' , mathStringBetween '$' , rawMathBetween "\\(" "\\)" ] -displayMath :: OrgParser Inlines -displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" - ] +displayMath :: OrgParser (F Inlines) +displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" + , rawMathBetween "$$" "$$" + ] + +updatePositions :: Char + -> OrgParser (Char) +updatePositions c = do + when (c `elem` emphasisPreChars) updateLastPreCharPos + when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos + return c -symbol :: OrgParser Inlines -symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) - where updatePositions c = do - when (c `elem` emphasisPreChars) updateLastPreCharPos - when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos - return c +symbol :: OrgParser (F Inlines) +symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) emphasisBetween :: Char - -> OrgParser Inlines + -> OrgParser (F Inlines) emphasisBetween c = try $ do startEmphasisNewlinesCounting emphasisAllowedNewlines res <- enclosedInlines (emphasisStart c) (emphasisEnd c) @@ -1349,9 +1371,9 @@ mathEnd c = try $ do enclosedInlines :: OrgParser a -> OrgParser b - -> OrgParser Inlines + -> OrgParser (F Inlines) enclosedInlines start end = try $ - trimInlines . mconcat <$> enclosed start end inline + trimInlinesF . mconcat <$> enclosed start end inline enclosedRaw :: OrgParser a -> OrgParser b @@ -1430,7 +1452,7 @@ notAfterForbiddenBorderChar = do return $ lastFBCPos /= Just pos -- | Read a sub- or superscript expression -subOrSuperExpr :: OrgParser Inlines +subOrSuperExpr :: OrgParser (F Inlines) subOrSuperExpr = try $ choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") @@ -1445,10 +1467,10 @@ simpleSubOrSuperString = try $ <*> many1 alphaNum ] -inlineLaTeX :: OrgParser Inlines +inlineLaTeX :: OrgParser (F Inlines) inlineLaTeX = try $ do cmd <- inlineLaTeXCommand - maybe mzero return $ + maybe mzero returnF $ parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd where parseAsMath :: String -> Maybe Inlines @@ -1481,30 +1503,30 @@ inlineLaTeXCommand = try $ do return cs _ -> mzero -smart :: OrgParser Inlines +smart :: OrgParser (F Inlines) smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice [orgApostrophe, dash, ellipses] + choice (map (return <$>) [orgApostrophe, dash, ellipses]) where orgApostrophe = (char '\'' <|> char '\8217') <* updateLastPreCharPos <* updateLastForbiddenCharPos *> return (B.str "\x2019") -singleQuoted :: OrgParser Inlines +singleQuoted :: OrgParser (F Inlines) singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ - B.singleQuoted . trimInlines . mconcat <$> + fmap B.singleQuoted . trimInlinesF . mconcat <$> many1Till inline singleQuoteEnd -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: OrgParser Inlines +doubleQuoted :: OrgParser (F Inlines) doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return - (B.doubleQuoted . trimInlines $ contents)) - <|> (return $ (B.str "\8220") <> contents) + (fmap B.doubleQuoted . trimInlinesF $ contents)) + <|> (return $ return (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index a8112bc81..564267ee5 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -60,7 +60,7 @@ readRST :: ReaderOptions -- ^ Reader options readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) -readRSTWithWarnings opts s = (readWith (returnWarnings parseRST)) def{ stateOptions = opts } (s ++ "\n\n") +readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n") type RSTParser = Parser [Char] ParserState |