From 2bc5fb6d8303b17be57a509248b687ceeddc07d1 Mon Sep 17 00:00:00 2001 From: Alexander Krotov Date: Sun, 8 Apr 2018 06:44:03 +0300 Subject: Muse reader: document implementation --- src/Text/Pandoc/Readers/Muse.hs | 229 ++++++++++++++++++++++++---------------- 1 file changed, 136 insertions(+), 93 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 66167b243..fa520fb83 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -84,24 +84,21 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , museLogMessages :: [LogMessage] , museNotes :: M.Map String (SourcePos, F Blocks) - , museInLink :: Bool - , museInPara :: Bool + , museInLink :: Bool -- ^ True when parsing a link description to avoid nested links + , museInPara :: Bool -- ^ True when looking for a paragraph terminator } instance Default MuseState where - def = defaultMuseState - -defaultMuseState :: MuseState -defaultMuseState = MuseState { museMeta = return nullMeta - , museOptions = def - , museHeaders = M.empty - , museIdentifierList = Set.empty - , museLastStrPos = Nothing - , museLogMessages = [] - , museNotes = M.empty - , museInLink = False - , museInPara = False - } + def = MuseState { museMeta = return nullMeta + , museOptions = def + , museHeaders = M.empty + , museIdentifierList = Set.empty + , museLastStrPos = Nothing + , museLogMessages = [] + , museNotes = M.empty + , museInLink = False + , museInPara = False + } type MuseParser = ParserT String MuseState @@ -124,10 +121,7 @@ instance HasLogMessages MuseState where addLogMessage m s = s{ museLogMessages = m : museLogMessages s } getLogMessages = reverse . museLogMessages --- --- main parser --- - +-- | Parse Muse document parseMuse :: PandocMonad m => MuseParser m Pandoc parseMuse = do many directive @@ -139,14 +133,56 @@ parseMuse = do reportLogMessages return doc --- --- utility functions --- +-- * Utility functions + +commonPrefix :: String -> String -> String +commonPrefix _ [] = [] +commonPrefix [] _ = [] +commonPrefix (x:xs) (y:ys) + | x == y = x : commonPrefix xs ys + | otherwise = [] + +-- | Trim up to one newline from the beginning of the string. +lchop :: String -> String +lchop s = case s of + '\n':ss -> ss + _ -> s + +-- | Trim up to one newline from the end of the string. +rchop :: String -> String +rchop = reverse . lchop . reverse + +dropSpacePrefix :: [String] -> [String] +dropSpacePrefix lns = + map (drop maxIndent) lns + where flns = filter (not . all (== ' ')) lns + maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns + +atStart :: PandocMonad m => MuseParser m a -> MuseParser m a +atStart p = do + pos <- getPosition + st <- getState + guard $ museLastStrPos st /= Just pos + p + +-- * Parsers +-- | Parse end-of-line, which can be either a newline or end-of-file. eol :: Stream s m Char => ParserT s st m () eol = void newline <|> eof -htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String) +someUntil :: (Stream s m t) + => ParserT s u m a + -> ParserT s u m b + -> ParserT s u m ([a], b) +someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end + +-- ** HTML parsers + +-- | Parse HTML tag, returning its attributes and literal contents. +htmlElement :: PandocMonad m + => String -- ^ Tag name + -> MuseParser m (Attr, String) htmlElement tag = try $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) content <- manyTill anyChar endtag @@ -154,13 +190,16 @@ htmlElement tag = try $ do where endtag = void $ htmlTag (~== TagClose tag) -htmlBlock :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlBlock :: PandocMonad m + => String -- ^ Tag name + -> MuseParser m (Attr, String) htmlBlock tag = try $ do many spaceChar res <- htmlElement tag manyTill spaceChar eol return res +-- | Convert HTML attributes to Pandoc 'Attr' htmlAttrToPandoc :: [Attribute String] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where @@ -169,7 +208,8 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals) keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] parseHtmlContent :: PandocMonad m - => String -> MuseParser m (Attr, F Blocks) + => String -- ^ Tag name + -> MuseParser m (Attr, F Blocks) parseHtmlContent tag = try $ do many spaceChar pos <- getPosition @@ -181,29 +221,7 @@ parseHtmlContent tag = try $ do where endtag = void $ htmlTag (~== TagClose tag) -commonPrefix :: String -> String -> String -commonPrefix _ [] = [] -commonPrefix [] _ = [] -commonPrefix (x:xs) (y:ys) - | x == y = x : commonPrefix xs ys - | otherwise = [] - -atStart :: PandocMonad m => MuseParser m a -> MuseParser m a -atStart p = do - pos <- getPosition - st <- getState - guard $ museLastStrPos st /= Just pos - p - -someUntil :: (Stream s m t) - => ParserT s u m a - -> ParserT s u m b - -> ParserT s u m ([a], b) -someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end - --- --- directive parsers --- +-- ** Directive parsers -- While not documented, Emacs Muse allows "-" in directive name parseDirectiveKey :: PandocMonad m => MuseParser m String @@ -234,9 +252,7 @@ directive = do where translateKey "cover" = "cover-image" translateKey x = x --- --- block parsers --- +-- ** Block parsers parseBlocks :: PandocMonad m => MuseParser m (F Blocks) @@ -329,6 +345,7 @@ blockElements = do , commentTag ] +-- | Parse a line comment, starting with @;@ in the first column. comment :: PandocMonad m => MuseParser m (F Blocks) comment = try $ do char ';' @@ -336,6 +353,7 @@ comment = try $ do eol return mempty +-- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters. separator :: PandocMonad m => MuseParser m (F Blocks) separator = try $ do string "----" @@ -344,6 +362,7 @@ separator = try $ do eol return $ return B.horizontalRule +-- | Parse a heading. header :: PandocMonad m => MuseParser m (F Blocks) header = try $ do anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol) @@ -355,6 +374,8 @@ header = try $ do attr <- registerHeader (anchorId, [], []) (runF content def) return $ B.headerWith attr level <$> content +-- | Parse an example between @{{{@ and @}}}@. +-- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation. example :: PandocMonad m => MuseParser m (F Blocks) example = try $ do string "{{{" @@ -362,27 +383,14 @@ example = try $ do contents <- manyTill anyChar $ try (optional blankline >> string "}}}") return $ return $ B.codeBlock contents --- Trim up to one newline from the beginning of the string. -lchop :: String -> String -lchop s = case s of - '\n':ss -> ss - _ -> s - --- Trim up to one newline from the end of the string. -rchop :: String -> String -rchop = reverse . lchop . reverse - -dropSpacePrefix :: [String] -> [String] -dropSpacePrefix lns = - map (drop maxIndent) lns - where flns = filter (not . all (== ' ')) lns - maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns - +-- | Parse an @\@ tag. exampleTag :: PandocMonad m => MuseParser m (F Blocks) exampleTag = try $ do (attr, contents) <- htmlBlock "example" return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents +-- | Parse a @\@ tag as a raw block. +-- For 'RawInline' @\@ parser, see 'inlineLiteralTag'. literalTag :: PandocMonad m => MuseParser m (F Blocks) literalTag = try $ do many spaceChar @@ -397,30 +405,36 @@ literalTag = try $ do format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content ---
tag is ignored +-- | Parse @\
@ tag. +-- Currently it is ignored as Pandoc cannot represent centered blocks. centerTag :: PandocMonad m => MuseParser m (F Blocks) centerTag = snd <$> parseHtmlContent "center" --- tag is ignored +-- | Parse @\@ tag. +-- Currently it is ignored as Pandoc cannot represent centered blocks. rightTag :: PandocMonad m => MuseParser m (F Blocks) rightTag = snd <$> parseHtmlContent "right" +-- | Parse @\@ tag. quoteTag :: PandocMonad m => MuseParser m (F Blocks) quoteTag = fmap B.blockQuote . snd <$> parseHtmlContent "quote" ---
tag is supported by Emacs Muse, but not Amusewiki 2.025 +-- | Parse @\
@ tag. +-- @\
@ tag is supported by Emacs Muse, but not Amusewiki 2.025. divTag :: PandocMonad m => MuseParser m (F Blocks) divTag = do (attrs, content) <- parseHtmlContent "div" return $ B.divWith attrs <$> content --- tag is supported by Amusewiki only +-- | Parse @\@ tag, the result is the same as @\
@. +-- @\@ tag is supported only in Text::Amuse mode. biblioTag :: PandocMonad m => MuseParser m (F Blocks) biblioTag = do guardEnabled Ext_amuse fmap (B.divWith ("", ["biblio"], [])) . snd <$> parseHtmlContent "biblio" --- tag is supported by Amusewiki only +-- | Parse @\@ tag, the result is the same as @\
@. +-- @\@ tag is supported only in Text::Amuse mode. playTag :: PandocMonad m => MuseParser m (F Blocks) playTag = do guardEnabled Ext_amuse @@ -437,17 +451,19 @@ verseLines = do lns <- many verseLine return $ B.lineBlock <$> sequence lns +-- | Parse @\@ tag. verseTag :: PandocMonad m => MuseParser m (F Blocks) verseTag = do (_, content) <- htmlBlock "verse" parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content) +-- | Parse @\@ tag. commentTag :: PandocMonad m => MuseParser m (F Blocks) commentTag = htmlBlock "comment" >> return mempty --- Indented paragraph is either center, right or quote +-- | Parse a paragraph. paraUntil :: PandocMonad m - => MuseParser m a + => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Blocks, a) paraUntil end = do state <- getState @@ -514,19 +530,18 @@ blanklineVerseLine = try $ do blankline pure mempty +-- | Parse a line block indicated by @\'>\'@ characters. lineBlock :: PandocMonad m => MuseParser m (F Blocks) lineBlock = try $ do col <- sourceColumn <$> getPosition lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1)) return $ B.lineBlock <$> sequence lns --- --- lists --- +-- *** List parsers bulletListItemsUntil :: PandocMonad m - => Int - -> MuseParser m a + => Int -- ^ Indentation + -> MuseParser m a -- ^ Terminator parser -> MuseParser m ([F Blocks], a) bulletListItemsUntil indent end = try $ do char '-' @@ -537,6 +552,7 @@ bulletListItemsUntil indent end = try $ do Left ee -> return ([x], ee) Right (xs, ee) -> return (x:xs, ee) +-- | Parse a bullet list. bulletListUntil :: PandocMonad m => MuseParser m a -> MuseParser m (F Blocks, a) @@ -585,6 +601,7 @@ orderedListItemsUntil indent style end = Left ee -> return ([x], ee) Right (xs, ee) -> return (x:xs, ee) +-- | Parse an ordered list. orderedListUntil :: PandocMonad m => MuseParser m a -> MuseParser m (F Blocks, a) @@ -629,8 +646,9 @@ definitionListItemsUntil indent end = Left ee -> return ([xx], ee) Right (xs, ee) -> return (xx:xs, ee) +-- | Parse a definition list. definitionListUntil :: PandocMonad m - => MuseParser m a + => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Blocks, a) definitionListUntil end = try $ do many spaceChar @@ -640,15 +658,14 @@ definitionListUntil end = try $ do first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end anyListUntil :: PandocMonad m - => MuseParser m a + => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Blocks, a) anyListUntil end = bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end --- --- tables --- +-- *** Table parsers +-- | Internal Muse table representation. data MuseTable = MuseTable { museTableCaption :: Inlines , museTableHeaders :: [[Blocks]] @@ -698,6 +715,7 @@ elementsToTable :: [MuseTableElement] -> F MuseTable elementsToTable = foldM museAppendElement emptyTable where emptyTable = MuseTable mempty mempty mempty mempty +-- | Parse a table. table :: PandocMonad m => MuseParser m (F Blocks) table = try $ fmap museToPandocTable <$> (elementsToTable <$> tableElements) @@ -707,31 +725,35 @@ tableParseElement = tableParseHeader <|> tableParseFooter <|> tableParseCaption -tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks]) +tableParseRow :: PandocMonad m + => Int -- ^ Number of separator characters + -> MuseParser m (F [Blocks]) tableParseRow n = try $ do fields <- tableCell `sepBy2` fieldSep return $ sequence fields where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p) fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline)) +-- | Parse a table header row. tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement tableParseHeader = MuseHeaderRow <$> tableParseRow 2 +-- | Parse a table body row. tableParseBody :: PandocMonad m => MuseParser m MuseTableElement tableParseBody = MuseBodyRow <$> tableParseRow 1 +-- | Parse a table footer row. tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement tableParseFooter = MuseFooterRow <$> tableParseRow 3 +-- | Parse table caption. tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement tableParseCaption = try $ do many spaceChar string "|+" MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|")) --- --- inline parsers --- +-- ** Inline parsers inlineList :: PandocMonad m => [MuseParser m (F Inlines)] inlineList = [ whitespace @@ -761,6 +783,7 @@ inlineList = [ whitespace inline :: PandocMonad m => MuseParser m (F Inlines) inline = endline <|> choice inlineList "inline" +-- | Parse a soft break. endline :: PandocMonad m => MuseParser m (F Inlines) endline = try $ do newline @@ -779,6 +802,7 @@ anchor = try $ do skipMany spaceChar <|> void newline return $ return $ B.spanWith (anchorId, [], []) mempty +-- | Parse a footnote reference. footnote :: PandocMonad m => MuseParser m (F Inlines) footnote = try $ do ref <- noteMarker @@ -796,6 +820,7 @@ whitespace = try $ do skipMany1 spaceChar return $ return B.space +-- | Parse @\
@ tag. br :: PandocMonad m => MuseParser m (F Inlines) br = try $ do string "
" @@ -811,42 +836,54 @@ enclosedInlines :: (PandocMonad m, Show a, Show b) enclosedInlines start end = try $ trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter)) +-- | Parse an inline tag, such as @\@ and @\@. inlineTag :: PandocMonad m - => String + => String -- ^ Tag name -> MuseParser m (F Inlines) inlineTag tag = try $ do htmlTag (~== TagOpen tag []) mconcat <$> manyTill inline (void $ htmlTag (~== TagClose tag)) -strongTag :: PandocMonad m => MuseParser m (F Inlines) -strongTag = fmap B.strong <$> inlineTag "strong" - +-- | Parse strong inline markup, indicated by @**@. strong :: PandocMonad m => MuseParser m (F Inlines) strong = fmap B.strong <$> emphasisBetween (string "**") +-- | Parse emphasis inline markup, indicated by @*@. emph :: PandocMonad m => MuseParser m (F Inlines) emph = fmap B.emph <$> emphasisBetween (char '*') +-- | Parse underline inline markup, indicated by @_@. +-- Supported only in Emacs Muse mode, not Text::Amuse. underlined :: PandocMonad m => MuseParser m (F Inlines) underlined = do guardDisabled Ext_amuse -- Supported only by Emacs Muse fmap underlineSpan <$> emphasisBetween (char '_') +-- | Parse @\@ tag. +strongTag :: PandocMonad m => MuseParser m (F Inlines) +strongTag = fmap B.strong <$> inlineTag "strong" + +-- | Parse @\@ tag. emphTag :: PandocMonad m => MuseParser m (F Inlines) emphTag = fmap B.emph <$> inlineTag "em" +-- | Parse @\@ tag. superscriptTag :: PandocMonad m => MuseParser m (F Inlines) superscriptTag = fmap B.superscript <$> inlineTag "sup" +-- | Parse @\@ tag. subscriptTag :: PandocMonad m => MuseParser m (F Inlines) subscriptTag = fmap B.subscript <$> inlineTag "sub" +-- | Parse @\@ tag. strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) strikeoutTag = fmap B.strikeout <$> inlineTag "del" +-- | Parse @\@ tag. verbatimTag :: PandocMonad m => MuseParser m (F Inlines) verbatimTag = return . B.text . snd <$> htmlElement "verbatim" +-- | Parse @\@ tag. classTag :: PandocMonad m => MuseParser m (F Inlines) classTag = do (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "class" []) @@ -854,11 +891,13 @@ classTag = do let classes = maybe [] words $ lookup "name" attrs return $ B.spanWith ("", classes, []) <$> mconcat res +-- | Parse "~~" as nonbreaking space. nbsp :: PandocMonad m => MuseParser m (F Inlines) nbsp = try $ do string "~~" return $ return $ B.str "\160" +-- | Parse code markup, indicated by @\'=\'@ characters. code :: PandocMonad m => MuseParser m (F Inlines) code = try $ do atStart $ char '=' @@ -869,13 +908,16 @@ code = try $ do notFollowedBy $ satisfy isLetter return $ return $ B.code contents +-- | Parse @\@ tag. codeTag :: PandocMonad m => MuseParser m (F Inlines) codeTag = return . uncurry B.codeWith <$> htmlElement "code" --- tag is an Emacs Muse extension enabled by (require 'muse-latex2png) +-- | Parse @\@ tag. +-- @\@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@ mathTag :: PandocMonad m => MuseParser m (F Inlines) mathTag = return . B.math . snd <$> htmlElement "math" +-- | Parse inline @\@ tag as a raw inline. inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) inlineLiteralTag = (return . rawInline) <$> htmlElement "literal" @@ -890,6 +932,7 @@ str = return . B.str <$> many1 alphaNum <* updateLastStrPos symbol :: PandocMonad m => MuseParser m (F Inlines) symbol = return . B.str <$> count 1 nonspaceChar +-- | Parse a link or image. link :: PandocMonad m => MuseParser m (F Inlines) link = try $ do st <- getState -- cgit v1.2.3