Muse reader: document implementation

author: Alexander Krotov <ilabdsf@gmail.com> 2018-04-08 06:44:03 +0300
committer: Alexander Krotov <ilabdsf@gmail.com> 2018-04-08 06:51:10 +0300
commit: 2bc5fb6d8303b17be57a509248b687ceeddc07d1 (patch)
tree: a9fedddcaa1f692ca59fc1a4c2f13dc1c2fad372
parent: 4cb053ce3d5001a25a9de0a3b1d9dd4dc505f491 (diff)
download: pandoc-2bc5fb6d8303b17be57a509248b687ceeddc07d1.tar.gz
1 files changed, 136 insertions, 93 deletions
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index 66167b243..fa520fb83 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -84,24 +84,21 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
                            , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
                            , museLogMessages :: [LogMessage]
                            , museNotes :: M.Map String (SourcePos, F Blocks)
-                           , museInLink :: Bool
-                           , museInPara :: Bool
+                           , museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
+                           , museInPara :: Bool -- ^ True when looking for a paragraph terminator
                            }
 
 instance Default MuseState where
-  def = defaultMuseState
-
-defaultMuseState :: MuseState
-defaultMuseState = MuseState { museMeta = return nullMeta
-                             , museOptions = def
-                             , museHeaders = M.empty
-                             , museIdentifierList = Set.empty
-                             , museLastStrPos = Nothing
-                             , museLogMessages = []
-                             , museNotes = M.empty
-                             , museInLink = False
-                             , museInPara = False
-                             }
+  def = MuseState { museMeta = return nullMeta
+                  , museOptions = def
+                  , museHeaders = M.empty
+                  , museIdentifierList = Set.empty
+                  , museLastStrPos = Nothing
+                  , museLogMessages = []
+                  , museNotes = M.empty
+                  , museInLink = False
+                  , museInPara = False
+                  }
 
 type MuseParser = ParserT String MuseState
 
@@ -124,10 +121,7 @@ instance HasLogMessages MuseState where
   addLogMessage m s = s{ museLogMessages = m : museLogMessages s }
   getLogMessages = reverse . museLogMessages
 
---
--- main parser
---
-
+-- | Parse Muse document
 parseMuse :: PandocMonad m => MuseParser m Pandoc
 parseMuse = do
   many directive
@@ -139,14 +133,56 @@ parseMuse = do
   reportLogMessages
   return doc
 
---
--- utility functions
---
+-- * Utility functions
+
+commonPrefix :: String -> String -> String
+commonPrefix _ [] = []
+commonPrefix [] _ = []
+commonPrefix (x:xs) (y:ys)
+  | x == y    = x : commonPrefix xs ys
+  | otherwise = []
+
+-- | Trim up to one newline from the beginning of the string.
+lchop :: String -> String
+lchop s = case s of
+                    '\n':ss -> ss
+                    _       -> s
+
+-- | Trim up to one newline from the end of the string.
+rchop :: String -> String
+rchop = reverse . lchop . reverse
+
+dropSpacePrefix :: [String] -> [String]
+dropSpacePrefix lns =
+  map (drop maxIndent) lns
+  where flns = filter (not . all (== ' ')) lns
+        maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns
+
+atStart :: PandocMonad m => MuseParser m a -> MuseParser m a
+atStart p = do
+  pos <- getPosition
+  st <- getState
+  guard $ museLastStrPos st /= Just pos
+  p
+
+-- * Parsers
 
+-- | Parse end-of-line, which can be either a newline or end-of-file.
 eol :: Stream s m Char => ParserT s st m ()
 eol = void newline <|> eof
 
-htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String)
+someUntil :: (Stream s m t)
+          => ParserT s u m a
+          -> ParserT s u m b
+          -> ParserT s u m ([a], b)
+someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end
+
+-- ** HTML parsers
+
+-- | Parse HTML tag, returning its attributes and literal contents.
+htmlElement :: PandocMonad m
+            => String -- ^ Tag name
+            -> MuseParser m (Attr, String)
 htmlElement tag = try $ do
   (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
   content <- manyTill anyChar endtag
@@ -154,13 +190,16 @@ htmlElement tag = try $ do
   where
     endtag = void $ htmlTag (~== TagClose tag)
 
-htmlBlock :: PandocMonad m => String -> MuseParser m (Attr, String)
+htmlBlock :: PandocMonad m
+          => String -- ^ Tag name
+          -> MuseParser m (Attr, String)
 htmlBlock tag = try $ do
   many spaceChar
   res <- htmlElement tag
   manyTill spaceChar eol
   return res
 
+-- | Convert HTML attributes to Pandoc 'Attr'
 htmlAttrToPandoc :: [Attribute String] -> Attr
 htmlAttrToPandoc attrs = (ident, classes, keyvals)
   where
@@ -169,7 +208,8 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals)
     keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
 
 parseHtmlContent :: PandocMonad m
-                 => String -> MuseParser m (Attr, F Blocks)
+                 => String -- ^ Tag name
+                 -> MuseParser m (Attr, F Blocks)
 parseHtmlContent tag = try $ do
   many spaceChar
   pos <- getPosition
@@ -181,29 +221,7 @@ parseHtmlContent tag = try $ do
   where
     endtag = void $ htmlTag (~== TagClose tag)
 
-commonPrefix :: String -> String -> String
-commonPrefix _ [] = []
-commonPrefix [] _ = []
-commonPrefix (x:xs) (y:ys)
-  | x == y    = x : commonPrefix xs ys
-  | otherwise = []
-
-atStart :: PandocMonad m => MuseParser m a -> MuseParser m a
-atStart p = do
-  pos <- getPosition
-  st <- getState
-  guard $ museLastStrPos st /= Just pos
-  p
-
-someUntil :: (Stream s m t)
-          => ParserT s u m a
-          -> ParserT s u m b
-          -> ParserT s u m ([a], b)
-someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end
-
---
--- directive parsers
---
+-- ** Directive parsers
 
 -- While not documented, Emacs Muse allows "-" in directive name
 parseDirectiveKey :: PandocMonad m => MuseParser m String
@@ -234,9 +252,7 @@ directive = do
   where translateKey "cover" = "cover-image"
         translateKey x = x
 
---
--- block parsers
---
+-- ** Block parsers
 
 parseBlocks :: PandocMonad m
             => MuseParser m (F Blocks)
@@ -329,6 +345,7 @@ blockElements = do
          , commentTag
          ]
 
+-- | Parse a line comment, starting with @;@ in the first column.
 comment :: PandocMonad m => MuseParser m (F Blocks)
 comment = try $ do
   char ';'
@@ -336,6 +353,7 @@ comment = try $ do
   eol
   return mempty
 
+-- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters.
 separator :: PandocMonad m => MuseParser m (F Blocks)
 separator = try $ do
   string "----"
@@ -344,6 +362,7 @@ separator = try $ do
   eol
   return $ return B.horizontalRule
 
+-- | Parse a heading.
 header :: PandocMonad m => MuseParser m (F Blocks)
 header = try $ do
   anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol)
@@ -355,6 +374,8 @@ header = try $ do
   attr <- registerHeader (anchorId, [], []) (runF content def)
   return $ B.headerWith attr level <$> content
 
+-- | Parse an example between @{{{@ and @}}}@.
+-- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation.
 example :: PandocMonad m => MuseParser m (F Blocks)
 example = try $ do
   string "{{{"
@@ -362,27 +383,14 @@ example = try $ do
   contents <- manyTill anyChar $ try (optional blankline >> string "}}}")
   return $ return $ B.codeBlock contents
 
--- Trim up to one newline from the beginning of the string.
-lchop :: String -> String
-lchop s = case s of
-                    '\n':ss -> ss
-                    _       -> s
-
--- Trim up to one newline from the end of the string.
-rchop :: String -> String
-rchop = reverse . lchop . reverse
-
-dropSpacePrefix :: [String] -> [String]
-dropSpacePrefix lns =
-  map (drop maxIndent) lns
-  where flns = filter (not . all (== ' ')) lns
-        maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns
-
+-- | Parse an @\<example>@ tag.
 exampleTag :: PandocMonad m => MuseParser m (F Blocks)
 exampleTag = try $ do
   (attr, contents) <- htmlBlock "example"
   return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents
 
+-- | Parse a @\<literal>@ tag as a raw block.
+-- For 'RawInline' @\<literal>@ parser, see 'inlineLiteralTag'.
 literalTag :: PandocMonad m => MuseParser m (F Blocks)
 literalTag = try $ do
   many spaceChar
@@ -397,30 +405,36 @@ literalTag = try $ do
     format (_, _, kvs)        = fromMaybe "html" $ lookup "style" kvs
     rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content
 
--- <center> tag is ignored
+-- | Parse @\<center>@ tag.
+-- Currently it is ignored as Pandoc cannot represent centered blocks.
 centerTag :: PandocMonad m => MuseParser m (F Blocks)
 centerTag = snd <$> parseHtmlContent "center"
 
--- <right> tag is ignored
+-- | Parse @\<right>@ tag.
+-- Currently it is ignored as Pandoc cannot represent centered blocks.
 rightTag :: PandocMonad m => MuseParser m (F Blocks)
 rightTag = snd <$> parseHtmlContent "right"
 
+-- | Parse @\<quote>@ tag.
 quoteTag :: PandocMonad m => MuseParser m (F Blocks)
 quoteTag = fmap B.blockQuote . snd <$> parseHtmlContent "quote"
 
--- <div> tag is supported by Emacs Muse, but not Amusewiki 2.025
+-- | Parse @\<div>@ tag.
+-- @\<div>@ tag is supported by Emacs Muse, but not Amusewiki 2.025.
 divTag :: PandocMonad m => MuseParser m (F Blocks)
 divTag = do
   (attrs, content) <- parseHtmlContent "div"
   return $ B.divWith attrs <$> content
 
--- <biblio> tag is supported by Amusewiki only
+-- | Parse @\<biblio>@ tag, the result is the same as @\<div class="biblio">@.
+-- @\<biblio>@ tag is supported only in Text::Amuse mode.
 biblioTag :: PandocMonad m => MuseParser m (F Blocks)
 biblioTag = do
   guardEnabled Ext_amuse
   fmap (B.divWith ("", ["biblio"], [])) . snd <$> parseHtmlContent "biblio"
 
--- <play> tag is supported by Amusewiki only
+-- | Parse @\<play>@ tag, the result is the same as @\<div class="play">@.
+-- @\<play>@ tag is supported only in Text::Amuse mode.
 playTag :: PandocMonad m => MuseParser m (F Blocks)
 playTag = do
   guardEnabled Ext_amuse
@@ -437,17 +451,19 @@ verseLines = do
   lns <- many verseLine
   return $ B.lineBlock <$> sequence lns
 
+-- | Parse @\<verse>@ tag.
 verseTag :: PandocMonad m => MuseParser m (F Blocks)
 verseTag = do
   (_, content) <- htmlBlock "verse"
   parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content)
 
+-- | Parse @\<comment>@ tag.
 commentTag :: PandocMonad m => MuseParser m (F Blocks)
 commentTag = htmlBlock "comment" >> return mempty
 
--- Indented paragraph is either center, right or quote
+-- | Parse a paragraph.
 paraUntil :: PandocMonad m
-          => MuseParser m a
+          => MuseParser m a -- ^ Terminator parser
           -> MuseParser m (F Blocks, a)
 paraUntil end = do
   state <- getState
@@ -514,19 +530,18 @@ blanklineVerseLine = try $ do
   blankline
   pure mempty
 
+-- | Parse a line block indicated by @\'>\'@ characters.
 lineBlock :: PandocMonad m => MuseParser m (F Blocks)
 lineBlock = try $ do
   col <- sourceColumn <$> getPosition
   lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1))
   return $ B.lineBlock <$> sequence lns
 
---
--- lists
---
+-- *** List parsers
 
 bulletListItemsUntil :: PandocMonad m
-                     => Int
-                     -> MuseParser m a
+                     => Int -- ^ Indentation
+                     -> MuseParser m a -- ^ Terminator parser
                      -> MuseParser m ([F Blocks], a)
 bulletListItemsUntil indent end = try $ do
   char '-'
@@ -537,6 +552,7 @@ bulletListItemsUntil indent end = try $ do
     Left ee -> return ([x], ee)
     Right (xs, ee) -> return (x:xs, ee)
 
+-- | Parse a bullet list.
 bulletListUntil :: PandocMonad m
                 => MuseParser m a
                 -> MuseParser m (F Blocks, a)
@@ -585,6 +601,7 @@ orderedListItemsUntil indent style end =
         Left ee -> return ([x], ee)
         Right (xs, ee) -> return (x:xs, ee)
 
+-- | Parse an ordered list.
 orderedListUntil :: PandocMonad m
                  => MuseParser m a
                  -> MuseParser m (F Blocks, a)
@@ -629,8 +646,9 @@ definitionListItemsUntil indent end =
         Left ee -> return ([xx], ee)
         Right (xs, ee) -> return (xx:xs, ee)
 
+-- | Parse a definition list.
 definitionListUntil :: PandocMonad m
-                    => MuseParser m a
+                    => MuseParser m a -- ^ Terminator parser
                     -> MuseParser m (F Blocks, a)
 definitionListUntil end = try $ do
   many spaceChar
@@ -640,15 +658,14 @@ definitionListUntil end = try $ do
   first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end
 
 anyListUntil :: PandocMonad m
-             => MuseParser m a
+             => MuseParser m a -- ^ Terminator parser
              -> MuseParser m (F Blocks, a)
 anyListUntil end =
   bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end
 
---
--- tables
---
+-- *** Table parsers
 
+-- | Internal Muse table representation.
 data MuseTable = MuseTable
   { museTableCaption :: Inlines
   , museTableHeaders :: [[Blocks]]
@@ -698,6 +715,7 @@ elementsToTable :: [MuseTableElement] -> F MuseTable
 elementsToTable = foldM museAppendElement emptyTable
   where emptyTable = MuseTable mempty mempty mempty mempty
 
+-- | Parse a table.
 table :: PandocMonad m => MuseParser m (F Blocks)
 table = try $ fmap museToPandocTable <$> (elementsToTable <$> tableElements)
 
@@ -707,31 +725,35 @@ tableParseElement = tableParseHeader
                 <|> tableParseFooter
                 <|> tableParseCaption
 
-tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks])
+tableParseRow :: PandocMonad m
+              => Int -- ^ Number of separator characters
+              -> MuseParser m (F [Blocks])
 tableParseRow n = try $ do
   fields <- tableCell `sepBy2` fieldSep
   return $ sequence fields
     where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p)
           fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline))
 
+-- | Parse a table header row.
 tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement
 tableParseHeader = MuseHeaderRow <$> tableParseRow 2
 
+-- | Parse a table body row.
 tableParseBody :: PandocMonad m => MuseParser m MuseTableElement
 tableParseBody = MuseBodyRow <$> tableParseRow 1
 
+-- | Parse a table footer row.
 tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement
 tableParseFooter = MuseFooterRow <$> tableParseRow 3
 
+-- | Parse table caption.
 tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement
 tableParseCaption = try $ do
   many spaceChar
   string "|+"
   MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|"))
 
---
--- inline parsers
---
+-- ** Inline parsers
 
 inlineList :: PandocMonad m => [MuseParser m (F Inlines)]
 inlineList = [ whitespace
@@ -761,6 +783,7 @@ inlineList = [ whitespace
 inline :: PandocMonad m => MuseParser m (F Inlines)
 inline = endline <|> choice inlineList <?> "inline"
 
+-- | Parse a soft break.
 endline :: PandocMonad m => MuseParser m (F Inlines)
 endline = try $ do
   newline
@@ -779,6 +802,7 @@ anchor = try $ do
   skipMany spaceChar <|> void newline
   return $ return $ B.spanWith (anchorId, [], []) mempty
 
+-- | Parse a footnote reference.
 footnote :: PandocMonad m => MuseParser m (F Inlines)
 footnote = try $ do
   ref <- noteMarker
@@ -796,6 +820,7 @@ whitespace = try $ do
   skipMany1 spaceChar
   return $ return B.space
 
+-- | Parse @\<br>@ tag.
 br :: PandocMonad m => MuseParser m (F Inlines)
 br = try $ do
   string "<br>"
@@ -811,42 +836,54 @@ enclosedInlines :: (PandocMonad m, Show a, Show b)
 enclosedInlines start end = try $
   trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter))
 
+-- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
 inlineTag :: PandocMonad m
-          => String
+          => String -- ^ Tag name
           -> MuseParser m (F Inlines)
 inlineTag tag = try $ do
   htmlTag (~== TagOpen tag [])
   mconcat <$> manyTill inline (void $ htmlTag (~== TagClose tag))
 
-strongTag :: PandocMonad m => MuseParser m (F Inlines)
-strongTag = fmap B.strong <$> inlineTag "strong"
-
+-- | Parse strong inline markup, indicated by @**@.
 strong :: PandocMonad m => MuseParser m (F Inlines)
 strong = fmap B.strong <$> emphasisBetween (string "**")
 
+-- | Parse emphasis inline markup, indicated by @*@.
 emph :: PandocMonad m => MuseParser m (F Inlines)
 emph = fmap B.emph <$> emphasisBetween (char '*')
 
+-- | Parse underline inline markup, indicated by @_@.
+-- Supported only in Emacs Muse mode, not Text::Amuse.
 underlined :: PandocMonad m => MuseParser m (F Inlines)
 underlined = do
   guardDisabled Ext_amuse -- Supported only by Emacs Muse
   fmap underlineSpan <$> emphasisBetween (char '_')
 
+-- | Parse @\<strong>@ tag.
+strongTag :: PandocMonad m => MuseParser m (F Inlines)
+strongTag = fmap B.strong <$> inlineTag "strong"
+
+-- | Parse @\<em>@ tag.
 emphTag :: PandocMonad m => MuseParser m (F Inlines)
 emphTag = fmap B.emph <$> inlineTag "em"
 
+-- | Parse @\<sup>@ tag.
 superscriptTag :: PandocMonad m => MuseParser m (F Inlines)
 superscriptTag = fmap B.superscript <$> inlineTag "sup"
 
+-- | Parse @\<sub>@ tag.
 subscriptTag :: PandocMonad m => MuseParser m (F Inlines)
 subscriptTag = fmap B.subscript <$> inlineTag "sub"
 
+-- | Parse @\<del>@ tag.
 strikeoutTag :: PandocMonad m => MuseParser m (F Inlines)
 strikeoutTag = fmap B.strikeout <$> inlineTag "del"
 
+-- | Parse @\<verbatim>@ tag.
 verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
 verbatimTag = return . B.text . snd <$> htmlElement "verbatim"
 
+-- | Parse @\<class>@ tag.
 classTag :: PandocMonad m => MuseParser m (F Inlines)
 classTag = do
   (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "class" [])
@@ -854,11 +891,13 @@ classTag = do
   let classes = maybe [] words $ lookup "name" attrs
   return $ B.spanWith ("", classes, []) <$> mconcat res
 
+-- | Parse "~~" as nonbreaking space.
 nbsp :: PandocMonad m => MuseParser m (F Inlines)
 nbsp = try $ do
   string "~~"
   return $ return $ B.str "\160"
 
+-- | Parse code markup, indicated by @\'=\'@ characters.
 code :: PandocMonad m => MuseParser m (F Inlines)
 code = try $ do
   atStart $ char '='
@@ -869,13 +908,16 @@ code = try $ do
   notFollowedBy $ satisfy isLetter
   return $ return $ B.code contents
 
+-- | Parse @\<code>@ tag.
 codeTag :: PandocMonad m => MuseParser m (F Inlines)
 codeTag = return . uncurry B.codeWith <$> htmlElement "code"
 
--- <math> tag is an Emacs Muse extension enabled by (require 'muse-latex2png)
+-- | Parse @\<math>@ tag.
+-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@
 mathTag :: PandocMonad m => MuseParser m (F Inlines)
 mathTag = return . B.math . snd <$> htmlElement "math"
 
+-- | Parse inline @\<literal>@ tag as a raw inline.
 inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
 inlineLiteralTag =
   (return . rawInline) <$> htmlElement "literal"
@@ -890,6 +932,7 @@ str = return . B.str <$> many1 alphaNum <* updateLastStrPos
 symbol :: PandocMonad m => MuseParser m (F Inlines)
 symbol = return . B.str <$> count 1 nonspaceChar
 
+-- | Parse a link or image.
 link :: PandocMonad m => MuseParser m (F Inlines)
 link = try $ do
   st <- getState
author	Alexander Krotov <ilabdsf@gmail.com>	2018-04-08 06:44:03 +0300
committer	Alexander Krotov <ilabdsf@gmail.com>	2018-04-08 06:51:10 +0300
commit	2bc5fb6d8303b17be57a509248b687ceeddc07d1 (patch)
tree	a9fedddcaa1f692ca59fc1a4c2f13dc1c2fad372
parent	4cb053ce3d5001a25a9de0a3b1d9dd4dc505f491 (diff)
download	pandoc-2bc5fb6d8303b17be57a509248b687ceeddc07d1.tar.gz