diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Muse.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Muse.hs | 266 |
1 files changed, 108 insertions, 158 deletions
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 134598c07..6acc88b3d 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -44,7 +44,6 @@ import Control.Monad import Control.Monad.Reader import Control.Monad.Except (throwError) import Data.Bifunctor -import Data.Char (isAlphaNum) import Data.Default import Data.List (intercalate) import Data.List.Split (splitOn) @@ -59,8 +58,8 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (F, enclosed) -import Text.Pandoc.Shared (crFilter, underlineSpan, mapLeft) +import Text.Pandoc.Parsing hiding (F) +import Text.Pandoc.Shared (crFilter, underlineSpan) -- | Read Muse from an input string and return a Pandoc document. readMuse :: PandocMonad m @@ -69,9 +68,9 @@ readMuse :: PandocMonad m -> m Pandoc readMuse opts s = do let input = crFilter s - res <- mapLeft (PandocParsecError $ unpack input) `liftM` runReaderT (runParserT parseMuse def{ museOptions = opts } "source" input) def + res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input case res of - Left e -> throwError e + Left e -> throwError $ PandocParsecError (unpack input) e Right d -> return d type F = Future MuseState @@ -83,7 +82,6 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , museLogMessages :: [LogMessage] , museNotes :: M.Map String (SourcePos, F Blocks) - , museInPara :: Bool -- ^ True when looking for a paragraph terminator } instance Default MuseState where @@ -94,15 +92,17 @@ instance Default MuseState where , museLastStrPos = Nothing , museLogMessages = [] , museNotes = M.empty - , museInPara = False } data MuseEnv = MuseEnv { museInLink :: Bool -- ^ True when parsing a link description to avoid nested links + , museInPara :: Bool -- ^ True when parsing paragraph is not allowed } instance Default MuseEnv where - def = MuseEnv { museInLink = False } + def = MuseEnv { museInLink = False + , museInPara = False + } type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m) @@ -131,18 +131,12 @@ parseMuse = do many directive blocks <- (:) <$> parseBlocks <*> many parseSection st <- getState - let doc = runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st - reportLogMessages - return doc + runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st <$ reportLogMessages -- * Utility functions commonPrefix :: String -> String -> String -commonPrefix _ [] = [] -commonPrefix [] _ = [] -commonPrefix (x:xs) (y:ys) - | x == y = x : commonPrefix xs ys - | otherwise = [] +commonPrefix xs ys = map fst $ takeWhile (uncurry (==)) $ zip xs ys -- | Trim up to one newline from the beginning of the string. lchop :: String -> String @@ -159,12 +153,11 @@ dropSpacePrefix lns = where flns = filter (not . all (== ' ')) lns maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns -atStart :: PandocMonad m => MuseParser m a -> MuseParser m a -atStart p = do +atStart :: PandocMonad m => MuseParser m () +atStart = do pos <- getPosition st <- getState guard $ museLastStrPos st /= Just pos - p firstColumn :: PandocMonad m => MuseParser m () firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1) @@ -206,18 +199,16 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs classes = maybe [] words $ lookup "class" attrs - keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"] parseHtmlContent :: PandocMonad m => String -- ^ Tag name -> MuseParser m (Attr, F Blocks) -parseHtmlContent tag = try $ do - indent <- getIndent - attr <- openTag tag - manyTill spaceChar eol - content <- parseBlocksTill $ try $ count indent spaceChar *> closeTag tag - manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline - return (htmlAttrToPandoc attr, content) +parseHtmlContent tag = try $ getIndent >>= \indent -> (,) + <$> fmap htmlAttrToPandoc (openTag tag) + <* manyTill spaceChar eol + <*> allowPara (parseBlocksTill (try $ indentWith indent *> closeTag tag)) + <* manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline -- ** Directive parsers @@ -250,6 +241,9 @@ directive = do -- ** Block parsers +allowPara :: MonadReader MuseEnv m => m a -> m a +allowPara p = local (\s -> s { museInPara = False }) p + -- | Parse section contents until EOF or next header parseBlocks :: PandocMonad m => MuseParser m (F Blocks) @@ -263,10 +257,9 @@ parseBlocks = nextSection = mempty <$ lookAhead headingStart parseEnd = mempty <$ eof blockStart = (B.<>) <$> (blockElements <|> emacsNoteBlock) - <*> parseBlocks - listStart = do - updateState (\st -> st { museInPara = False }) - uncurry (B.<>) <$> (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks) + <*> allowPara parseBlocks + listStart = + uncurry (B.<>) <$> allowPara (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks) paraStart = do indent <- length <$> many spaceChar uncurry (B.<>) . first (p indent) <$> paraUntil parseBlocks @@ -282,46 +275,36 @@ parseSection = parseBlocksTill :: PandocMonad m => MuseParser m a -> MuseParser m (F Blocks) -parseBlocksTill end = - try (parseEnd <|> - blockStart <|> - listStart <|> - paraStart) +parseBlocksTill end = continuation where parseEnd = mempty <$ end - blockStart = (B.<>) <$> blockElements <*> continuation - listStart = do - updateState (\st -> st { museInPara = False }) - uncurry (B.<>) <$> anyListUntil (parseEnd <|> continuation) + blockStart = (B.<>) <$> blockElements <*> allowPara continuation + listStart = uncurry (B.<>) <$> allowPara (anyListUntil (parseEnd <|> continuation)) paraStart = uncurry (B.<>) <$> paraUntil (parseEnd <|> continuation) - continuation = parseBlocksTill end + continuation = try $ parseEnd <|> blockStart <|> listStart <|> paraStart listItemContentsUntil :: PandocMonad m => Int -> MuseParser m a -> MuseParser m a -> MuseParser m (F Blocks, a) -listItemContentsUntil col pre end = - try blockStart <|> - try listStart <|> - try paraStart +listItemContentsUntil col pre end = p where + p = try blockStart <|> try listStart <|> try paraStart parsePre = (mempty,) <$> pre parseEnd = (mempty,) <$> end paraStart = do (f, (r, e)) <- paraUntil (parsePre <|> continuation <|> parseEnd) return (f B.<> r, e) blockStart = first <$> ((B.<>) <$> blockElements) - <*> (parsePre <|> continuation <|> parseEnd) + <*> allowPara (parsePre <|> continuation <|> parseEnd) listStart = do - updateState (\st -> st { museInPara = False }) - (f, (r, e)) <- anyListUntil (parsePre <|> continuation <|> parseEnd) + (f, (r, e)) <- allowPara $ anyListUntil (parsePre <|> continuation <|> parseEnd) return (f B.<> r, e) continuation = try $ do blank <- optionMaybe blankline skipMany blankline indentWith col - updateState (\st -> st { museInPara = museInPara st && isNothing blank }) - listItemContentsUntil col pre end + local (\s -> s { museInPara = museInPara s && isNothing blank }) p parseBlock :: PandocMonad m => MuseParser m (F Blocks) parseBlock = do @@ -331,25 +314,22 @@ parseBlock = do where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements))) blockElements :: PandocMonad m => MuseParser m (F Blocks) -blockElements = do - updateState (\st -> st { museInPara = False }) - choice [ mempty <$ blankline - , comment - , separator - , example - , exampleTag - , literalTag - , centerTag - , rightTag - , quoteTag - , divTag - , biblioTag - , playTag - , verseTag - , lineBlock - , table - , commentTag - ] +blockElements = (mempty <$ blankline) + <|> comment + <|> separator + <|> example + <|> exampleTag + <|> literalTag + <|> centerTag + <|> rightTag + <|> quoteTag + <|> divTag + <|> biblioTag + <|> playTag + <|> verseTag + <|> lineBlock + <|> table + <|> commentTag -- | Parse a line comment, starting with @;@ in the first column. comment :: PandocMonad m => MuseParser m (F Blocks) @@ -445,9 +425,9 @@ divTag = do -- | Parse @\<biblio>@ tag, the result is the same as @\<div class="biblio">@. -- @\<biblio>@ tag is supported only in Text::Amuse mode. biblioTag :: PandocMonad m => MuseParser m (F Blocks) -biblioTag = do - guardEnabled Ext_amuse - fmap (B.divWith ("", ["biblio"], [])) . snd <$> parseHtmlContent "biblio" +biblioTag = fmap (B.divWith ("", ["biblio"], [])) . snd + <$ guardEnabled Ext_amuse + <*> parseHtmlContent "biblio" -- | Parse @\<play>@ tag, the result is the same as @\<div class="play">@. -- @\<play>@ tag is supported only in Text::Amuse mode. @@ -463,13 +443,11 @@ verseLine = (<>) -- | Parse @\<verse>@ tag. verseTag :: PandocMonad m => MuseParser m (F Blocks) -verseTag = try $ do - indent <- getIndent - openTag "verse" - manyTill spaceChar eol - content <- sequence <$> manyTill (count indent spaceChar *> verseLine) (try $ count indent spaceChar *> closeTag "verse") - manyTill spaceChar eol - return $ B.lineBlock <$> content +verseTag = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence + <$ openTag "verse" + <* manyTill spaceChar eol + <*> manyTill (indentWith indent *> verseLine) (try $ indentWith indent *> closeTag "verse") + <* manyTill spaceChar eol -- | Parse @\<comment>@ tag. commentTag :: PandocMonad m => MuseParser m (F Blocks) @@ -483,19 +461,16 @@ commentTag = try $ mempty paraContentsUntil :: PandocMonad m => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Inlines, a) -paraContentsUntil end = do - updateState (\st -> st { museInPara = True }) - (l, e) <- someUntil inline $ try (manyTill spaceChar eol *> end) - updateState (\st -> st { museInPara = False }) - return (trimInlinesF $ mconcat l, e) +paraContentsUntil end = first (trimInlinesF . mconcat) + <$> someUntil inline (try (manyTill spaceChar eol *> local (\s -> s { museInPara = True}) end)) -- | Parse a paragraph. paraUntil :: PandocMonad m => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Blocks, a) paraUntil end = do - state <- getState - guard $ not $ museInPara state + inPara <- asks museInPara + guard $ not inPara first (fmap B.para) <$> paraContentsUntil end noteMarker :: PandocMonad m => MuseParser m String @@ -504,6 +479,17 @@ noteMarker = try $ (:) <*> oneOf "123456789" <*> manyTill digit (char ']') +addNote :: PandocMonad m + => String + -> SourcePos + -> F Blocks + -> MuseParser m () +addNote ref pos content = do + oldnotes <- museNotes <$> getState + when (M.member ref oldnotes) + (logMessage $ DuplicateNoteReference ref pos) + updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + -- Amusewiki version of note -- Parsing is similar to list item, except that note marker is used instead of list marker amuseNoteBlockUntil :: PandocMonad m @@ -513,12 +499,8 @@ amuseNoteBlockUntil end = try $ do guardEnabled Ext_amuse ref <- noteMarker <* spaceChar pos <- getPosition - updateState (\st -> st { museInPara = False }) - (content, e) <- listItemContentsUntil (sourceColumn pos - 1) (fail "x") end - oldnotes <- museNotes <$> getState - when (M.member ref oldnotes) - (logMessage $ DuplicateNoteReference ref pos) - updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + (content, e) <- allowPara $ listItemContentsUntil (sourceColumn pos - 1) (fail "x") end + addNote ref pos content return (mempty, e) -- Emacs version of note @@ -526,13 +508,10 @@ amuseNoteBlockUntil end = try $ do emacsNoteBlock :: PandocMonad m => MuseParser m (F Blocks) emacsNoteBlock = try $ do guardDisabled Ext_amuse - pos <- getPosition ref <- noteMarker <* skipSpaces - content <- mconcat <$> blocksTillNote - oldnotes <- museNotes <$> getState - when (M.member ref oldnotes) - (logMessage $ DuplicateNoteReference ref pos) - updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + pos <- getPosition + content <- fmap mconcat blocksTillNote + addNote ref pos content return mempty where blocksTillNote = @@ -544,10 +523,8 @@ emacsNoteBlock = try $ do -- | Parse a line block indicated by @\'>\'@ characters. lineBlock :: PandocMonad m => MuseParser m (F Blocks) -lineBlock = try $ do - indent <- getIndent - lns <- (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent) - return $ B.lineBlock <$> sequence lns +lineBlock = try $ getIndent >>= \indent -> fmap B.lineBlock . sequence + <$> (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent) where blankVerseLine = try $ mempty <$ char '>' <* blankline nonblankVerseLine = try (string "> ") *> verseLine @@ -561,8 +538,7 @@ bulletListItemsUntil :: PandocMonad m bulletListItemsUntil indent end = try $ do char '-' void spaceChar <|> lookAhead eol - updateState (\st -> st { museInPara = False }) - (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end) + (x, (xs, e)) <- allowPara $ listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end) return (x:xs, e) -- | Parse a bullet list. @@ -598,8 +574,7 @@ orderedListItemsUntil indent style end = continuation = try $ do pos <- getPosition void spaceChar <|> lookAhead eol - updateState (\st -> st { museInPara = False }) - (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end) + (x, (xs, e)) <- allowPara $ listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end) return (x:xs, e) -- | Parse an ordered list. @@ -620,8 +595,7 @@ descriptionsUntil :: PandocMonad m -> MuseParser m ([F Blocks], a) descriptionsUntil indent end = do void spaceChar <|> lookAhead eol - updateState (\st -> st { museInPara = False }) - (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end) + (x, (xs, e)) <- allowPara $ listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end) return (x:xs, e) definitionListItemsUntil :: PandocMonad m @@ -686,12 +660,8 @@ museAppendElement element tbl = MuseFooterRow row -> tbl{ museTableFooters = row : museTableFooters tbl } MuseCaption inlines -> tbl{ museTableCaption = inlines } -tableCell :: PandocMonad m => MuseParser m (F Blocks) -tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd) - where cellEnd = try $ void (many1 spaceChar *> char '|') <|> eol - tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement]) -tableElements = sequence <$> (tableParseElement `sepEndBy1` eol) +tableElements = sequence <$> many1 tableParseElement elementsToTable :: [MuseTableElement] -> MuseTable elementsToTable = foldr museAppendElement emptyTable @@ -710,10 +680,10 @@ tableParseElement = tableParseHeader tableParseRow :: PandocMonad m => Int -- ^ Number of separator characters -> MuseParser m (F [Blocks]) -tableParseRow n = try $ - sequence <$> (tableCell `sepBy2` fieldSep) - where p `sepBy2` sep = (:) <$> p <*> many1 (sep *> p) - fieldSep = many1 spaceChar *> count n (char '|') *> (void (many1 spaceChar) <|> void (lookAhead newline)) +tableParseRow n = try $ sequence <$> tableCells + where tableCells = (:) <$> tableCell sep <*> (tableCells <|> fmap pure (tableCell eol)) + tableCell p = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline' p + sep = try $ many1 spaceChar *> count n (char '|') *> lookAhead (void (many1 spaceChar) <|> void eol) -- | Parse a table header row. tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement) @@ -732,7 +702,7 @@ tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement) tableParseCaption = try $ fmap MuseCaption . trimInlinesF . mconcat <$ many spaceChar <* string "|+" - <*> many1Till inline (try $ string "+|") + <*> many1Till inline (try $ string "+|" *> eol) -- ** Inline parsers @@ -803,24 +773,15 @@ whitespace = try $ pure B.space <$ skipMany1 spaceChar br :: PandocMonad m => MuseParser m (F Inlines) br = try $ pure B.linebreak <$ string "<br>" -emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines) -emphasisBetween c = try $ enclosedInlines c c - --- | Parses material enclosed between start and end parsers. -enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser - -> ParserT s st m end -- ^ end parser - -> ParserT s st m a -- ^ content parser (to be used repeatedly) - -> ParserT s st m [a] -enclosed start end parser = try $ - start *> notFollowedBy spaceChar *> many1Till parser end - -enclosedInlines :: (PandocMonad m, Show a, Show b) +emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a - -> MuseParser m b -> MuseParser m (F Inlines) -enclosedInlines start end = try $ trimInlinesF . mconcat - <$> enclosed (atStart start) end inline - <* notFollowedBy (satisfy isAlphaNum) +emphasisBetween p = try $ trimInlinesF . mconcat + <$ atStart + <* p + <* notFollowedBy spaceChar + <*> many1Till inline p + <* notFollowedBy alphaNum -- | Parse an inline tag, such as @\<em>@ and @\<strong>@. inlineTag :: PandocMonad m @@ -875,8 +836,7 @@ verbatimTag = return . B.text classTag :: PandocMonad m => MuseParser m (F Inlines) classTag = do classes <- maybe [] words . lookup "name" <$> openTag "class" - res <- manyTill inline $ closeTag "class" - return $ B.spanWith ("", classes, []) <$> mconcat res + fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class") -- | Parse "~~" as nonbreaking space. nbsp :: PandocMonad m => MuseParser m (F Inlines) @@ -884,14 +844,12 @@ nbsp = try $ pure (B.str "\160") <$ string "~~" -- | Parse code markup, indicated by @\'=\'@ characters. code :: PandocMonad m => MuseParser m (F Inlines) -code = try $ do - atStart $ char '=' - contents <- many1Till (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) $ char '=' - guard $ not $ null contents - guard $ head contents `notElem` " \t\n" - guard $ last contents `notElem` " \t\n" - notFollowedBy $ satisfy isAlphaNum - return $ return $ B.code contents +code = try $ fmap pure $ B.code . uncurry (++) + <$ atStart + <* char '=' + <* notFollowedBy (spaceChar <|> newline) + <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=') + <* notFollowedBy alphaNum -- | Parse @\<code>@ tag. codeTag :: PandocMonad m => MuseParser m (F Inlines) @@ -916,24 +874,24 @@ str :: PandocMonad m => MuseParser m (F Inlines) str = return . B.str <$> many1 alphaNum <* updateLastStrPos symbol :: PandocMonad m => MuseParser m (F Inlines) -symbol = return . B.str <$> count 1 nonspaceChar +symbol = pure . B.str . pure <$> nonspaceChar -- | Parse a link or image. linkOrImage :: PandocMonad m => MuseParser m (F Inlines) linkOrImage = try $ do inLink <- asks museInLink guard $ not inLink - local (\s -> s { museInLink = True }) (explicitLink <|> image <|> link) + local (\s -> s { museInLink = True }) (link "URL:" <|> image <|> link "") linkContent :: PandocMonad m => MuseParser m (F Inlines) linkContent = trimInlinesF . mconcat <$ char '[' <*> manyTill inline (char ']') --- | Parse a link starting with @URL:@ -explicitLink :: PandocMonad m => MuseParser m (F Inlines) -explicitLink = try $ do - string "[[URL:" +-- | Parse a link starting with (possibly null) prefix +link :: PandocMonad m => String -> MuseParser m (F Inlines) +link prefix = try $ do + string $ "[[" ++ prefix url <- manyTill anyChar $ char ']' content <- option (pure $ B.str url) linkContent char ']' @@ -966,11 +924,3 @@ image = try $ do <*> optionMaybe (many1 digit) <* many spaceChar <*> optionMaybe (oneOf "rlf") - -link :: PandocMonad m => MuseParser m (F Inlines) -link = try $ do - string "[[" - url <- manyTill anyChar $ char ']' - content <- optionMaybe linkContent - char ']' - return $ B.link url "" <$> fromMaybe (return $ B.str url) content |