diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Readers.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Muse.hs | 577 |
2 files changed, 580 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 004fefe25..4c95d5d28 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -59,6 +59,7 @@ module Text.Pandoc.Readers , readTWiki , readTxt2Tags , readEPUB + , readMuse -- * Miscellaneous , getReader , getDefaultExtensions @@ -81,6 +82,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki +import Text.Pandoc.Readers.Muse import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Odt import Text.Pandoc.Readers.OPML @@ -125,6 +127,7 @@ readers = [ ("native" , TextReader readNative) ,("odt" , ByteStringReader readOdt) ,("t2t" , TextReader readTxt2Tags) ,("epub" , ByteStringReader readEPUB) + ,("muse" , TextReader readMuse) ] -- | Retrieve reader based on formatSpec (format+extensions). diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs new file mode 100644 index 000000000..bc9da26cb --- /dev/null +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -0,0 +1,577 @@ +{- + Copyright (C) 2017 Alexander Krotov <ilabdsf@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Muse + Copyright : Copyright (C) 2017 Alexander Krotov + License : GNU GPL, version 2 or above + + Maintainer : Alexander Krotov <ilabdsf@gmail.com> + Stability : alpha + Portability : portable + +Conversion of Muse text to 'Pandoc' document. +-} +{- +TODO: +- {{{ }}} syntax for <example> +- Page breaks (five "*") +- Headings with anchors (make it round trip with Muse writer) +- <verse> and ">" +- Definition lists +- Org tables +- table.el tables +- Images with attributes (floating and width) +- Anchors +- Citations and <biblio> +- <play> environment +- <verbatim> tag +-} +module Text.Pandoc.Readers.Muse (readMuse) where + +import Control.Monad +import Control.Monad.Except (throwError) +import qualified Data.Map as M +import Data.Text (Text, unpack) +import Data.List (stripPrefix) +import Data.Maybe (fromMaybe) +import Text.HTML.TagSoup +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (macro, nested) +import Text.Pandoc.Readers.HTML (htmlTag) +import Text.Pandoc.XML (fromEntities) +import System.FilePath (takeExtension) + +-- | Read Muse from an input string and return a Pandoc document. +readMuse :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readMuse opts s = do + res <- readWithM parseMuse def{ stateOptions = opts } (unpack s) + case res of + Left e -> throwError e + Right d -> return d + +type MuseParser = ParserT String ParserState + +-- +-- main parser +-- + +parseMuse :: PandocMonad m => MuseParser m Pandoc +parseMuse = do + many directive + blocks <- parseBlocks + st <- getState + let doc = runF (do Pandoc _ bs <- B.doc <$> blocks + meta <- stateMeta' st + return $ Pandoc meta bs) st + reportLogMessages + return doc + +parseBlocks :: PandocMonad m => MuseParser m (F Blocks) +parseBlocks = do + res <- mconcat <$> many block + spaces + eof + return res + +-- +-- utility functions +-- + +nested :: PandocMonad m => MuseParser m a -> MuseParser m a +nested p = do + nestlevel <- stateMaxNestingLevel <$> getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + +htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlElement tag = try $ do + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + content <- manyTill anyChar (endtag <|> endofinput) + return (htmlAttrToPandoc attr, trim content) + where + endtag = void $ htmlTag (~== TagClose tag) + endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof + trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + +htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc attrs = (ident, classes, keyvals) + where + ident = fromMaybe "" $ lookup "id" attrs + classes = maybe [] words $ lookup "class" attrs + keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + +parseHtmlContentWithAttrs :: PandocMonad m + => String -> MuseParser m a -> MuseParser m (Attr, [a]) +parseHtmlContentWithAttrs tag parser = do + (attr, content) <- htmlElement tag + parsedContent <- try $ parseContent content + return (attr, parsedContent) + where + parseContent = parseFromString $ nested $ manyTill parser endOfContent + endOfContent = try $ skipMany blankline >> skipSpaces >> eof + +parseHtmlContent :: PandocMonad m => String -> MuseParser m a -> MuseParser m [a] +parseHtmlContent tag p = liftM snd (parseHtmlContentWithAttrs tag p) + +-- +-- directive parsers +-- + +parseDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseDirective = do + char '#' + key <- many letter + space + spaces + raw <- many $ noneOf "\n" + newline + value <- parseFromString (trimInlinesF . mconcat <$> many inline) raw + return (key, value) + +directive :: PandocMonad m => MuseParser m () +directive = do + (key, value) <- parseDirective + updateState $ \st -> st { stateMeta' = B.setMeta key <$> value <*> stateMeta' st } + +-- +-- block parsers +-- + +block :: PandocMonad m => MuseParser m (F Blocks) +block = do + pos <- getPosition + res <- mempty <$ skipMany1 blankline + <|> blockElements + <|> para + skipMany blankline + report $ ParsingTrace (take 60 $ show $ B.toList $ runF res defaultParserState) pos + return res + +blockElements :: PandocMonad m => MuseParser m (F Blocks) +blockElements = choice [ comment + , separator + , header + , exampleTag + , literal + , centerTag + , rightTag + , quoteTag + , bulletList + , orderedList + , table + , commentTag + , noteBlock + ] + +comment :: PandocMonad m => MuseParser m (F Blocks) +comment = try $ do + char ';' + space + many $ noneOf "\n" + void newline <|> eof + return mempty + +separator :: PandocMonad m => MuseParser m (F Blocks) +separator = try $ do + string "---" + newline + return $ return B.horizontalRule + +header :: PandocMonad m => MuseParser m (F Blocks) +header = try $ do + level <- liftM length $ many1 $ char '*' + guard $ level <= 5 + skipSpaces + content <- trimInlinesF . mconcat <$> manyTill inline newline + attr <- registerHeader ("", [], []) (runF content defaultParserState) + return $ B.headerWith attr level <$> content + +exampleTag :: PandocMonad m => MuseParser m (F Blocks) +exampleTag = liftM (return . uncurry B.codeBlockWith) $ htmlElement "example" + +literal :: PandocMonad m => MuseParser m (F Blocks) +literal = liftM (return . rawBlock) $ htmlElement "literal" + where + format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs + rawBlock (attrs, content) = B.rawBlock (format attrs) content + +blockTag :: PandocMonad m + => (Blocks -> Blocks) + -> String + -> MuseParser m (F Blocks) +blockTag f s = do + res <- parseHtmlContent s block + return $ f <$> mconcat res + +-- <center> tag is ignored +centerTag :: PandocMonad m => MuseParser m (F Blocks) +centerTag = blockTag id "center" + +-- <right> tag is ignored +rightTag :: PandocMonad m => MuseParser m (F Blocks) +rightTag = blockTag id "right" + +quoteTag :: PandocMonad m => MuseParser m (F Blocks) +quoteTag = blockTag B.blockQuote "quote" + +commentTag :: PandocMonad m => MuseParser m (F Blocks) +commentTag = parseHtmlContent "comment" block >> return mempty + +para :: PandocMonad m => MuseParser m (F Blocks) +para = do + res <- trimInlinesF . mconcat <$> many1Till inline endOfParaElement + return $ B.para <$> res + where + endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement + endOfInput = try $ skipMany blankline >> skipSpaces >> eof + endOfPara = try $ blankline >> skipMany1 blankline + newBlockElement = try $ blankline >> void blockElements + +noteMarker :: PandocMonad m => MuseParser m String +noteMarker = try $ do + char '[' + many1Till digit $ char ']' + +noteBlock :: PandocMonad m => MuseParser m (F Blocks) +noteBlock = try $ do + pos <- getPosition + ref <- noteMarker <* skipSpaces + content <- mconcat <$> blocksTillNote + oldnotes <- stateNotes' <$> getState + case M.lookup ref oldnotes of + Just _ -> logMessage $ DuplicateNoteReference ref pos + Nothing -> return () + updateState $ \s -> s{ stateNotes' = M.insert ref (pos, content) oldnotes } + return mempty + where + blocksTillNote = + many1Till block (eof <|> () <$ lookAhead noteMarker) + +-- +-- lists +-- + +listLine :: PandocMonad m => Int -> MuseParser m String +listLine markerLength = try $ do + notFollowedBy blankline + indentWith markerLength + anyLineNewline + +withListContext :: PandocMonad m => MuseParser m a -> MuseParser m a +withListContext p = do + state <- getState + let oldContext = stateParserContext state + setState $ state { stateParserContext = ListItemState } + parsed <- p + updateState (\st -> st {stateParserContext = oldContext}) + return parsed + +listContinuation :: PandocMonad m => Int -> MuseParser m String +listContinuation markerLength = try $ do + result <- many1 $ listLine markerLength + blanks <- many1 blankline + return $ concat result ++ blanks + +listStart :: PandocMonad m => MuseParser m Int -> MuseParser m Int +listStart marker = try $ do + preWhitespace <- length <$> many spaceChar + st <- stateParserContext <$> getState + getPosition >>= \pos -> guard (st == ListItemState || sourceColumn pos /= 1) + markerLength <- marker + postWhitespace <- length <$> many1 spaceChar + return $ preWhitespace + markerLength + postWhitespace + +listItem :: PandocMonad m => MuseParser m Int -> MuseParser m (F Blocks) +listItem start = try $ do + markerLength <- start + firstLine <- anyLineNewline + blank <- option "" ("\n" <$ blankline) + restLines <- many $ listLine markerLength + let first = firstLine ++ blank ++ concat restLines + rest <- many $ listContinuation markerLength + parseFromString (withListContext parseBlocks) $ concat (first:rest) ++ "\n" + +bulletListItems :: PandocMonad m => MuseParser m (F [Blocks]) +bulletListItems = sequence <$> many1 (listItem bulletListStart) + +bulletListStart :: PandocMonad m => MuseParser m Int +bulletListStart = listStart (char '-' >> return 1) + +bulletList :: PandocMonad m => MuseParser m (F Blocks) +bulletList = do + listItems <- bulletListItems + return $ B.bulletList <$> listItems + +orderedListStart :: PandocMonad m + => ListNumberStyle + -> ListNumberDelim + -> MuseParser m Int +orderedListStart style delim = listStart (snd <$> withHorizDisplacement (orderedListMarker style delim)) + +orderedList :: PandocMonad m => MuseParser m (F Blocks) +orderedList = try $ do + p@(_, style, delim) <- lookAhead (many spaceChar *> anyOrderedListMarker <* spaceChar) + guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman] + guard $ delim == Period + items <- sequence <$> many1 (listItem $ orderedListStart style delim) + return $ B.orderedListWith p <$> items + +-- +-- tables +-- + +data MuseTable = MuseTable + { museTableCaption :: Inlines + , museTableHeaders :: [[Blocks]] + , museTableRows :: [[Blocks]] + , museTableFooters :: [[Blocks]] + } + +data MuseTableElement = MuseHeaderRow (F [Blocks]) + | MuseBodyRow (F [Blocks]) + | MuseFooterRow (F [Blocks]) + | MuseCaption (F Inlines) + +museToPandocTable :: MuseTable -> Blocks +museToPandocTable (MuseTable caption headers body footers) = + B.table caption attrs headRow rows + where ncol = maximum (0 : map length (headers ++ body ++ footers)) + attrs = replicate ncol (AlignDefault, 0.0) + headRow = if null headers then [] else head headers + rows = (if null headers then [] else tail headers) ++ body ++ footers + +museAppendElement :: MuseTable + -> MuseTableElement + -> F MuseTable +museAppendElement tbl element = + case element of + MuseHeaderRow row -> do + row' <- row + return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] } + MuseBodyRow row -> do + row' <- row + return tbl{ museTableRows = museTableRows tbl ++ [row'] } + MuseFooterRow row-> do + row' <- row + return tbl{ museTableFooters = museTableFooters tbl ++ [row'] } + MuseCaption inlines -> do + inlines' <- inlines + return tbl{ museTableCaption = inlines' } + +tableCell :: PandocMonad m => MuseParser m (F Blocks) +tableCell = try $ do + content <- trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd) + return $ B.plain <$> content + where cellEnd = try $ void (many1 spaceChar >> char '|') <|> void newline <|> eof + +tableElements :: PandocMonad m => MuseParser m [MuseTableElement] +tableElements = tableParseElement `sepEndBy1` (void newline <|> eof) + +elementsToTable :: [MuseTableElement] -> F MuseTable +elementsToTable = foldM museAppendElement emptyTable + where emptyTable = MuseTable mempty mempty mempty mempty + +table :: PandocMonad m => MuseParser m (F Blocks) +table = try $ do + rows <- tableElements + let tbl = elementsToTable rows + let pandocTbl = museToPandocTable <$> tbl :: F Blocks + return pandocTbl + +tableParseElement :: PandocMonad m => MuseParser m MuseTableElement +tableParseElement = tableParseHeader + <|> tableParseBody + <|> tableParseFooter + <|> tableParseCaption + +tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks]) +tableParseRow n = try $ do + fields <- tableCell `sepBy2` fieldSep + return $ sequence fields + where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p) + fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline)) + +tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement +tableParseHeader = MuseHeaderRow <$> tableParseRow 2 + +tableParseBody :: PandocMonad m => MuseParser m MuseTableElement +tableParseBody = MuseBodyRow <$> tableParseRow 1 + +tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement +tableParseFooter = MuseFooterRow <$> tableParseRow 3 + +tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement +tableParseCaption = try $ do + many spaceChar + string "|+" + contents <- trimInlinesF . mconcat <$> many1Till inline (lookAhead $ string "+|") + string "+|" + return $ MuseCaption contents + +-- +-- inline parsers +-- + +inline :: PandocMonad m => MuseParser m (F Inlines) +inline = choice [ whitespace + , br + , footnote + , strong + , strongTag + , emph + , emphTag + , superscriptTag + , subscriptTag + , strikeoutTag + , link + , code + , codeTag + , str + , symbol + ] <?> "inline" + +footnote :: PandocMonad m => MuseParser m (F Inlines) +footnote = try $ do + ref <- noteMarker + return $ do + notes <- asksF stateNotes' + case M.lookup ref notes of + Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Just (_pos, contents) -> do + st <- askF + let contents' = runF contents st { stateNotes' = M.empty } + return $ B.note contents' + +whitespace :: PandocMonad m => MuseParser m (F Inlines) +whitespace = liftM return (lb <|> regsp) + where lb = try $ skipMany spaceChar >> linebreak >> return B.space + regsp = try $ skipMany1 spaceChar >> return B.space + +br :: PandocMonad m => MuseParser m (F Inlines) +br = try $ do + string "<br>" + return $ return B.linebreak + +linebreak :: PandocMonad m => MuseParser m (F Inlines) +linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) + where lastNewline = do + eof + return $ return mempty + innerNewline = return $ return B.space + +emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines) +emphasisBetween c = try $ enclosedInlines c c + +enclosedInlines :: (PandocMonad m, Show a, Show b) + => MuseParser m a + -> MuseParser m b + -> MuseParser m (F Inlines) +enclosedInlines start end = try $ + trimInlinesF . mconcat <$> enclosed start end inline + +verbatimBetween :: PandocMonad m + => Char + -> MuseParser m String +verbatimBetween c = try $ do + char c + many1Till anyChar $ char c + +inlineTag :: PandocMonad m + => (Inlines -> Inlines) + -> String + -> MuseParser m (F Inlines) +inlineTag f s = do + res <- parseHtmlContent s inline + return $ f <$> mconcat res + +strongTag :: PandocMonad m => MuseParser m (F Inlines) +strongTag = inlineTag B.strong "strong" + +strong :: PandocMonad m => MuseParser m (F Inlines) +strong = fmap B.strong <$> emphasisBetween (string "**") + +emph :: PandocMonad m => MuseParser m (F Inlines) +emph = fmap B.emph <$> emphasisBetween (char '*') + +emphTag :: PandocMonad m => MuseParser m (F Inlines) +emphTag = inlineTag B.emph "em" + +superscriptTag :: PandocMonad m => MuseParser m (F Inlines) +superscriptTag = inlineTag B.superscript "sup" + +subscriptTag :: PandocMonad m => MuseParser m (F Inlines) +subscriptTag = inlineTag B.subscript "sub" + +strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) +strikeoutTag = inlineTag B.strikeout "del" + +code :: PandocMonad m => MuseParser m (F Inlines) +code = return . B.code <$> verbatimBetween '=' + +codeTag :: PandocMonad m => MuseParser m (F Inlines) +codeTag = do + (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + return $ return $ B.codeWith attrs $ fromEntities content + +str :: PandocMonad m => MuseParser m (F Inlines) +str = liftM (return . B.str) (many1 alphaNum <|> count 1 characterReference) + +symbol :: PandocMonad m => MuseParser m (F Inlines) +symbol = liftM (return . B.str) $ count 1 nonspaceChar + +link :: PandocMonad m => MuseParser m (F Inlines) +link = try $ do + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (url, title, content) <- linkText + setState $ st{ stateAllowLinks = True } + return $ case stripPrefix "URL:" url of + Nothing -> if isImageUrl url + then B.image url title <$> fromMaybe (return mempty) content + else B.link url title <$> fromMaybe (return $ B.str url) content + Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content + where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el + imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] + isImageUrl = (`elem` imageExtensions) . takeExtension + +linkContent :: PandocMonad m => MuseParser m (F Inlines) +linkContent = do + char '[' + res <- many1Till anyChar $ char ']' + parseFromString (mconcat <$> many1 inline) res + +linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines)) +linkText = do + string "[[" + url <- many1Till anyChar $ char ']' + content <- optionMaybe linkContent + char ']' + return (url, "", content) |