aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Muse.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Muse.hs')
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs144
1 files changed, 75 insertions, 69 deletions
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index b8cbe2f26..4ade61294 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Muse
Copyright : Copyright (C) 2017-2019 Alexander Krotov
@@ -24,12 +25,12 @@ import Control.Monad.Reader
import Control.Monad.Except (throwError)
import Data.Bifunctor
import Data.Default
-import Data.List (intercalate, transpose, uncons)
-import Data.List.Split (splitOn)
+import Data.List (transpose, uncons)
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isNothing, maybeToList)
-import Data.Text (Text, unpack)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
@@ -38,7 +39,7 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (F)
-import Text.Pandoc.Shared (crFilter, trimr, underlineSpan)
+import Text.Pandoc.Shared (crFilter, trimr, underlineSpan, tshow)
-- | Read Muse from an input string and return a Pandoc document.
readMuse :: PandocMonad m
@@ -49,18 +50,18 @@ readMuse opts s = do
let input = crFilter s
res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
case res of
- Left e -> throwError $ PandocParsecError (unpack input) e
+ Left e -> throwError $ PandocParsecError input e
Right d -> return d
type F = Future MuseState
data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
, museOptions :: ReaderOptions
- , museIdentifierList :: Set.Set String
+ , museIdentifierList :: Set.Set Text
, museLastSpacePos :: Maybe SourcePos -- ^ Position after last space or newline parsed
, museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
, museLogMessages :: [LogMessage]
- , museNotes :: M.Map String (SourcePos, F Blocks)
+ , museNotes :: M.Map Text (SourcePos, F Blocks)
}
instance Default MuseState where
@@ -116,22 +117,27 @@ parseMuse = do
-- * Utility functions
-- | Trim up to one newline from the beginning of the string.
-lchop :: String -> String
-lchop ('\n':xs) = xs
-lchop s = s
+lchop :: Text -> Text
+lchop s = case T.uncons s of
+ Just ('\n', xs) -> xs
+ _ -> s
-- | Trim up to one newline from the end of the string.
-rchop :: String -> String
-rchop = reverse . lchop . reverse
+rchop :: Text -> Text
+rchop s = case T.unsnoc s of
+ Just (xs, '\n') -> xs
+ _ -> s
-unindent :: String -> String
-unindent = rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop
+unindent :: Text -> Text
+unindent = rchop . T.intercalate "\n" . dropSpacePrefix . T.splitOn "\n" . lchop
-dropSpacePrefix :: [String] -> [String]
-dropSpacePrefix lns = drop maxIndent <$> lns
+dropSpacePrefix :: [Text] -> [Text]
+dropSpacePrefix lns = T.drop maxIndent <$> lns
where isSpaceChar c = c == ' ' || c == '\t'
- maxIndent = length $ takeWhile (isSpaceChar . head) $ takeWhile same $ transpose lns
- same = and . (zipWith (==) <*> drop 1)
+ maxIndent = length $ takeWhile (isSpaceChar . T.head) $ takeWhile same $ T.transpose lns
+ same t = case T.uncons t of
+ Just (c, cs) -> T.all (== c) cs
+ Nothing -> True
atStart :: PandocMonad m => MuseParser m ()
atStart = do
@@ -160,29 +166,29 @@ getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition
-- ** HTML parsers
-openTag :: PandocMonad m => String -> MuseParser m [(String, String)]
+openTag :: PandocMonad m => Text -> MuseParser m [(Text, Text)]
openTag tag = try $
- char '<' *> string tag *> manyTill attr (char '>')
+ char '<' *> textStr tag *> manyTill attr (char '>')
where
attr = try $ (,)
<$ many1 spaceChar
- <*> many1 (noneOf "=\n")
+ <*> many1Char (noneOf "=\n")
<* string "=\""
- <*> manyTill (noneOf "\"") (char '"')
+ <*> manyTillChar (noneOf "\"") (char '"')
-closeTag :: PandocMonad m => String -> MuseParser m ()
-closeTag tag = try $ string "</" *> string tag *> void (char '>')
+closeTag :: PandocMonad m => Text -> MuseParser m ()
+closeTag tag = try $ string "</" *> textStr tag *> void (char '>')
-- | Convert HTML attributes to Pandoc 'Attr'
-htmlAttrToPandoc :: [(String, String)] -> Attr
+htmlAttrToPandoc :: [(Text, Text)] -> Attr
htmlAttrToPandoc attrs = (ident, classes, keyvals)
where
ident = fromMaybe "" $ lookup "id" attrs
- classes = maybe [] words $ lookup "class" attrs
+ classes = maybe [] T.words $ lookup "class" attrs
keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"]
parseHtmlContent :: PandocMonad m
- => String -- ^ Tag name
+ => Text -- ^ Tag name
-> MuseParser m (Attr, F Blocks)
parseHtmlContent tag = try $ getIndent >>= \indent -> (,)
<$> fmap htmlAttrToPandoc (openTag tag)
@@ -193,16 +199,16 @@ parseHtmlContent tag = try $ getIndent >>= \indent -> (,)
-- ** Directive parsers
-- While not documented, Emacs Muse allows "-" in directive name
-parseDirectiveKey :: PandocMonad m => MuseParser m String
-parseDirectiveKey = char '#' *> many (letter <|> char '-')
+parseDirectiveKey :: PandocMonad m => MuseParser m Text
+parseDirectiveKey = char '#' *> manyChar (letter <|> char '-')
-parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseEmacsDirective :: PandocMonad m => MuseParser m (Text, F Inlines)
parseEmacsDirective = (,)
<$> parseDirectiveKey
<* spaceChar
<*> (trimInlinesF . mconcat <$> manyTill inline' eol)
-parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseAmuseDirective :: PandocMonad m => MuseParser m (Text, F Inlines)
parseAmuseDirective = (,)
<$> parseDirectiveKey
<* many1 spaceChar
@@ -289,7 +295,7 @@ listItemContentsUntil col pre end = p
parseBlock :: PandocMonad m => MuseParser m (F Blocks)
parseBlock = do
res <- blockElements <|> para
- trace (take 60 $ show $ B.toList $ runF res def)
+ trace (T.take 60 $ tshow $ B.toList $ runF res def)
return res
where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements)))
@@ -337,7 +343,7 @@ pagebreak = try $ pure (B.divWith ("", [], [("style", "page-break-before: always
<* string "* * * * *"
<* manyTill spaceChar eol
-headingStart :: PandocMonad m => MuseParser m (String, Int)
+headingStart :: PandocMonad m => MuseParser m (Text, Int)
headingStart = try $ (,)
<$> option "" (try (parseAnchor <* manyTill spaceChar eol))
<* firstColumn
@@ -371,14 +377,14 @@ example :: PandocMonad m => MuseParser m (F Blocks)
example = try $ pure . B.codeBlock
<$ string "{{{"
<* many spaceChar
- <*> (unindent <$> manyTill anyChar (string "}}}"))
+ <*> (unindent <$> manyTillChar anyChar (string "}}}"))
-- | Parse an @\<example>@ tag.
exampleTag :: PandocMonad m => MuseParser m (F Blocks)
exampleTag = try $ fmap pure $ B.codeBlockWith
<$ many spaceChar
<*> (htmlAttrToPandoc <$> openTag "example")
- <*> (unindent <$> manyTill anyChar (closeTag "example"))
+ <*> (unindent <$> manyTillChar anyChar (closeTag "example"))
<* manyTill spaceChar eol
-- | Parse a @\<literal>@ tag as a raw block.
@@ -388,7 +394,7 @@ literalTag = try $ fmap pure $ B.rawBlock
<$ many spaceChar
<*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
<* manyTill spaceChar eol
- <*> (unindent <$> manyTill anyChar (closeTag "literal"))
+ <*> (unindent <$> manyTillChar anyChar (closeTag "literal"))
<* manyTill spaceChar eol
-- | Parse @\<center>@ tag.
@@ -428,7 +434,7 @@ playTag = do
verseLine :: PandocMonad m => MuseParser m (F Inlines)
verseLine = (<>)
- <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' ')))
+ <$> fmap pure (option mempty (B.str <$> many1Char ('\160' <$ char ' ')))
<*> fmap (trimInlinesF . mconcat) (manyTill inline' eol)
-- | Parse @\<verse>@ tag.
@@ -466,17 +472,17 @@ paraUntil end = do
noteMarker' :: PandocMonad m
=> Char
-> Char
- -> MuseParser m String
-noteMarker' l r = try $ (\x y -> l:x:y ++ [r])
+ -> MuseParser m Text
+noteMarker' l r = try $ (\x y -> T.pack $ l:x:y ++ [r])
<$ char l
<*> oneOf "123456789"
<*> manyTill digit (char r)
-noteMarker :: PandocMonad m => MuseParser m String
+noteMarker :: PandocMonad m => MuseParser m Text
noteMarker = noteMarker' '[' ']' <|> noteMarker' '{' '}'
addNote :: PandocMonad m
- => String
+ => Text
-> SourcePos
-> F Blocks
-> MuseParser m ()
@@ -674,15 +680,15 @@ museGridTableRow :: PandocMonad m
-> MuseParser m (F [Blocks])
museGridTableRow indent indices = try $ do
lns <- many1 $ try (indentWith indent *> museGridTableRawLine indices)
- let cols = map (unlines . map trimr) $ transpose lns
+ let cols = map (T.unlines . map trimr) $ transpose lns
indentWith indent *> museGridTableHeader
sequence <$> mapM (parseFromString' parseBlocks) cols
museGridTableRawLine :: PandocMonad m
=> [Int]
- -> MuseParser m [String]
+ -> MuseParser m [Text]
museGridTableRawLine indices =
- char '|' *> forM indices (\n -> count n anyChar <* char '|') <* manyTill spaceChar eol
+ char '|' *> forM indices (\n -> countChar n anyChar <* char '|') <* manyTill spaceChar eol
museGridTable :: PandocMonad m => MuseParser m (F Blocks)
museGridTable = try $ do
@@ -767,12 +773,12 @@ inline = endline <|> inline'
endline :: PandocMonad m => MuseParser m (F Inlines)
endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline <* updateLastSpacePos
-parseAnchor :: PandocMonad m => MuseParser m String
-parseAnchor = try $ (:)
+parseAnchor :: PandocMonad m => MuseParser m Text
+parseAnchor = try $ T.cons
<$ firstColumn
<* char '#'
<*> letter
- <*> many (letter <|> digit <|> char '-')
+ <*> manyChar (letter <|> digit <|> char '-')
anchor :: PandocMonad m => MuseParser m (F Inlines)
anchor = try $ do
@@ -813,7 +819,7 @@ emphasisBetween p = try $ trimInlinesF . mconcat
-- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
inlineTag :: PandocMonad m
- => String -- ^ Tag name
+ => Text -- ^ Tag name
-> MuseParser m (F Inlines)
inlineTag tag = try $ mconcat
<$ openTag tag
@@ -862,12 +868,12 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del"
verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
verbatimTag = return . B.text
<$ openTag "verbatim"
- <*> manyTill anyChar (closeTag "verbatim")
+ <*> manyTillChar anyChar (closeTag "verbatim")
-- | Parse @\<class>@ tag.
classTag :: PandocMonad m => MuseParser m (F Inlines)
classTag = do
- classes <- maybe [] words . lookup "name" <$> openTag "class"
+ classes <- maybe [] T.words . lookup "name" <$> openTag "class"
fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class")
-- | Parse @\<\<\<RTL>>>@ text.
@@ -886,43 +892,43 @@ nbsp = try $ pure (B.str "\160") <$ string "~~"
-- | Parse code markup, indicated by @\'=\'@ characters.
code :: PandocMonad m => MuseParser m (F Inlines)
-code = try $ fmap pure $ B.code . uncurry (++)
+code = try $ fmap pure $ B.code . uncurry (<>)
<$ atStart
<* char '='
<* notFollowedBy (spaceChar <|> newline)
- <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=')
+ <*> manyUntilChar (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap T.singleton $ noneOf " \t\n\r=" <* char '=')
<* notFollowedBy alphaNum
-- | Parse @\<code>@ tag.
codeTag :: PandocMonad m => MuseParser m (F Inlines)
codeTag = fmap pure $ B.codeWith
<$> (htmlAttrToPandoc <$> openTag "code")
- <*> manyTill anyChar (closeTag "code")
+ <*> manyTillChar anyChar (closeTag "code")
-- | Parse @\<math>@ tag.
-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@
mathTag :: PandocMonad m => MuseParser m (F Inlines)
mathTag = return . B.math
<$ openTag "math"
- <*> manyTill anyChar (closeTag "math")
+ <*> manyTillChar anyChar (closeTag "math")
-- | Parse inline @\<literal>@ tag as a raw inline.
inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
inlineLiteralTag = try $ fmap pure $ B.rawInline
<$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
- <*> manyTill anyChar (closeTag "literal")
+ <*> manyTillChar anyChar (closeTag "literal")
str :: PandocMonad m => MuseParser m (F Inlines)
-str = return . B.str <$> many1 alphaNum <* updateLastStrPos
+str = return . B.str <$> many1Char alphaNum <* updateLastStrPos
-- | Consume asterisks that were not used as emphasis opening.
-- This prevents series of asterisks from being split into
-- literal asterisk and emphasis opening.
asterisks :: PandocMonad m => MuseParser m (F Inlines)
-asterisks = pure . B.str <$> many1 (char '*')
+asterisks = pure . B.str <$> many1Char (char '*')
symbol :: PandocMonad m => MuseParser m (F Inlines)
-symbol = pure . B.str . pure <$> nonspaceChar
+symbol = pure . B.str . T.singleton <$> nonspaceChar
-- | Parse a link or image.
linkOrImage :: PandocMonad m => MuseParser m (F Inlines)
@@ -934,12 +940,12 @@ linkContent = trimInlinesF . mconcat
<*> manyTill inline (char ']')
-- | Parse a link starting with (possibly null) prefix
-link :: PandocMonad m => String -> MuseParser m (F Inlines)
+link :: PandocMonad m => Text -> MuseParser m (F Inlines)
link prefix = try $ do
inLink <- asks museInLink
guard $ not inLink
- string $ "[[" ++ prefix
- url <- manyTill anyChar $ char ']'
+ textStr $ "[[" <> prefix
+ url <- manyTillChar anyChar $ char ']'
content <- option (pure $ B.str url) (local (\s -> s { museInLink = True }) linkContent)
char ']'
return $ B.link url "" <$> content
@@ -947,27 +953,27 @@ link prefix = try $ do
image :: PandocMonad m => MuseParser m (F Inlines)
image = try $ do
string "[["
- (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']')
+ (url, (ext, width, align)) <- manyUntilChar (noneOf "]") (imageExtensionAndOptions <* char ']')
content <- option mempty linkContent
char ']'
let widthAttr = case align of
- Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")]
- _ -> maybeToList (("width",) . (++ "%") <$> width)
+ Just 'f' -> [("width", fromMaybe "100" width <> "%"), ("height", "75%")]
+ _ -> maybeToList (("width",) . (<> "%") <$> width)
let alignClass = case align of
Just 'r' -> ["align-right"]
Just 'l' -> ["align-left"]
Just 'f' -> []
_ -> []
- return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content
+ return $ B.imageWith ("", alignClass, widthAttr) (url <> ext) mempty <$> content
where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
- imageExtension = choice (try . string <$> imageExtensions)
+ imageExtension = choice (try . textStr <$> imageExtensions)
imageExtensionAndOptions = do
ext <- imageExtension
(width, align) <- option (Nothing, Nothing) imageAttrs
return (ext, width, align)
imageAttrs = (,)
<$ many1 spaceChar
- <*> optionMaybe (many1 digit)
+ <*> optionMaybe (many1Char digit)
<* many spaceChar
<*> optionMaybe (oneOf "rlf")