diff options
| -rw-r--r-- | src/Text/Pandoc/Readers/Muse.hs | 273 | ||||
| -rw-r--r-- | test/Tests/Readers/Muse.hs | 41 | 
2 files changed, 235 insertions, 79 deletions
| diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 1385533b3..c8ebe1883 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -47,7 +47,7 @@ import Data.List (stripPrefix, intercalate)  import Data.List.Split (splitOn)  import qualified Data.Map as M  import qualified Data.Set as Set -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, isNothing)  import Data.Text (Text, unpack)  import System.FilePath (takeExtension)  import Text.HTML.TagSoup @@ -82,6 +82,7 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata                             , museLogMessages :: [LogMessage]                             , museNotes :: M.Map String (SourcePos, F Blocks)                             , museInLink :: Bool +                           , museInPara :: Bool                             }  instance Default MuseState where @@ -96,6 +97,7 @@ defaultMuseState = MuseState { museMeta = return nullMeta                               , museLogMessages = []                               , museNotes = M.empty                               , museInLink = False +                             , museInPara = False                               }  type MuseParser = ParserT String MuseState @@ -149,6 +151,12 @@ htmlElement tag = try $ do    where      endtag = void $ htmlTag (~== TagClose tag) +htmlBlock :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlBlock tag = try $ do +  res <- htmlElement tag +  manyTill spaceChar eol +  return res +  htmlAttrToPandoc :: [Attribute String] -> Attr  htmlAttrToPandoc attrs = (ident, classes, keyvals)    where @@ -159,13 +167,13 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals)  parseHtmlContent :: PandocMonad m                   => String -> MuseParser m (Attr, F Blocks)  parseHtmlContent tag = do -  (attr, content) <- htmlElement tag -  parsedContent <- parseContent (content ++ "\n") +  (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) +  manyTill spaceChar eol +  content <- parseBlocksTill (manyTill spaceChar endtag)    manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline -  return (attr, mconcat parsedContent) +  return (htmlAttrToPandoc attr, content)    where -    parseContent = parseFromString $ manyTill parseBlock endOfContent -    endOfContent = try $ skipMany blankline >> skipSpaces >> eof +    endtag = void $ htmlTag (~== TagClose tag)  commonPrefix :: String -> String -> String  commonPrefix _ [] = [] @@ -248,19 +256,85 @@ directive = do  parseBlocks :: PandocMonad m              => MuseParser m (F Blocks)  parseBlocks = -  try (mempty <$ eof) <|> +  try parseEnd <|>    try blockStart <|> +  try listStart <|>    try paraStart    where +    parseEnd = mempty <$ eof      blockStart = do first <- header <|> blockElements <|> amuseNoteBlock <|> emacsNoteBlock                      rest <- parseBlocks                      return $ first B.<> rest +    listStart = do +      st <- getState +      setState $ st{ museInPara = False } +      (first, rest) <- anyListUntil parseBlocks +      return $ first B.<> rest      paraStart = do        indent <- length <$> many spaceChar -      (first, rest) <- paraUntil ((mempty <$ eof) <|> blockStart) +      (first, rest) <- paraUntil parseBlocks        let first' = if indent >= 2 && indent < 6 then B.blockQuote <$> first else first        return $ first' B.<> rest +parseBlocksTill :: PandocMonad m +                => MuseParser m a +                -> MuseParser m (F Blocks) +parseBlocksTill end = +  try parseEnd <|> +  try blockStart <|> +  try listStart <|> +  try paraStart +  where +    parseEnd = mempty <$ end +    blockStart = do first <- blockElements +                    rest <- continuation +                    return $ first B.<> rest +    listStart = do +      st <- getState +      setState $ st{ museInPara = False } +      (first, e) <- anyListUntil ((Left <$> end) <|> (Right <$> continuation)) +      case e of +        Left _ -> return first +        Right rest -> return $ first B.<> rest +    paraStart = do (first, e) <- paraUntil ((Left <$> end) <|> (Right <$> continuation)) +                   case e of +                     Left _ -> return $ first +                     Right rest -> return $ first B.<> rest +    continuation = parseBlocksTill end + +listItemContentsUntil :: PandocMonad m +                      => Int +                      -> MuseParser m a +                      -> MuseParser m (F Blocks, a) +listItemContentsUntil col end = +  try blockStart <|> +  try listStart <|> +  try paraStart +  where +    parseEnd = do e <- end +                  return (mempty, e) +    paraStart = do +      (first, e) <- paraUntil ((Right <$> continuation) <|> (Left <$> end)) +      case e of +        Left ee -> return (first, ee) +        Right (rest, ee) -> return (first B.<> rest, ee) +    blockStart = do first <- blockElements +                    (rest, e) <- continuation <|> parseEnd +                    return (first B.<> rest, e) +    listStart = do +      st <- getState +      setState $ st{ museInPara = False } +      (first, e) <- anyListUntil ((Right <$> continuation) <|> (Left <$> end)) +      case e of +        Left ee -> return (first, ee) +        Right (rest, ee) -> return $ (first B.<> rest, ee) +    continuation = try $ do blank <- optionMaybe blankline +                            skipMany blankline +                            indentWith col +                            st <- getState +                            setState $ st{ museInPara = museInPara st && isNothing blank } +                            listItemContentsUntil col end +  parseBlock :: PandocMonad m => MuseParser m (F Blocks)  parseBlock = do    res <- blockElements <|> para @@ -269,24 +343,24 @@ parseBlock = do    where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements)))  blockElements :: PandocMonad m => MuseParser m (F Blocks) -blockElements = choice [ mempty <$ blankline -                       , comment -                       , separator -                       , example -                       , exampleTag -                       , literalTag -                       , centerTag -                       , rightTag -                       , quoteTag -                       , divTag -                       , verseTag -                       , lineBlock -                       , bulletList -                       , orderedList -                       , definitionList -                       , table -                       , commentTag -                       ] +blockElements = do +  st <- getState +  setState $ st{ museInPara = False } +  choice [ mempty <$ blankline +         , comment +         , separator +         , example +         , exampleTag +         , literalTag +         , centerTag +         , rightTag +         , quoteTag +         , divTag +         , verseTag +         , lineBlock +         , table +         , commentTag +         ]  comment :: PandocMonad m => MuseParser m (F Blocks)  comment = try $ do @@ -343,13 +417,13 @@ dropSpacePrefix lns =  exampleTag :: PandocMonad m => MuseParser m (F Blocks)  exampleTag = try $ do    many spaceChar -  (attr, contents) <- htmlElement "example" +  (attr, contents) <- htmlBlock "example"    return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents  literalTag :: PandocMonad m => MuseParser m (F Blocks)  literalTag = do    guardDisabled Ext_amuse -- Text::Amuse does not support <literal> -  (return . rawBlock) <$> htmlElement "literal" +  (return . rawBlock) <$> htmlBlock "literal"    where      -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML      format (_, _, kvs)        = fromMaybe "html" $ lookup "style" kvs @@ -385,18 +459,22 @@ verseLines = do  verseTag :: PandocMonad m => MuseParser m (F Blocks)  verseTag = do -  (_, content) <- htmlElement "verse" +  (_, content) <- htmlBlock "verse"    parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content)  commentTag :: PandocMonad m => MuseParser m (F Blocks) -commentTag = htmlElement "comment" >> return mempty +commentTag = htmlBlock "comment" >> return mempty  -- Indented paragraph is either center, right or quote  paraUntil :: PandocMonad m            => MuseParser m a            -> MuseParser m (F Blocks, a)  paraUntil end = do +  state <- getState +  guard $ not $ museInPara state +  setState $ state{ museInPara = True }    (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end) +  updateState (\st -> st { museInPara = False })    return (fmap (B.para) $ trimInlinesF $ mconcat l, e)  noteMarker :: PandocMonad m => MuseParser m String @@ -413,6 +491,8 @@ amuseNoteBlock = try $ do    guardEnabled Ext_amuse    pos <- getPosition    ref <- noteMarker <* spaceChar +  st <- getState +  setState $ st{ museInPara = False }    content <- listItemContents    oldnotes <- museNotes <$> getState    case M.lookup ref oldnotes of @@ -465,35 +545,36 @@ lineBlock = try $ do  -- lists  -- -listItemContents' :: PandocMonad m => Int -> MuseParser m (F Blocks) -listItemContents' col = -  mconcat <$> parseBlock `sepBy1` try (skipMany blankline >> indentWith col) +bulletListItemsUntil :: PandocMonad m +                     => Int +                     -> MuseParser m a +                     -> MuseParser m ([F Blocks], a) +bulletListItemsUntil indent end = try $ do +  char '-' +  void spaceChar <|> lookAhead eol +  st <- getState +  setState $ st{ museInPara = False } +  (x, e) <- listItemContentsUntil (indent + 2) ((Right <$> try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) <|> (Left <$> end)) +  case e of +    Left ee -> return ([x], ee) +    Right (xs, ee) -> return (x:xs, ee) + +bulletListUntil :: PandocMonad m +                => MuseParser m a +                -> MuseParser m (F Blocks, a) +bulletListUntil end = try $ do +  many spaceChar +  pos <- getPosition +  let indent = sourceColumn pos - 1 +  guard $ indent /= 0 +  (items, e) <- bulletListItemsUntil indent end +  return $ (B.bulletList <$> sequence items, e)  listItemContents :: PandocMonad m => MuseParser m (F Blocks)  listItemContents = do    pos <- getPosition    let col = sourceColumn pos - 1 -  listItemContents' col - -listItem :: PandocMonad m => Int -> MuseParser m a -> MuseParser m (F Blocks) -listItem n p = try $ do -  optional blankline -  count n spaceChar -  p -  void spaceChar <|> lookAhead eol -  listItemContents - -bulletList :: PandocMonad m => MuseParser m (F Blocks) -bulletList = try $ do -  many spaceChar -  pos <- getPosition -  let col = sourceColumn pos -  guard $ col /= 1 -  char '-' -  void spaceChar <|> lookAhead eol -  first <- listItemContents -  rest <- many $ listItem (col - 1) (char '-') -  return $ B.bulletList <$> sequence (first : rest) +  mconcat <$> parseBlock `sepBy1` try (skipMany blankline >> indentWith col)  -- | Parses an ordered list marker and returns list attributes.  anyMuseOrderedListMarker :: PandocMonad m => MuseParser m ListAttributes @@ -516,38 +597,74 @@ museOrderedListMarker style = do    char '.'    return start -orderedList :: PandocMonad m => MuseParser m (F Blocks) -orderedList = try $ do +orderedListItemsUntil :: PandocMonad m +                      => Int +                      -> ListNumberStyle +                      -> MuseParser m a +                      -> MuseParser m ([F Blocks], a) +orderedListItemsUntil indent style end = +  continuation +  where +    continuation = try $ do +      pos <- getPosition +      void spaceChar <|> lookAhead eol +      st <- getState +      setState $ st{ museInPara = False } +      (x, e) <- listItemContentsUntil (sourceColumn pos) ((Right <$> try (optionMaybe blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) <|> (Left <$> end)) +      case e of +        Left ee -> return ([x], ee) +        Right (xs, ee) -> return (x:xs, ee) + +orderedListUntil :: PandocMonad m +                 => MuseParser m a +                 -> MuseParser m (F Blocks, a) +orderedListUntil end = try $ do    many spaceChar    pos <- getPosition -  let col = sourceColumn pos -  guard $ col /= 1 +  let indent = sourceColumn pos - 1 +  guard $ indent /= 0    p@(_, style, _) <- anyMuseOrderedListMarker    guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman] -  void spaceChar <|> lookAhead eol -  first <- listItemContents -  rest <- many $ listItem (col - 1) (museOrderedListMarker style) -  return $ B.orderedListWith p <$> sequence (first : rest) - -definitionListItem :: PandocMonad m => MuseParser m (F (Inlines, [Blocks])) -definitionListItem = try $ do -  pos <- getPosition -  term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (string "::") -  void spaceChar <|> lookAhead eol -  contents <- listItemContents' $ sourceColumn pos -  pure $ do lineContent' <- contents -            term' <- term -            pure (term', [lineContent']) - -definitionList :: PandocMonad m => MuseParser m (F Blocks) -definitionList = try $ do +  (items, e) <- orderedListItemsUntil indent style end +  return $ (B.orderedListWith p <$> sequence items, e) + +definitionListItemsUntil :: PandocMonad m +                         => Int +                         -> MuseParser m a +                         -> MuseParser m ([F (Inlines, [Blocks])], a) +definitionListItemsUntil indent end = +  continuation +  where continuation = try $ do +          pos <- getPosition +          term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (string "::") +          void spaceChar <|> lookAhead eol +          st <- getState +          setState $ st{ museInPara = False } +          (x, e) <- listItemContentsUntil (sourceColumn pos) ((Right <$> try (optional blankline >> count indent spaceChar >> continuation)) <|> (Left <$> end)) +          let xx = do +                term' <- term +                x' <- x +                (return (term', [x']))::(F (Inlines, [Blocks])) +          case e of +            Left ee -> return $ ([xx], ee) +            Right (xs, ee) -> return $ (xx : xs, ee) + +definitionListUntil :: PandocMonad m +                    => MuseParser m a +                    -> MuseParser m (F Blocks, a) +definitionListUntil end = try $ do    many spaceChar    pos <- getPosition    let indent = sourceColumn pos - 1    guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse -  first <- definitionListItem -  rest <- many $ try (optional blankline >> count indent spaceChar >> definitionListItem) -  return $ B.definitionList <$> sequence (first : rest) +  (items, e) <- definitionListItemsUntil indent end +  return (B.definitionList <$> sequence items, e) + +anyListUntil :: PandocMonad m +             => MuseParser m a +             -> MuseParser m (F Blocks, a) +anyListUntil end = +  bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end  --  -- tables diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index 60059df77..967a63ac9 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -313,6 +313,16 @@ tests =                    , "</quote>"                    ]          =?> blockQuote (para $ text "Hello, world") +      , "Nested quote tag" =: +        T.unlines [ "<quote>" +                  , "foo" +                  , "<quote>" +                  , "bar" +                  , "</quote>" +                  , "baz" +                  , "</quote>" +                  ] =?> +        blockQuote (para "foo" <> blockQuote (para "bar") <> para "baz")        , "Verse tag" =:          T.unlines [ "<verse>"                    , "" @@ -514,6 +524,12 @@ tests =                      ] =?>            header 2 "Foo" <>            para (spanWith ("bar", [], []) mempty) +        , "Headers terminate lists" =: +          T.unlines [ " - foo" +                    , "* bar" +                    ] =?> +          bulletList [ para "foo" ] <> +          header 1 "bar"          ]        , testGroup "Directives"          [ "Title" =: @@ -846,6 +862,15 @@ tests =                                , para "c"                                ]                      ] +      , "List continuation afeter nested list" =: +         T.unlines +           [ " - - foo" +           , "" +           , "   bar" +           ] =?> +         bulletList [ bulletList [ para "foo" ] <> +                      para "bar" +                    ]        -- Emacs Muse allows to separate lists with two or more blank lines.        -- Text::Amuse (Amusewiki engine) always creates a single list as of version 0.82.        -- pandoc follows Emacs Muse behavior @@ -1087,7 +1112,21 @@ tests =                               , para "* Bar"                               ]                     ] -      , "List inside a tag" =: +      , "Bullet list inside a tag" =: +        T.unlines +          [ "<quote>" +          , " - First" +          , "" +          , " - Second" +          , "" +          , " - Third" +          , "</quote>" +          ] =?> +        blockQuote (bulletList [ para "First" +                               , para "Second" +                               , para "Third" +                               ]) +      , "Ordered list inside a tag" =:          T.unlines            [ "<quote>"            , " 1. First" | 
