diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
| -rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 89 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 6 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Combine.hs | 4 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 32 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 6 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/JATS.hs | 30 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 89 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 37 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Muse.hs | 405 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/OPML.hs | 30 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 21 |
11 files changed, 513 insertions, 236 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 6bc4584c2..3d48c7ee8 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,4 +1,33 @@ {-# LANGUAGE NoImplicitPrelude #-} +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.DocBook + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of DocBook XML to 'Pandoc' document. +-} module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Prelude import Control.Monad.State.Strict @@ -236,7 +265,7 @@ List of all DocBook tags, with [x] indicating implemented, [ ] manvolnum - A reference volume number [x] markup - A string of formatting markup in text that is to be represented literally -[ ] mathphrase - A mathematical phrase, an expression that can be represented +[x] mathphrase - A mathematical phrase, an expression that can be represented with ordinary text and a small amount of markup [ ] medialabel - A name that identifies the physical medium on which some information resides @@ -698,6 +727,8 @@ parseBlock (Elem e) = "bibliodiv" -> sect 1 "biblioentry" -> parseMixed para (elContent e) "bibliomixed" -> parseMixed para (elContent e) + "equation" -> para <$> equation e displayMath + "informalequation" -> para <$> equation e displayMath "glosssee" -> para . (\ils -> text "See " <> ils <> str ".") <$> getInlines e "glossseealso" -> para . (\ils -> text "See also " <> ils <> str ".") @@ -924,9 +955,9 @@ parseInline (CRef ref) = return $ maybe (text $ map toUpper ref) text $ lookupEntity ref parseInline (Elem e) = case qName (elName e) of - "equation" -> equation displayMath - "informalequation" -> equation displayMath - "inlineequation" -> equation math + "equation" -> equation e displayMath + "informalequation" -> equation e displayMath + "inlineequation" -> equation e math "subscript" -> subscript <$> innerInlines "superscript" -> superscript <$> innerInlines "inlinemediaobject" -> getMediaobject e @@ -1005,13 +1036,6 @@ parseInline (Elem e) = _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> mapM parseInline (elContent e) - equation constructor = return $ mconcat $ - map (constructor . writeTeX) - $ rights - $ map (readMathML . showElement . everywhere (mkT removePrefix)) - $ filterChildren (\x -> qName (elName x) == "math" && - qPrefix (elName x) == Just "mml") e - removePrefix elname = elname { qPrefix = Nothing } codeWithLang = do let classes' = case attrValue "language" e of "" -> [] @@ -1049,6 +1073,7 @@ parseInline (Elem e) = | not (null xrefLabel) = xrefLabel | otherwise = case qName (elName el) of "chapter" -> descendantContent "title" el + "section" -> descendantContent "title" el "sect1" -> descendantContent "title" el "sect2" -> descendantContent "title" el "sect3" -> descendantContent "title" el @@ -1061,3 +1086,45 @@ parseInline (Elem e) = xrefLabel = attrValue "xreflabel" el descendantContent name = maybe "???" strContent . filterElementName (\n -> qName n == name) + +-- | Extract a math equation from an element +-- +-- asciidoc can generate Latex math in CDATA sections. +-- +-- Note that if some MathML can't be parsed it is silently ignored! +equation + :: Monad m + => Element + -- ^ The element from which to extract a mathematical equation + -> (String -> Inlines) + -- ^ A constructor for some Inlines, taking the TeX code as input + -> m Inlines +equation e constructor = + return $ mconcat $ map constructor $ mathMLEquations ++ latexEquations + where + mathMLEquations :: [String] + mathMLEquations = map writeTeX $ rights $ readMath + (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml") + (readMathML . showElement) + + latexEquations :: [String] + latexEquations = readMath (\x -> qName (elName x) == "mathphrase") + (concat . fmap showVerbatimCData . elContent) + + readMath :: (Element -> Bool) -> (Element -> b) -> [b] + readMath childPredicate fromElement = + ( map (fromElement . everywhere (mkT removePrefix)) + $ filterChildren childPredicate e + ) + +-- | Get the actual text stored in a verbatim CData block. 'showContent' +-- returns the text still surrounded by the [[CDATA]] tags. +-- +-- Returns 'showContent' if this is not a verbatim CData +showVerbatimCData :: Content -> String +showVerbatimCData (Text (CData CDataVerbatim d _)) = d +showVerbatimCData c = showContent c + +-- | Set the prefix of a name to 'Nothing' +removePrefix :: QName -> QName +removePrefix elname = elname { qPrefix = Nothing } diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 00603603a..ca9f8c8dd 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -688,6 +688,10 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do rowLength :: Row -> Int rowLength (Row c) = length c + -- pad cells. New Text.Pandoc.Builder will do that for us, + -- so this is for compatibility while we switch over. + let cells' = map (\row -> take width (row ++ repeat mempty)) cells + hdrCells <- case hdr of Just r' -> rowToBlocksList r' Nothing -> return $ replicate width mempty @@ -700,7 +704,7 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do let alignments = replicate width AlignDefault widths = replicate width 0 :: [Double] - return $ table caption (zip alignments widths) hdrCells cells + return $ table caption (zip alignments widths) hdrCells cells' bodyPartToBlocks (OMathPara e) = return $ para $ displayMath (writeTeX e) diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index dfd2b5666..108c4bbe5 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -135,6 +135,10 @@ combineBlocks bs cs | bs' :> BlockQuote bs'' <- viewr (unMany bs) , BlockQuote cs'' :< cs' <- viewl (unMany cs) = Many $ (bs' |> BlockQuote (bs'' <> cs'')) >< cs' + | bs' :> CodeBlock attr codeStr <- viewr (unMany bs) + , CodeBlock attr' codeStr' :< cs' <- viewl (unMany cs) + , attr == attr' = + Many $ (bs' |> CodeBlock attr (codeStr <> "\n" <> codeStr')) >< cs' combineBlocks bs cs = bs <> cs instance (Monoid a, Eq a) => Eq (Modifier a) where diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 714468a8a..c26447641 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -1,7 +1,35 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE ViewPatterns #-} +{- +Copyright (C) 2014-2018 Matthew Pickering +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.EPUB + Copyright : Copyright (C) 2014-2018 Matthew Pickering + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of EPUB to 'Pandoc' document. +-} module Text.Pandoc.Readers.EPUB (readEPUB) @@ -93,7 +121,7 @@ fetchImages mimes root arc (query iq -> links) = mapM_ (uncurry3 insertMedia) (mapMaybe getEntry links) where getEntry link = - let abslink = normalise (root </> link) in + let abslink = normalise (unEscapeString (root </> link)) in (link , lookup link mimes, ) . fromEntry <$> findEntryByPath abslink arc @@ -264,7 +292,7 @@ findAttrE :: PandocMonad m => QName -> Element -> m String findAttrE q e = mkE "findAttr" $ findAttr q e findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry -findEntryByPathE (normalise -> path) a = +findEntryByPathE (normalise . unEscapeString -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a parseXMLDocE :: PandocMonad m => String -> m Element diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index b221b6fb2..32a1ba5a6 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -510,14 +510,16 @@ pTable = try $ do [Plain _] -> True _ -> False let isSimple = all isSinglePlain $ concat (head':rows''') - let cols = length $ if null head' then head rows''' else head' + let cols = if null head' + then maximum (map length rows''') + else length head' -- add empty cells to short rows let addEmpties r = case cols - length r of n | n > 0 -> r <> replicate n mempty | otherwise -> r let rows = map addEmpties rows''' let aligns = case rows'' of - (cs:_) -> map fst cs + (cs:_) -> take cols $ map fst cs ++ repeat AlignDefault _ -> replicate cols AlignDefault let widths = if null widths' then if isSimple diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 29f23137c..59af76d23 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -1,5 +1,35 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} +{- +Copyright (C) 2017-2018 Hamish Mackenzie + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.JATS + Copyright : Copyright (C) 2017-2018 Hamish Mackenzie + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of JATS XML to 'Pandoc' document. +-} + module Text.Pandoc.Readers.JATS ( readJATS ) where import Prelude import Control.Monad.State.Strict diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 52782653e..041b552dc 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -48,7 +48,7 @@ import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) import Control.Monad.Trans (lift) -import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower) +import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower, toUpper) import Data.Default import Data.List (intercalate, isPrefixOf) import qualified Data.Map as M @@ -164,6 +164,7 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sInTableCell :: Bool , sLastHeaderNum :: HeaderNum , sLabels :: M.Map String [Inline] + , sHasChapters :: Bool , sToggles :: M.Map String Bool } deriving Show @@ -183,6 +184,7 @@ defaultLaTeXState = LaTeXState{ sOptions = def , sInTableCell = False , sLastHeaderNum = HeaderNum [] , sLabels = M.empty + , sHasChapters = False , sToggles = M.empty } @@ -240,21 +242,30 @@ withVerbatimMode parser = do return result rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => LP m a -> ParserT String s m (a, String) -rawLaTeXParser parser = do + => LP m a -> LP m a -> ParserT String s m (a, String) +rawLaTeXParser parser valParser = do inp <- getInput let toks = tokenize "source" $ T.pack inp pstate <- getState - let lstate = def{ sOptions = extractReaderOptions pstate - , sMacros = extractMacros pstate } - let rawparser = (,) <$> withRaw parser <*> getState - res <- lift $ runParserT rawparser lstate "chunk" toks - case res of + let lstate = def{ sOptions = extractReaderOptions pstate } + let lstate' = lstate { sMacros = extractMacros pstate } + let rawparser = (,) <$> withRaw valParser <*> getState + res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks + case res' of Left _ -> mzero - Right ((val, raw), st) -> do - updateState (updateMacros (sMacros st <>)) - rawstring <- takeP (T.length (untokenize raw)) - return (val, rawstring) + Right toks' -> do + res <- lift $ runParserT (do doMacros 0 + -- retokenize, applying macros + ts <- many (satisfyTok (const True)) + setInput ts + rawparser) + lstate' "chunk" toks' + case res of + Left _ -> mzero + Right ((val, raw), st) -> do + updateState (updateMacros (sMacros st <>)) + _ <- takeP (T.length (untokenize toks')) + return (val, T.unpack (untokenize raw)) applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) => String -> ParserT String s m String @@ -275,19 +286,18 @@ rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) -- we don't want to apply newly defined latex macros to their own -- definitions: - snd <$> rawLaTeXParser macroDef - <|> ((snd <$> rawLaTeXParser (environment <|> blockCommand)) >>= applyMacros) + snd <$> rawLaTeXParser (environment <|> macroDef <|> blockCommand) blocks rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) => ParserT String s m String rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) - rawLaTeXParser (inlineEnvironment <|> inlineCommand') >>= applyMacros . snd + snd <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) - fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') + fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines tokenize :: SourceName -> Text -> [Tok] tokenize sourcename = totoks (initialPos sourcename) @@ -1313,6 +1323,12 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("slshape", extractSpaces emph <$> inlines) , ("scshape", extractSpaces smallcaps <$> inlines) , ("bfseries", extractSpaces strong <$> inlines) + , ("MakeUppercase", makeUppercase <$> tok) + , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase + , ("uppercase", makeUppercase <$> tok) + , ("MakeLowercase", makeLowercase <$> tok) + , ("MakeTextLowercase", makeLowercase <$> tok) + , ("lowercase", makeLowercase <$> tok) , ("/", pure mempty) -- italic correction , ("aa", lit "å") , ("AA", lit "Å") @@ -1513,6 +1529,16 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("foreignlanguage", foreignlanguage) ] +makeUppercase :: Inlines -> Inlines +makeUppercase = fromList . walk (alterStr (map toUpper)) . toList + +makeLowercase :: Inlines -> Inlines +makeLowercase = fromList . walk (alterStr (map toLower)) . toList + +alterStr :: (String -> String) -> Inline -> Inline +alterStr f (Str xs) = Str (f xs) +alterStr _ x = x + foreignlanguage :: PandocMonad m => LP m Inlines foreignlanguage = do babelLang <- T.unpack . untokenize <$> braced @@ -1685,6 +1711,9 @@ treatAsBlock = Set.fromList , "clearpage" , "pagebreak" , "titleformat" + , "listoffigures" + , "listoftables" + , "write" ] isInlineCommand :: Text -> Bool @@ -1984,9 +2013,13 @@ section starred (ident, classes, kvs) lvl = do try (spaces >> controlSeq "label" >> spaces >> toksToString <$> braced) let classes' = if starred then "unnumbered" : classes else classes + when (lvl == 0) $ + updateState $ \st -> st{ sHasChapters = True } unless starred $ do hn <- sLastHeaderNum <$> getState - let num = incrementHeaderNum lvl hn + hasChapters <- sHasChapters <$> getState + let lvl' = lvl + if hasChapters then 1 else 0 + let num = incrementHeaderNum lvl' hn updateState $ \st -> st{ sLastHeaderNum = num } updateState $ \st -> st{ sLabels = M.insert lab [Str (renderHeaderNum num)] @@ -2143,19 +2176,6 @@ environments = M.fromList codeBlockWith attr <$> verbEnv "lstlisting") , ("minted", minted) , ("obeylines", obeylines) - , ("displaymath", mathEnvWith para Nothing "displaymath") - , ("equation", mathEnvWith para Nothing "equation") - , ("equation*", mathEnvWith para Nothing "equation*") - , ("gather", mathEnvWith para (Just "gathered") "gather") - , ("gather*", mathEnvWith para (Just "gathered") "gather*") - , ("multline", mathEnvWith para (Just "gathered") "multline") - , ("multline*", mathEnvWith para (Just "gathered") "multline*") - , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*") - , ("align", mathEnvWith para (Just "aligned") "align") - , ("align*", mathEnvWith para (Just "aligned") "align*") - , ("alignat", mathEnvWith para (Just "aligned") "alignat") - , ("alignat*", mathEnvWith para (Just "aligned") "alignat*") , ("tikzpicture", rawVerbEnv "tikzpicture") -- etoolbox , ("ifstrequal", ifstrequal) @@ -2166,11 +2186,14 @@ environments = M.fromList ] environment :: PandocMonad m => LP m Blocks -environment = do +environment = try $ do controlSeq "begin" name <- untokenize <$> braced - M.findWithDefault mzero name environments - <|> rawEnv name + M.findWithDefault mzero name environments <|> + if M.member name (inlineEnvironments + :: M.Map Text (LP PandocPure Inlines)) + then mzero + else rawEnv name env :: PandocMonad m => Text -> LP m a -> LP m a env name p = p <* end_ name diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 71e6f8249..156b2b622 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -674,6 +674,8 @@ keyValAttr = try $ do char '=' val <- enclosed (char '"') (char '"') litChar <|> enclosed (char '\'') (char '\'') litChar + <|> ("" <$ try (string "\"\"")) + <|> ("" <$ try (string "''")) <|> many (escapedChar' <|> noneOf " \t\n\r}") return $ \(id',cs,kvs) -> case key of @@ -910,6 +912,17 @@ listContinuation continuationIndent = try $ do blanks <- many blankline return $ concat (x:xs) ++ blanks +-- Variant of blanklines that doesn't require blank lines +-- before a fence or eof. +blanklines' :: PandocMonad m => MarkdownParser m [Char] +blanklines' = blanklines <|> try checkDivCloser + where checkDivCloser = do + guardEnabled Ext_fenced_divs + divLevel <- stateFencedDivLevel <$> getState + guard (divLevel >= 1) + lookAhead divFenceEnd + return "" + notFollowedByDivCloser :: PandocMonad m => MarkdownParser m () notFollowedByDivCloser = guardDisabled Ext_fenced_divs <|> @@ -1251,7 +1264,7 @@ alignType strLst len = -- Parse a table footer - dashed lines followed by blank line. tableFooter :: PandocMonad m => MarkdownParser m String -tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines +tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines' -- Parse a table separator - dashed line. tableSep :: PandocMonad m => MarkdownParser m Char @@ -1262,7 +1275,7 @@ rawTableLine :: PandocMonad m => [Int] -> MarkdownParser m [String] rawTableLine indices = do - notFollowedBy' (blanklines <|> tableFooter) + notFollowedBy' (blanklines' <|> tableFooter) line <- many1Till anyChar newline return $ map trim $ tail $ splitStringByIndices (init indices) line @@ -1300,7 +1313,7 @@ simpleTable headless = do (aligns, _widths, heads', lines') <- tableWith (simpleTableHeader headless) tableLine (return ()) - (if headless then tableFooter else tableFooter <|> blanklines) + (if headless then tableFooter else tableFooter <|> blanklines') -- Simple tables get 0s for relative column widths (i.e., use default) return (aligns, replicate (length aligns) 0, heads', lines') @@ -1328,11 +1341,16 @@ multilineTableHeader headless = try $ do newline let (lengths, lines') = unzip dashes let indices = scanl (+) (length initSp) lines' + -- compensate for the fact that intercolumn spaces are + -- not included in the last index: + let indices' = case reverse indices of + [] -> [] + (x:xs) -> reverse (x+1:xs) rawHeadsList <- if headless then fmap (map (:[]) . tail . - splitStringByIndices (init indices)) $ lookAhead anyLine + splitStringByIndices (init indices')) $ lookAhead anyLine else return $ transpose $ map - (tail . splitStringByIndices (init indices)) + (tail . splitStringByIndices (init indices')) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless @@ -1340,7 +1358,7 @@ multilineTableHeader headless = try $ do else map (unlines . map trim) rawHeadsList heads <- fmap sequence $ mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads - return (heads, aligns, indices) + return (heads, aligns, indices') -- Parse a grid table: starts with row of '-' on top, then header -- (which may be grid), then the rows, @@ -2146,7 +2164,6 @@ singleQuoted = try $ do doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) doubleQuoted = try $ do doubleQuoteStart - contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - withQuoteContext InDoubleQuote (doubleQuoteEnd >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> return (return (B.str "\8220") <> contents) + withQuoteContext InDoubleQuote $ + fmap B.doubleQuoted . trimInlinesF . mconcat <$> + many1Till inline doubleQuoteEnd diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 30475d91e..fe6b3698c 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -36,8 +36,7 @@ TODO: - Org tables - table.el tables - Images with attributes (floating and width) -- Citations and <biblio> -- <play> environment +- <cite> tag -} module Text.Pandoc.Readers.Muse (readMuse) where @@ -85,24 +84,21 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , museLogMessages :: [LogMessage] , museNotes :: M.Map String (SourcePos, F Blocks) - , museInLink :: Bool - , museInPara :: Bool + , museInLink :: Bool -- ^ True when parsing a link description to avoid nested links + , museInPara :: Bool -- ^ True when looking for a paragraph terminator } instance Default MuseState where - def = defaultMuseState - -defaultMuseState :: MuseState -defaultMuseState = MuseState { museMeta = return nullMeta - , museOptions = def - , museHeaders = M.empty - , museIdentifierList = Set.empty - , museLastStrPos = Nothing - , museLogMessages = [] - , museNotes = M.empty - , museInLink = False - , museInPara = False - } + def = MuseState { museMeta = return nullMeta + , museOptions = def + , museHeaders = M.empty + , museIdentifierList = Set.empty + , museLastStrPos = Nothing + , museLogMessages = [] + , museNotes = M.empty + , museInLink = False + , museInPara = False + } type MuseParser = ParserT String MuseState @@ -125,10 +121,7 @@ instance HasLogMessages MuseState where addLogMessage m s = s{ museLogMessages = m : museLogMessages s } getLogMessages = reverse . museLogMessages --- --- main parser --- - +-- | Parse Muse document parseMuse :: PandocMonad m => MuseParser m Pandoc parseMuse = do many directive @@ -140,14 +133,56 @@ parseMuse = do reportLogMessages return doc --- --- utility functions --- +-- * Utility functions + +commonPrefix :: String -> String -> String +commonPrefix _ [] = [] +commonPrefix [] _ = [] +commonPrefix (x:xs) (y:ys) + | x == y = x : commonPrefix xs ys + | otherwise = [] + +-- | Trim up to one newline from the beginning of the string. +lchop :: String -> String +lchop s = case s of + '\n':ss -> ss + _ -> s + +-- | Trim up to one newline from the end of the string. +rchop :: String -> String +rchop = reverse . lchop . reverse + +dropSpacePrefix :: [String] -> [String] +dropSpacePrefix lns = + map (drop maxIndent) lns + where flns = filter (not . all (== ' ')) lns + maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns + +atStart :: PandocMonad m => MuseParser m a -> MuseParser m a +atStart p = do + pos <- getPosition + st <- getState + guard $ museLastStrPos st /= Just pos + p + +-- * Parsers +-- | Parse end-of-line, which can be either a newline or end-of-file. eol :: Stream s m Char => ParserT s st m () eol = void newline <|> eof -htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String) +someUntil :: (Stream s m t) + => ParserT s u m a + -> ParserT s u m b + -> ParserT s u m ([a], b) +someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end + +-- ** HTML parsers + +-- | Parse HTML tag, returning its attributes and literal contents. +htmlElement :: PandocMonad m + => String -- ^ Tag name + -> MuseParser m (Attr, String) htmlElement tag = try $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) content <- manyTill anyChar endtag @@ -155,13 +190,16 @@ htmlElement tag = try $ do where endtag = void $ htmlTag (~== TagClose tag) -htmlBlock :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlBlock :: PandocMonad m + => String -- ^ Tag name + -> MuseParser m (Attr, String) htmlBlock tag = try $ do many spaceChar res <- htmlElement tag manyTill spaceChar eol return res +-- | Convert HTML attributes to Pandoc 'Attr' htmlAttrToPandoc :: [Attribute String] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where @@ -170,7 +208,8 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals) keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] parseHtmlContent :: PandocMonad m - => String -> MuseParser m (Attr, F Blocks) + => String -- ^ Tag name + -> MuseParser m (Attr, F Blocks) parseHtmlContent tag = try $ do many spaceChar pos <- getPosition @@ -182,29 +221,7 @@ parseHtmlContent tag = try $ do where endtag = void $ htmlTag (~== TagClose tag) -commonPrefix :: String -> String -> String -commonPrefix _ [] = [] -commonPrefix [] _ = [] -commonPrefix (x:xs) (y:ys) - | x == y = x : commonPrefix xs ys - | otherwise = [] - -atStart :: PandocMonad m => MuseParser m a -> MuseParser m a -atStart p = do - pos <- getPosition - st <- getState - guard $ museLastStrPos st /= Just pos - p - -someUntil :: (Stream s m t) - => ParserT s u m a - -> ParserT s u m b - -> ParserT s u m ([a], b) -someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end - --- --- directive parsers --- +-- ** Directive parsers -- While not documented, Emacs Muse allows "-" in directive name parseDirectiveKey :: PandocMonad m => MuseParser m String @@ -235,9 +252,7 @@ directive = do where translateKey "cover" = "cover-image" translateKey x = x --- --- block parsers --- +-- ** Block parsers parseBlocks :: PandocMonad m => MuseParser m (F Blocks) @@ -248,8 +263,8 @@ parseBlocks = paraStart) where parseEnd = mempty <$ eof - blockStart = (B.<>) <$> (header <|> blockElements <|> emacsNoteBlock) - <*> parseBlocks + blockStart = ((B.<>) <$> (emacsHeading <|> blockElements <|> emacsNoteBlock) + <*> parseBlocks) <|> (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks) listStart = do updateState (\st -> st { museInPara = False }) uncurry (B.<>) <$> (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks) @@ -322,19 +337,24 @@ blockElements = do , rightTag , quoteTag , divTag + , biblioTag + , playTag , verseTag , lineBlock , table , commentTag ] +-- | Parse a line comment, starting with @;@ in the first column. comment :: PandocMonad m => MuseParser m (F Blocks) comment = try $ do + getPosition >>= \pos -> guard (sourceColumn pos == 1) char ';' optional (spaceChar >> many (noneOf "\n")) eol return mempty +-- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters. separator :: PandocMonad m => MuseParser m (F Blocks) separator = try $ do string "----" @@ -343,8 +363,10 @@ separator = try $ do eol return $ return B.horizontalRule -header :: PandocMonad m => MuseParser m (F Blocks) -header = try $ do +-- | Parse a single-line heading. +emacsHeading :: PandocMonad m => MuseParser m (F Blocks) +emacsHeading = try $ do + guardDisabled Ext_amuse anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol) getPosition >>= \pos -> guard (sourceColumn pos == 1) level <- fmap length $ many1 $ char '*' @@ -354,6 +376,24 @@ header = try $ do attr <- registerHeader (anchorId, [], []) (runF content def) return $ B.headerWith attr level <$> content +-- | Parse a multi-line heading. +-- It is a Text::Amuse extension, Emacs Muse does not allow heading to span multiple lines. +amuseHeadingUntil :: PandocMonad m + => MuseParser m a -- ^ Terminator parser + -> MuseParser m (F Blocks, a) +amuseHeadingUntil end = try $ do + guardEnabled Ext_amuse + anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol) + getPosition >>= \pos -> guard (sourceColumn pos == 1) + level <- fmap length $ many1 $ char '*' + guard $ level <= 5 + spaceChar + (content, e) <- paraContentsUntil end + attr <- registerHeader (anchorId, [], []) (runF content def) + return (B.headerWith attr level <$> content, e) + +-- | Parse an example between @{{{@ and @}}}@. +-- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation. example :: PandocMonad m => MuseParser m (F Blocks) example = try $ do string "{{{" @@ -361,27 +401,14 @@ example = try $ do contents <- manyTill anyChar $ try (optional blankline >> string "}}}") return $ return $ B.codeBlock contents --- Trim up to one newline from the beginning of the string. -lchop :: String -> String -lchop s = case s of - '\n':ss -> ss - _ -> s - --- Trim up to one newline from the end of the string. -rchop :: String -> String -rchop = reverse . lchop . reverse - -dropSpacePrefix :: [String] -> [String] -dropSpacePrefix lns = - map (drop maxIndent) lns - where flns = filter (not . all (== ' ')) lns - maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns - +-- | Parse an @\<example>@ tag. exampleTag :: PandocMonad m => MuseParser m (F Blocks) exampleTag = try $ do (attr, contents) <- htmlBlock "example" return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents +-- | Parse a @\<literal>@ tag as a raw block. +-- For 'RawInline' @\<literal>@ parser, see 'inlineLiteralTag'. literalTag :: PandocMonad m => MuseParser m (F Blocks) literalTag = try $ do many spaceChar @@ -396,23 +423,41 @@ literalTag = try $ do format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content --- <center> tag is ignored +-- | Parse @\<center>@ tag. +-- Currently it is ignored as Pandoc cannot represent centered blocks. centerTag :: PandocMonad m => MuseParser m (F Blocks) centerTag = snd <$> parseHtmlContent "center" --- <right> tag is ignored +-- | Parse @\<right>@ tag. +-- Currently it is ignored as Pandoc cannot represent centered blocks. rightTag :: PandocMonad m => MuseParser m (F Blocks) rightTag = snd <$> parseHtmlContent "right" +-- | Parse @\<quote>@ tag. quoteTag :: PandocMonad m => MuseParser m (F Blocks) quoteTag = fmap B.blockQuote . snd <$> parseHtmlContent "quote" --- <div> tag is supported by Emacs Muse, but not Amusewiki 2.025 +-- | Parse @\<div>@ tag. +-- @\<div>@ tag is supported by Emacs Muse, but not Amusewiki 2.025. divTag :: PandocMonad m => MuseParser m (F Blocks) divTag = do (attrs, content) <- parseHtmlContent "div" return $ B.divWith attrs <$> content +-- | Parse @\<biblio>@ tag, the result is the same as @\<div class="biblio">@. +-- @\<biblio>@ tag is supported only in Text::Amuse mode. +biblioTag :: PandocMonad m => MuseParser m (F Blocks) +biblioTag = do + guardEnabled Ext_amuse + fmap (B.divWith ("", ["biblio"], [])) . snd <$> parseHtmlContent "biblio" + +-- | Parse @\<play>@ tag, the result is the same as @\<div class="play">@. +-- @\<play>@ tag is supported only in Text::Amuse mode. +playTag :: PandocMonad m => MuseParser m (F Blocks) +playTag = do + guardEnabled Ext_amuse + fmap (B.divWith ("", ["play"], [])) . snd <$> parseHtmlContent "play" + verseLine :: PandocMonad m => MuseParser m (F Inlines) verseLine = do indent <- (B.str <$> many1 (char ' ' >> pure '\160')) <|> pure mempty @@ -424,25 +469,34 @@ verseLines = do lns <- many verseLine return $ B.lineBlock <$> sequence lns +-- | Parse @\<verse>@ tag. verseTag :: PandocMonad m => MuseParser m (F Blocks) verseTag = do (_, content) <- htmlBlock "verse" parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content) +-- | Parse @\<comment>@ tag. commentTag :: PandocMonad m => MuseParser m (F Blocks) commentTag = htmlBlock "comment" >> return mempty --- Indented paragraph is either center, right or quote +-- | Parse paragraph contents. +paraContentsUntil :: PandocMonad m + => MuseParser m a -- ^ Terminator parser + -> MuseParser m (F Inlines, a) +paraContentsUntil end = do + updateState (\st -> st { museInPara = True }) + (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end) + updateState (\st -> st { museInPara = False }) + return (trimInlinesF $ mconcat l, e) + +-- | Parse a paragraph. paraUntil :: PandocMonad m - => MuseParser m a + => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Blocks, a) paraUntil end = do state <- getState guard $ not $ museInPara state - setState $ state{ museInPara = True } - (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end) - updateState (\st -> st { museInPara = False }) - return (fmap B.para $ trimInlinesF $ mconcat l, e) + first (fmap B.para) <$> paraContentsUntil end noteMarker :: PandocMonad m => MuseParser m String noteMarker = try $ do @@ -461,9 +515,8 @@ amuseNoteBlockUntil end = try $ do updateState (\st -> st { museInPara = False }) (content, e) <- listItemContentsUntil (sourceColumn pos - 1) (fail "x") end oldnotes <- museNotes <$> getState - case M.lookup ref oldnotes of - Just _ -> logMessage $ DuplicateNoteReference ref pos - Nothing -> return () + when (M.member ref oldnotes) + (logMessage $ DuplicateNoteReference ref pos) updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } return (mempty, e) @@ -476,9 +529,8 @@ emacsNoteBlock = try $ do ref <- noteMarker <* skipSpaces content <- mconcat <$> blocksTillNote oldnotes <- museNotes <$> getState - case M.lookup ref oldnotes of - Just _ -> logMessage $ DuplicateNoteReference ref pos - Nothing -> return () + when (M.member ref oldnotes) + (logMessage $ DuplicateNoteReference ref pos) updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } return mempty where @@ -503,29 +555,28 @@ blanklineVerseLine = try $ do blankline pure mempty +-- | Parse a line block indicated by @\'>\'@ characters. lineBlock :: PandocMonad m => MuseParser m (F Blocks) lineBlock = try $ do + many spaceChar col <- sourceColumn <$> getPosition lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1)) return $ B.lineBlock <$> sequence lns --- --- lists --- +-- *** List parsers bulletListItemsUntil :: PandocMonad m - => Int - -> MuseParser m a + => Int -- ^ Indentation + -> MuseParser m a -- ^ Terminator parser -> MuseParser m ([F Blocks], a) bulletListItemsUntil indent end = try $ do char '-' void spaceChar <|> lookAhead eol updateState (\st -> st { museInPara = False }) - (x, e) <- listItemContentsUntil (indent + 2) (Right <$> try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (Left <$> end) - case e of - Left ee -> return ([x], ee) - Right (xs, ee) -> return (x:xs, ee) + (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (([],) <$> end) + return (x:xs, e) +-- | Parse a bullet list. bulletListUntil :: PandocMonad m => MuseParser m a -> MuseParser m (F Blocks, a) @@ -569,11 +620,10 @@ orderedListItemsUntil indent style end = pos <- getPosition void spaceChar <|> lookAhead eol updateState (\st -> st { museInPara = False }) - (x, e) <- listItemContentsUntil (sourceColumn pos) (Right <$> try (optional blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (Left <$> end) - case e of - Left ee -> return ([x], ee) - Right (xs, ee) -> return (x:xs, ee) + (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (([],) <$> end) + return (x:xs, e) +-- | Parse an ordered list. orderedListUntil :: PandocMonad m => MuseParser m a -> MuseParser m (F Blocks, a) @@ -594,10 +644,8 @@ descriptionsUntil :: PandocMonad m descriptionsUntil indent end = do void spaceChar <|> lookAhead eol updateState (\st -> st { museInPara = False }) - (x, e) <- listItemContentsUntil indent (Right <$> try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (Left <$> end) - case e of - Right (xs, ee) -> return (x:xs, ee) - Left ee -> return ([x], ee) + (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (([],) <$> end) + return (x:xs, e) definitionListItemsUntil :: PandocMonad m => Int @@ -609,17 +657,13 @@ definitionListItemsUntil indent end = continuation = try $ do pos <- getPosition term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (try $ string "::") - (x, e) <- descriptionsUntil (sourceColumn pos) ((Right <$> try (optional blankline >> indentWith indent >> continuation)) <|> (Left <$> end)) - let xx = do - term' <- term - x' <- sequence x - return (term', x') - case e of - Left ee -> return ([xx], ee) - Right (xs, ee) -> return (xx:xs, ee) + (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> continuation) <|> (([],) <$> end)) + let xx = (,) <$> term <*> sequence x + return (xx:xs, e) +-- | Parse a definition list. definitionListUntil :: PandocMonad m - => MuseParser m a + => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Blocks, a) definitionListUntil end = try $ do many spaceChar @@ -629,15 +673,14 @@ definitionListUntil end = try $ do first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end anyListUntil :: PandocMonad m - => MuseParser m a + => MuseParser m a -- ^ Terminator parser -> MuseParser m (F Blocks, a) anyListUntil end = bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end --- --- tables --- +-- *** Table parsers +-- | Internal Muse table representation. data MuseTable = MuseTable { museTableCaption :: Inlines , museTableHeaders :: [[Blocks]] @@ -645,10 +688,10 @@ data MuseTable = MuseTable , museTableFooters :: [[Blocks]] } -data MuseTableElement = MuseHeaderRow (F [Blocks]) - | MuseBodyRow (F [Blocks]) - | MuseFooterRow (F [Blocks]) - | MuseCaption (F Inlines) +data MuseTableElement = MuseHeaderRow [Blocks] + | MuseBodyRow [Blocks] + | MuseFooterRow [Blocks] + | MuseCaption Inlines museToPandocTable :: MuseTable -> Blocks museToPandocTable (MuseTable caption headers body footers) = @@ -658,69 +701,66 @@ museToPandocTable (MuseTable caption headers body footers) = headRow = if null headers then [] else head headers rows = (if null headers then [] else tail headers) ++ body ++ footers -museAppendElement :: MuseTable - -> MuseTableElement - -> F MuseTable -museAppendElement tbl element = +museAppendElement :: MuseTableElement + -> MuseTable + -> MuseTable +museAppendElement element tbl = case element of - MuseHeaderRow row -> do - row' <- row - return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] } - MuseBodyRow row -> do - row' <- row - return tbl{ museTableRows = museTableRows tbl ++ [row'] } - MuseFooterRow row-> do - row' <- row - return tbl{ museTableFooters = museTableFooters tbl ++ [row'] } - MuseCaption inlines -> do - inlines' <- inlines - return tbl{ museTableCaption = inlines' } + MuseHeaderRow row -> tbl{ museTableHeaders = row : museTableHeaders tbl } + MuseBodyRow row -> tbl{ museTableRows = row : museTableRows tbl } + MuseFooterRow row -> tbl{ museTableFooters = row : museTableFooters tbl } + MuseCaption inlines -> tbl{ museTableCaption = inlines } tableCell :: PandocMonad m => MuseParser m (F Blocks) tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd) where cellEnd = try $ void (many1 spaceChar >> char '|') <|> eol -tableElements :: PandocMonad m => MuseParser m [MuseTableElement] -tableElements = tableParseElement `sepEndBy1` eol +tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement]) +tableElements = sequence <$> (tableParseElement `sepEndBy1` eol) -elementsToTable :: [MuseTableElement] -> F MuseTable -elementsToTable = foldM museAppendElement emptyTable +elementsToTable :: [MuseTableElement] -> MuseTable +elementsToTable = foldr museAppendElement emptyTable where emptyTable = MuseTable mempty mempty mempty mempty +-- | Parse a table. table :: PandocMonad m => MuseParser m (F Blocks) -table = try $ fmap museToPandocTable <$> (elementsToTable <$> tableElements) +table = try $ fmap (museToPandocTable . elementsToTable) <$> tableElements -tableParseElement :: PandocMonad m => MuseParser m MuseTableElement +tableParseElement :: PandocMonad m => MuseParser m (F MuseTableElement) tableParseElement = tableParseHeader <|> tableParseBody <|> tableParseFooter <|> tableParseCaption -tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks]) +tableParseRow :: PandocMonad m + => Int -- ^ Number of separator characters + -> MuseParser m (F [Blocks]) tableParseRow n = try $ do fields <- tableCell `sepBy2` fieldSep return $ sequence fields where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p) fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline)) -tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement -tableParseHeader = MuseHeaderRow <$> tableParseRow 2 +-- | Parse a table header row. +tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement) +tableParseHeader = fmap MuseHeaderRow <$> tableParseRow 2 -tableParseBody :: PandocMonad m => MuseParser m MuseTableElement -tableParseBody = MuseBodyRow <$> tableParseRow 1 +-- | Parse a table body row. +tableParseBody :: PandocMonad m => MuseParser m (F MuseTableElement) +tableParseBody = fmap MuseBodyRow <$> tableParseRow 1 -tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement -tableParseFooter = MuseFooterRow <$> tableParseRow 3 +-- | Parse a table footer row. +tableParseFooter :: PandocMonad m => MuseParser m (F MuseTableElement) +tableParseFooter = fmap MuseFooterRow <$> tableParseRow 3 -tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement +-- | Parse table caption. +tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement) tableParseCaption = try $ do many spaceChar string "|+" - MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|")) + fmap MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|")) --- --- inline parsers --- +-- ** Inline parsers inlineList :: PandocMonad m => [MuseParser m (F Inlines)] inlineList = [ whitespace @@ -750,17 +790,18 @@ inlineList = [ whitespace inline :: PandocMonad m => MuseParser m (F Inlines) inline = endline <|> choice inlineList <?> "inline" +-- | Parse a soft break. endline :: PandocMonad m => MuseParser m (F Inlines) endline = try $ do newline notFollowedBy blankline - returnF B.softbreak + return $ return B.softbreak parseAnchor :: PandocMonad m => MuseParser m String parseAnchor = try $ do getPosition >>= \pos -> guard (sourceColumn pos == 1) char '#' - (:) <$> letter <*> many (letter <|> digit) + (:) <$> letter <*> many (letter <|> digit <|> char '-') anchor :: PandocMonad m => MuseParser m (F Inlines) anchor = try $ do @@ -768,8 +809,11 @@ anchor = try $ do skipMany spaceChar <|> void newline return $ return $ B.spanWith (anchorId, [], []) mempty +-- | Parse a footnote reference. footnote :: PandocMonad m => MuseParser m (F Inlines) footnote = try $ do + inLink <- museInLink <$> getState + guard $ not inLink ref <- noteMarker return $ do notes <- asksF museNotes @@ -777,7 +821,7 @@ footnote = try $ do Nothing -> return $ B.str $ "[" ++ ref ++ "]" Just (_pos, contents) -> do st <- askF - let contents' = runF contents st { museNotes = M.empty } + let contents' = runF contents st { museNotes = M.delete ref (museNotes st) } return $ B.note contents' whitespace :: PandocMonad m => MuseParser m (F Inlines) @@ -785,6 +829,7 @@ whitespace = try $ do skipMany1 spaceChar return $ return B.space +-- | Parse @\<br>@ tag. br :: PandocMonad m => MuseParser m (F Inlines) br = try $ do string "<br>" @@ -800,42 +845,54 @@ enclosedInlines :: (PandocMonad m, Show a, Show b) enclosedInlines start end = try $ trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter)) +-- | Parse an inline tag, such as @\<em>@ and @\<strong>@. inlineTag :: PandocMonad m - => String + => String -- ^ Tag name -> MuseParser m (F Inlines) inlineTag tag = try $ do htmlTag (~== TagOpen tag []) mconcat <$> manyTill inline (void $ htmlTag (~== TagClose tag)) -strongTag :: PandocMonad m => MuseParser m (F Inlines) -strongTag = fmap B.strong <$> inlineTag "strong" - +-- | Parse strong inline markup, indicated by @**@. strong :: PandocMonad m => MuseParser m (F Inlines) strong = fmap B.strong <$> emphasisBetween (string "**") +-- | Parse emphasis inline markup, indicated by @*@. emph :: PandocMonad m => MuseParser m (F Inlines) emph = fmap B.emph <$> emphasisBetween (char '*') +-- | Parse underline inline markup, indicated by @_@. +-- Supported only in Emacs Muse mode, not Text::Amuse. underlined :: PandocMonad m => MuseParser m (F Inlines) underlined = do guardDisabled Ext_amuse -- Supported only by Emacs Muse fmap underlineSpan <$> emphasisBetween (char '_') +-- | Parse @\<strong>@ tag. +strongTag :: PandocMonad m => MuseParser m (F Inlines) +strongTag = fmap B.strong <$> inlineTag "strong" + +-- | Parse @\<em>@ tag. emphTag :: PandocMonad m => MuseParser m (F Inlines) emphTag = fmap B.emph <$> inlineTag "em" +-- | Parse @\<sup>@ tag. superscriptTag :: PandocMonad m => MuseParser m (F Inlines) superscriptTag = fmap B.superscript <$> inlineTag "sup" +-- | Parse @\<sub>@ tag. subscriptTag :: PandocMonad m => MuseParser m (F Inlines) subscriptTag = fmap B.subscript <$> inlineTag "sub" +-- | Parse @\<del>@ tag. strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) strikeoutTag = fmap B.strikeout <$> inlineTag "del" +-- | Parse @\<verbatim>@ tag. verbatimTag :: PandocMonad m => MuseParser m (F Inlines) verbatimTag = return . B.text . snd <$> htmlElement "verbatim" +-- | Parse @\<class>@ tag. classTag :: PandocMonad m => MuseParser m (F Inlines) classTag = do (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "class" []) @@ -843,11 +900,13 @@ classTag = do let classes = maybe [] words $ lookup "name" attrs return $ B.spanWith ("", classes, []) <$> mconcat res +-- | Parse "~~" as nonbreaking space. nbsp :: PandocMonad m => MuseParser m (F Inlines) nbsp = try $ do string "~~" return $ return $ B.str "\160" +-- | Parse code markup, indicated by @\'=\'@ characters. code :: PandocMonad m => MuseParser m (F Inlines) code = try $ do atStart $ char '=' @@ -858,13 +917,16 @@ code = try $ do notFollowedBy $ satisfy isLetter return $ return $ B.code contents +-- | Parse @\<code>@ tag. codeTag :: PandocMonad m => MuseParser m (F Inlines) codeTag = return . uncurry B.codeWith <$> htmlElement "code" --- <math> tag is an Emacs Muse extension enabled by (require 'muse-latex2png) +-- | Parse @\<math>@ tag. +-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@ mathTag :: PandocMonad m => MuseParser m (F Inlines) mathTag = return . B.math . snd <$> htmlElement "math" +-- | Parse inline @\<literal>@ tag as a raw inline. inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) inlineLiteralTag = (return . rawInline) <$> htmlElement "literal" @@ -879,18 +941,19 @@ str = return . B.str <$> many1 alphaNum <* updateLastStrPos symbol :: PandocMonad m => MuseParser m (F Inlines) symbol = return . B.str <$> count 1 nonspaceChar +-- | Parse a link or image. link :: PandocMonad m => MuseParser m (F Inlines) link = try $ do st <- getState guard $ not $ museInLink st setState $ st{ museInLink = True } - (url, title, content) <- linkText + (url, content) <- linkText updateState (\state -> state { museInLink = False }) return $ case stripPrefix "URL:" url of Nothing -> if isImageUrl url - then B.image url title <$> fromMaybe (return mempty) content - else B.link url title <$> fromMaybe (return $ B.str url) content - Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content + then B.image url "" <$> fromMaybe (return mempty) content + else B.link url "" <$> fromMaybe (return $ B.str url) content + Just url' -> B.link url' "" <$> fromMaybe (return $ B.str url') content where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] isImageUrl = (`elem` imageExtensions) . takeExtension @@ -898,10 +961,10 @@ link = try $ do linkContent :: PandocMonad m => MuseParser m (F Inlines) linkContent = char '[' >> trimInlinesF . mconcat <$> manyTill inline (string "]") -linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines)) +linkText :: PandocMonad m => MuseParser m (String, Maybe (F Inlines)) linkText = do string "[[" - url <- many1Till anyChar $ char ']' + url <- manyTill anyChar $ char ']' content <- optionMaybe linkContent char ']' - return (url, "", content) + return (url, content) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 3f4586295..1a489ab94 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,4 +1,34 @@ {-# LANGUAGE NoImplicitPrelude #-} +{- +Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.OPML + Copyright : Copyright (C) 2013-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of OPML to 'Pandoc' document. +-} + module Text.Pandoc.Readers.OPML ( readOPML ) where import Prelude import Control.Monad.State.Strict diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 566f9b959..71a38cf82 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -651,11 +651,15 @@ directive' = do skipMany spaceChar top <- many $ satisfy (/='\n') <|> try (char '\n' <* - notFollowedBy' (rawFieldListItem 3) <* - count 3 (char ' ') <* + notFollowedBy' (rawFieldListItem 1) <* + many1 (char ' ') <* notFollowedBy blankline) newline - fields <- many $ rawFieldListItem 3 + fields <- do + fieldIndent <- length <$> lookAhead (many (char ' ')) + if fieldIndent == 0 + then return [] + else many $ rawFieldListItem fieldIndent body <- option "" $ try $ blanklines >> indentedBlock optional blanklines let body' = body ++ "\n\n" @@ -1086,10 +1090,15 @@ targetURI :: Monad m => ParserT [Char] st m [Char] targetURI = do skipSpaces optional newline - contents <- many1 (try (many spaceChar >> newline >> - many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") + contents <- trim <$> + many1 (satisfy (/='\n') + <|> try (newline >> many1 spaceChar >> noneOf " \t\n")) blanklines - return $ escapeURI $ trim contents + case reverse contents of + -- strip backticks + '_':'`':xs -> return (dropWhile (=='`') (reverse xs) ++ "_") + '_':_ -> return contents + _ -> return (escapeURI contents) substKey :: PandocMonad m => RSTParser m () substKey = try $ do |
