aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs89
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs6
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs4
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs32
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs6
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs30
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs89
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs37
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs405
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs30
-rw-r--r--src/Text/Pandoc/Readers/RST.hs21
11 files changed, 513 insertions, 236 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 6bc4584c2..3d48c7ee8 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,4 +1,33 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.DocBook
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of DocBook XML to 'Pandoc' document.
+-}
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Prelude
import Control.Monad.State.Strict
@@ -236,7 +265,7 @@ List of all DocBook tags, with [x] indicating implemented,
[ ] manvolnum - A reference volume number
[x] markup - A string of formatting markup in text that is to be
represented literally
-[ ] mathphrase - A mathematical phrase, an expression that can be represented
+[x] mathphrase - A mathematical phrase, an expression that can be represented
with ordinary text and a small amount of markup
[ ] medialabel - A name that identifies the physical medium on which some
information resides
@@ -698,6 +727,8 @@ parseBlock (Elem e) =
"bibliodiv" -> sect 1
"biblioentry" -> parseMixed para (elContent e)
"bibliomixed" -> parseMixed para (elContent e)
+ "equation" -> para <$> equation e displayMath
+ "informalequation" -> para <$> equation e displayMath
"glosssee" -> para . (\ils -> text "See " <> ils <> str ".")
<$> getInlines e
"glossseealso" -> para . (\ils -> text "See also " <> ils <> str ".")
@@ -924,9 +955,9 @@ parseInline (CRef ref) =
return $ maybe (text $ map toUpper ref) text $ lookupEntity ref
parseInline (Elem e) =
case qName (elName e) of
- "equation" -> equation displayMath
- "informalequation" -> equation displayMath
- "inlineequation" -> equation math
+ "equation" -> equation e displayMath
+ "informalequation" -> equation e displayMath
+ "inlineequation" -> equation e math
"subscript" -> subscript <$> innerInlines
"superscript" -> superscript <$> innerInlines
"inlinemediaobject" -> getMediaobject e
@@ -1005,13 +1036,6 @@ parseInline (Elem e) =
_ -> innerInlines
where innerInlines = (trimInlines . mconcat) <$>
mapM parseInline (elContent e)
- equation constructor = return $ mconcat $
- map (constructor . writeTeX)
- $ rights
- $ map (readMathML . showElement . everywhere (mkT removePrefix))
- $ filterChildren (\x -> qName (elName x) == "math" &&
- qPrefix (elName x) == Just "mml") e
- removePrefix elname = elname { qPrefix = Nothing }
codeWithLang = do
let classes' = case attrValue "language" e of
"" -> []
@@ -1049,6 +1073,7 @@ parseInline (Elem e) =
| not (null xrefLabel) = xrefLabel
| otherwise = case qName (elName el) of
"chapter" -> descendantContent "title" el
+ "section" -> descendantContent "title" el
"sect1" -> descendantContent "title" el
"sect2" -> descendantContent "title" el
"sect3" -> descendantContent "title" el
@@ -1061,3 +1086,45 @@ parseInline (Elem e) =
xrefLabel = attrValue "xreflabel" el
descendantContent name = maybe "???" strContent
. filterElementName (\n -> qName n == name)
+
+-- | Extract a math equation from an element
+--
+-- asciidoc can generate Latex math in CDATA sections.
+--
+-- Note that if some MathML can't be parsed it is silently ignored!
+equation
+ :: Monad m
+ => Element
+ -- ^ The element from which to extract a mathematical equation
+ -> (String -> Inlines)
+ -- ^ A constructor for some Inlines, taking the TeX code as input
+ -> m Inlines
+equation e constructor =
+ return $ mconcat $ map constructor $ mathMLEquations ++ latexEquations
+ where
+ mathMLEquations :: [String]
+ mathMLEquations = map writeTeX $ rights $ readMath
+ (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
+ (readMathML . showElement)
+
+ latexEquations :: [String]
+ latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
+ (concat . fmap showVerbatimCData . elContent)
+
+ readMath :: (Element -> Bool) -> (Element -> b) -> [b]
+ readMath childPredicate fromElement =
+ ( map (fromElement . everywhere (mkT removePrefix))
+ $ filterChildren childPredicate e
+ )
+
+-- | Get the actual text stored in a verbatim CData block. 'showContent'
+-- returns the text still surrounded by the [[CDATA]] tags.
+--
+-- Returns 'showContent' if this is not a verbatim CData
+showVerbatimCData :: Content -> String
+showVerbatimCData (Text (CData CDataVerbatim d _)) = d
+showVerbatimCData c = showContent c
+
+-- | Set the prefix of a name to 'Nothing'
+removePrefix :: QName -> QName
+removePrefix elname = elname { qPrefix = Nothing }
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 00603603a..ca9f8c8dd 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -688,6 +688,10 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
rowLength :: Row -> Int
rowLength (Row c) = length c
+ -- pad cells. New Text.Pandoc.Builder will do that for us,
+ -- so this is for compatibility while we switch over.
+ let cells' = map (\row -> take width (row ++ repeat mempty)) cells
+
hdrCells <- case hdr of
Just r' -> rowToBlocksList r'
Nothing -> return $ replicate width mempty
@@ -700,7 +704,7 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
let alignments = replicate width AlignDefault
widths = replicate width 0 :: [Double]
- return $ table caption (zip alignments widths) hdrCells cells
+ return $ table caption (zip alignments widths) hdrCells cells'
bodyPartToBlocks (OMathPara e) =
return $ para $ displayMath (writeTeX e)
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index dfd2b5666..108c4bbe5 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -135,6 +135,10 @@ combineBlocks bs cs
| bs' :> BlockQuote bs'' <- viewr (unMany bs)
, BlockQuote cs'' :< cs' <- viewl (unMany cs) =
Many $ (bs' |> BlockQuote (bs'' <> cs'')) >< cs'
+ | bs' :> CodeBlock attr codeStr <- viewr (unMany bs)
+ , CodeBlock attr' codeStr' :< cs' <- viewl (unMany cs)
+ , attr == attr' =
+ Many $ (bs' |> CodeBlock attr (codeStr <> "\n" <> codeStr')) >< cs'
combineBlocks bs cs = bs <> cs
instance (Monoid a, Eq a) => Eq (Modifier a) where
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 714468a8a..c26447641 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -1,7 +1,35 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ViewPatterns #-}
+{-
+Copyright (C) 2014-2018 Matthew Pickering
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.EPUB
+ Copyright : Copyright (C) 2014-2018 Matthew Pickering
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of EPUB to 'Pandoc' document.
+-}
module Text.Pandoc.Readers.EPUB
(readEPUB)
@@ -93,7 +121,7 @@ fetchImages mimes root arc (query iq -> links) =
mapM_ (uncurry3 insertMedia) (mapMaybe getEntry links)
where
getEntry link =
- let abslink = normalise (root </> link) in
+ let abslink = normalise (unEscapeString (root </> link)) in
(link , lookup link mimes, ) . fromEntry
<$> findEntryByPath abslink arc
@@ -264,7 +292,7 @@ findAttrE :: PandocMonad m => QName -> Element -> m String
findAttrE q e = mkE "findAttr" $ findAttr q e
findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
-findEntryByPathE (normalise -> path) a =
+findEntryByPathE (normalise . unEscapeString -> path) a =
mkE ("No entry on path: " ++ path) $ findEntryByPath path a
parseXMLDocE :: PandocMonad m => String -> m Element
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b221b6fb2..32a1ba5a6 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -510,14 +510,16 @@ pTable = try $ do
[Plain _] -> True
_ -> False
let isSimple = all isSinglePlain $ concat (head':rows''')
- let cols = length $ if null head' then head rows''' else head'
+ let cols = if null head'
+ then maximum (map length rows''')
+ else length head'
-- add empty cells to short rows
let addEmpties r = case cols - length r of
n | n > 0 -> r <> replicate n mempty
| otherwise -> r
let rows = map addEmpties rows'''
let aligns = case rows'' of
- (cs:_) -> map fst cs
+ (cs:_) -> take cols $ map fst cs ++ repeat AlignDefault
_ -> replicate cols AlignDefault
let widths = if null widths'
then if isSimple
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 29f23137c..59af76d23 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -1,5 +1,35 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TupleSections #-}
+{-
+Copyright (C) 2017-2018 Hamish Mackenzie
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.JATS
+ Copyright : Copyright (C) 2017-2018 Hamish Mackenzie
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of JATS XML to 'Pandoc' document.
+-}
+
module Text.Pandoc.Readers.JATS ( readJATS ) where
import Prelude
import Control.Monad.State.Strict
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 52782653e..041b552dc 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -48,7 +48,7 @@ import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
import Control.Monad.Trans (lift)
-import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower)
+import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower, toUpper)
import Data.Default
import Data.List (intercalate, isPrefixOf)
import qualified Data.Map as M
@@ -164,6 +164,7 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sInTableCell :: Bool
, sLastHeaderNum :: HeaderNum
, sLabels :: M.Map String [Inline]
+ , sHasChapters :: Bool
, sToggles :: M.Map String Bool
}
deriving Show
@@ -183,6 +184,7 @@ defaultLaTeXState = LaTeXState{ sOptions = def
, sInTableCell = False
, sLastHeaderNum = HeaderNum []
, sLabels = M.empty
+ , sHasChapters = False
, sToggles = M.empty
}
@@ -240,21 +242,30 @@ withVerbatimMode parser = do
return result
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => LP m a -> ParserT String s m (a, String)
-rawLaTeXParser parser = do
+ => LP m a -> LP m a -> ParserT String s m (a, String)
+rawLaTeXParser parser valParser = do
inp <- getInput
let toks = tokenize "source" $ T.pack inp
pstate <- getState
- let lstate = def{ sOptions = extractReaderOptions pstate
- , sMacros = extractMacros pstate }
- let rawparser = (,) <$> withRaw parser <*> getState
- res <- lift $ runParserT rawparser lstate "chunk" toks
- case res of
+ let lstate = def{ sOptions = extractReaderOptions pstate }
+ let lstate' = lstate { sMacros = extractMacros pstate }
+ let rawparser = (,) <$> withRaw valParser <*> getState
+ res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+ case res' of
Left _ -> mzero
- Right ((val, raw), st) -> do
- updateState (updateMacros (sMacros st <>))
- rawstring <- takeP (T.length (untokenize raw))
- return (val, rawstring)
+ Right toks' -> do
+ res <- lift $ runParserT (do doMacros 0
+ -- retokenize, applying macros
+ ts <- many (satisfyTok (const True))
+ setInput ts
+ rawparser)
+ lstate' "chunk" toks'
+ case res of
+ Left _ -> mzero
+ Right ((val, raw), st) -> do
+ updateState (updateMacros (sMacros st <>))
+ _ <- takeP (T.length (untokenize toks'))
+ return (val, T.unpack (untokenize raw))
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> String -> ParserT String s m String
@@ -275,19 +286,18 @@ rawLaTeXBlock = do
lookAhead (try (char '\\' >> letter))
-- we don't want to apply newly defined latex macros to their own
-- definitions:
- snd <$> rawLaTeXParser macroDef
- <|> ((snd <$> rawLaTeXParser (environment <|> blockCommand)) >>= applyMacros)
+ snd <$> rawLaTeXParser (environment <|> macroDef <|> blockCommand) blocks
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> ParserT String s m String
rawLaTeXInline = do
lookAhead (try (char '\\' >> letter))
- rawLaTeXParser (inlineEnvironment <|> inlineCommand') >>= applyMacros . snd
+ snd <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines
inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter))
- fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand')
+ fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
@@ -1313,6 +1323,12 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("slshape", extractSpaces emph <$> inlines)
, ("scshape", extractSpaces smallcaps <$> inlines)
, ("bfseries", extractSpaces strong <$> inlines)
+ , ("MakeUppercase", makeUppercase <$> tok)
+ , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
+ , ("uppercase", makeUppercase <$> tok)
+ , ("MakeLowercase", makeLowercase <$> tok)
+ , ("MakeTextLowercase", makeLowercase <$> tok)
+ , ("lowercase", makeLowercase <$> tok)
, ("/", pure mempty) -- italic correction
, ("aa", lit "å")
, ("AA", lit "Å")
@@ -1513,6 +1529,16 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("foreignlanguage", foreignlanguage)
]
+makeUppercase :: Inlines -> Inlines
+makeUppercase = fromList . walk (alterStr (map toUpper)) . toList
+
+makeLowercase :: Inlines -> Inlines
+makeLowercase = fromList . walk (alterStr (map toLower)) . toList
+
+alterStr :: (String -> String) -> Inline -> Inline
+alterStr f (Str xs) = Str (f xs)
+alterStr _ x = x
+
foreignlanguage :: PandocMonad m => LP m Inlines
foreignlanguage = do
babelLang <- T.unpack . untokenize <$> braced
@@ -1685,6 +1711,9 @@ treatAsBlock = Set.fromList
, "clearpage"
, "pagebreak"
, "titleformat"
+ , "listoffigures"
+ , "listoftables"
+ , "write"
]
isInlineCommand :: Text -> Bool
@@ -1984,9 +2013,13 @@ section starred (ident, classes, kvs) lvl = do
try (spaces >> controlSeq "label"
>> spaces >> toksToString <$> braced)
let classes' = if starred then "unnumbered" : classes else classes
+ when (lvl == 0) $
+ updateState $ \st -> st{ sHasChapters = True }
unless starred $ do
hn <- sLastHeaderNum <$> getState
- let num = incrementHeaderNum lvl hn
+ hasChapters <- sHasChapters <$> getState
+ let lvl' = lvl + if hasChapters then 1 else 0
+ let num = incrementHeaderNum lvl' hn
updateState $ \st -> st{ sLastHeaderNum = num }
updateState $ \st -> st{ sLabels = M.insert lab
[Str (renderHeaderNum num)]
@@ -2143,19 +2176,6 @@ environments = M.fromList
codeBlockWith attr <$> verbEnv "lstlisting")
, ("minted", minted)
, ("obeylines", obeylines)
- , ("displaymath", mathEnvWith para Nothing "displaymath")
- , ("equation", mathEnvWith para Nothing "equation")
- , ("equation*", mathEnvWith para Nothing "equation*")
- , ("gather", mathEnvWith para (Just "gathered") "gather")
- , ("gather*", mathEnvWith para (Just "gathered") "gather*")
- , ("multline", mathEnvWith para (Just "gathered") "multline")
- , ("multline*", mathEnvWith para (Just "gathered") "multline*")
- , ("eqnarray", mathEnvWith para (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnvWith para (Just "aligned") "eqnarray*")
- , ("align", mathEnvWith para (Just "aligned") "align")
- , ("align*", mathEnvWith para (Just "aligned") "align*")
- , ("alignat", mathEnvWith para (Just "aligned") "alignat")
- , ("alignat*", mathEnvWith para (Just "aligned") "alignat*")
, ("tikzpicture", rawVerbEnv "tikzpicture")
-- etoolbox
, ("ifstrequal", ifstrequal)
@@ -2166,11 +2186,14 @@ environments = M.fromList
]
environment :: PandocMonad m => LP m Blocks
-environment = do
+environment = try $ do
controlSeq "begin"
name <- untokenize <$> braced
- M.findWithDefault mzero name environments
- <|> rawEnv name
+ M.findWithDefault mzero name environments <|>
+ if M.member name (inlineEnvironments
+ :: M.Map Text (LP PandocPure Inlines))
+ then mzero
+ else rawEnv name
env :: PandocMonad m => Text -> LP m a -> LP m a
env name p = p <* end_ name
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 71e6f8249..156b2b622 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -674,6 +674,8 @@ keyValAttr = try $ do
char '='
val <- enclosed (char '"') (char '"') litChar
<|> enclosed (char '\'') (char '\'') litChar
+ <|> ("" <$ try (string "\"\""))
+ <|> ("" <$ try (string "''"))
<|> many (escapedChar' <|> noneOf " \t\n\r}")
return $ \(id',cs,kvs) ->
case key of
@@ -910,6 +912,17 @@ listContinuation continuationIndent = try $ do
blanks <- many blankline
return $ concat (x:xs) ++ blanks
+-- Variant of blanklines that doesn't require blank lines
+-- before a fence or eof.
+blanklines' :: PandocMonad m => MarkdownParser m [Char]
+blanklines' = blanklines <|> try checkDivCloser
+ where checkDivCloser = do
+ guardEnabled Ext_fenced_divs
+ divLevel <- stateFencedDivLevel <$> getState
+ guard (divLevel >= 1)
+ lookAhead divFenceEnd
+ return ""
+
notFollowedByDivCloser :: PandocMonad m => MarkdownParser m ()
notFollowedByDivCloser =
guardDisabled Ext_fenced_divs <|>
@@ -1251,7 +1264,7 @@ alignType strLst len =
-- Parse a table footer - dashed lines followed by blank line.
tableFooter :: PandocMonad m => MarkdownParser m String
-tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines
+tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines'
-- Parse a table separator - dashed line.
tableSep :: PandocMonad m => MarkdownParser m Char
@@ -1262,7 +1275,7 @@ rawTableLine :: PandocMonad m
=> [Int]
-> MarkdownParser m [String]
rawTableLine indices = do
- notFollowedBy' (blanklines <|> tableFooter)
+ notFollowedBy' (blanklines' <|> tableFooter)
line <- many1Till anyChar newline
return $ map trim $ tail $
splitStringByIndices (init indices) line
@@ -1300,7 +1313,7 @@ simpleTable headless = do
(aligns, _widths, heads', lines') <-
tableWith (simpleTableHeader headless) tableLine
(return ())
- (if headless then tableFooter else tableFooter <|> blanklines)
+ (if headless then tableFooter else tableFooter <|> blanklines')
-- Simple tables get 0s for relative column widths (i.e., use default)
return (aligns, replicate (length aligns) 0, heads', lines')
@@ -1328,11 +1341,16 @@ multilineTableHeader headless = try $ do
newline
let (lengths, lines') = unzip dashes
let indices = scanl (+) (length initSp) lines'
+ -- compensate for the fact that intercolumn spaces are
+ -- not included in the last index:
+ let indices' = case reverse indices of
+ [] -> []
+ (x:xs) -> reverse (x+1:xs)
rawHeadsList <- if headless
then fmap (map (:[]) . tail .
- splitStringByIndices (init indices)) $ lookAhead anyLine
+ splitStringByIndices (init indices')) $ lookAhead anyLine
else return $ transpose $ map
- (tail . splitStringByIndices (init indices))
+ (tail . splitStringByIndices (init indices'))
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
@@ -1340,7 +1358,7 @@ multilineTableHeader headless = try $ do
else map (unlines . map trim) rawHeadsList
heads <- fmap sequence $
mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads
- return (heads, aligns, indices)
+ return (heads, aligns, indices')
-- Parse a grid table: starts with row of '-' on top, then header
-- (which may be grid), then the rows,
@@ -2146,7 +2164,6 @@ singleQuoted = try $ do
doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
doubleQuoted = try $ do
doubleQuoteStart
- contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- withQuoteContext InDoubleQuote (doubleQuoteEnd >> return
- (fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> return (return (B.str "\8220") <> contents)
+ withQuoteContext InDoubleQuote $
+ fmap B.doubleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline doubleQuoteEnd
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index 30475d91e..fe6b3698c 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -36,8 +36,7 @@ TODO:
- Org tables
- table.el tables
- Images with attributes (floating and width)
-- Citations and <biblio>
-- <play> environment
+- <cite> tag
-}
module Text.Pandoc.Readers.Muse (readMuse) where
@@ -85,24 +84,21 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
, museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
, museLogMessages :: [LogMessage]
, museNotes :: M.Map String (SourcePos, F Blocks)
- , museInLink :: Bool
- , museInPara :: Bool
+ , museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
+ , museInPara :: Bool -- ^ True when looking for a paragraph terminator
}
instance Default MuseState where
- def = defaultMuseState
-
-defaultMuseState :: MuseState
-defaultMuseState = MuseState { museMeta = return nullMeta
- , museOptions = def
- , museHeaders = M.empty
- , museIdentifierList = Set.empty
- , museLastStrPos = Nothing
- , museLogMessages = []
- , museNotes = M.empty
- , museInLink = False
- , museInPara = False
- }
+ def = MuseState { museMeta = return nullMeta
+ , museOptions = def
+ , museHeaders = M.empty
+ , museIdentifierList = Set.empty
+ , museLastStrPos = Nothing
+ , museLogMessages = []
+ , museNotes = M.empty
+ , museInLink = False
+ , museInPara = False
+ }
type MuseParser = ParserT String MuseState
@@ -125,10 +121,7 @@ instance HasLogMessages MuseState where
addLogMessage m s = s{ museLogMessages = m : museLogMessages s }
getLogMessages = reverse . museLogMessages
---
--- main parser
---
-
+-- | Parse Muse document
parseMuse :: PandocMonad m => MuseParser m Pandoc
parseMuse = do
many directive
@@ -140,14 +133,56 @@ parseMuse = do
reportLogMessages
return doc
---
--- utility functions
---
+-- * Utility functions
+
+commonPrefix :: String -> String -> String
+commonPrefix _ [] = []
+commonPrefix [] _ = []
+commonPrefix (x:xs) (y:ys)
+ | x == y = x : commonPrefix xs ys
+ | otherwise = []
+
+-- | Trim up to one newline from the beginning of the string.
+lchop :: String -> String
+lchop s = case s of
+ '\n':ss -> ss
+ _ -> s
+
+-- | Trim up to one newline from the end of the string.
+rchop :: String -> String
+rchop = reverse . lchop . reverse
+
+dropSpacePrefix :: [String] -> [String]
+dropSpacePrefix lns =
+ map (drop maxIndent) lns
+ where flns = filter (not . all (== ' ')) lns
+ maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns
+
+atStart :: PandocMonad m => MuseParser m a -> MuseParser m a
+atStart p = do
+ pos <- getPosition
+ st <- getState
+ guard $ museLastStrPos st /= Just pos
+ p
+
+-- * Parsers
+-- | Parse end-of-line, which can be either a newline or end-of-file.
eol :: Stream s m Char => ParserT s st m ()
eol = void newline <|> eof
-htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String)
+someUntil :: (Stream s m t)
+ => ParserT s u m a
+ -> ParserT s u m b
+ -> ParserT s u m ([a], b)
+someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end
+
+-- ** HTML parsers
+
+-- | Parse HTML tag, returning its attributes and literal contents.
+htmlElement :: PandocMonad m
+ => String -- ^ Tag name
+ -> MuseParser m (Attr, String)
htmlElement tag = try $ do
(TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
content <- manyTill anyChar endtag
@@ -155,13 +190,16 @@ htmlElement tag = try $ do
where
endtag = void $ htmlTag (~== TagClose tag)
-htmlBlock :: PandocMonad m => String -> MuseParser m (Attr, String)
+htmlBlock :: PandocMonad m
+ => String -- ^ Tag name
+ -> MuseParser m (Attr, String)
htmlBlock tag = try $ do
many spaceChar
res <- htmlElement tag
manyTill spaceChar eol
return res
+-- | Convert HTML attributes to Pandoc 'Attr'
htmlAttrToPandoc :: [Attribute String] -> Attr
htmlAttrToPandoc attrs = (ident, classes, keyvals)
where
@@ -170,7 +208,8 @@ htmlAttrToPandoc attrs = (ident, classes, keyvals)
keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
parseHtmlContent :: PandocMonad m
- => String -> MuseParser m (Attr, F Blocks)
+ => String -- ^ Tag name
+ -> MuseParser m (Attr, F Blocks)
parseHtmlContent tag = try $ do
many spaceChar
pos <- getPosition
@@ -182,29 +221,7 @@ parseHtmlContent tag = try $ do
where
endtag = void $ htmlTag (~== TagClose tag)
-commonPrefix :: String -> String -> String
-commonPrefix _ [] = []
-commonPrefix [] _ = []
-commonPrefix (x:xs) (y:ys)
- | x == y = x : commonPrefix xs ys
- | otherwise = []
-
-atStart :: PandocMonad m => MuseParser m a -> MuseParser m a
-atStart p = do
- pos <- getPosition
- st <- getState
- guard $ museLastStrPos st /= Just pos
- p
-
-someUntil :: (Stream s m t)
- => ParserT s u m a
- -> ParserT s u m b
- -> ParserT s u m ([a], b)
-someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end
-
---
--- directive parsers
---
+-- ** Directive parsers
-- While not documented, Emacs Muse allows "-" in directive name
parseDirectiveKey :: PandocMonad m => MuseParser m String
@@ -235,9 +252,7 @@ directive = do
where translateKey "cover" = "cover-image"
translateKey x = x
---
--- block parsers
---
+-- ** Block parsers
parseBlocks :: PandocMonad m
=> MuseParser m (F Blocks)
@@ -248,8 +263,8 @@ parseBlocks =
paraStart)
where
parseEnd = mempty <$ eof
- blockStart = (B.<>) <$> (header <|> blockElements <|> emacsNoteBlock)
- <*> parseBlocks
+ blockStart = ((B.<>) <$> (emacsHeading <|> blockElements <|> emacsNoteBlock)
+ <*> parseBlocks) <|> (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks)
listStart = do
updateState (\st -> st { museInPara = False })
uncurry (B.<>) <$> (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks)
@@ -322,19 +337,24 @@ blockElements = do
, rightTag
, quoteTag
, divTag
+ , biblioTag
+ , playTag
, verseTag
, lineBlock
, table
, commentTag
]
+-- | Parse a line comment, starting with @;@ in the first column.
comment :: PandocMonad m => MuseParser m (F Blocks)
comment = try $ do
+ getPosition >>= \pos -> guard (sourceColumn pos == 1)
char ';'
optional (spaceChar >> many (noneOf "\n"))
eol
return mempty
+-- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters.
separator :: PandocMonad m => MuseParser m (F Blocks)
separator = try $ do
string "----"
@@ -343,8 +363,10 @@ separator = try $ do
eol
return $ return B.horizontalRule
-header :: PandocMonad m => MuseParser m (F Blocks)
-header = try $ do
+-- | Parse a single-line heading.
+emacsHeading :: PandocMonad m => MuseParser m (F Blocks)
+emacsHeading = try $ do
+ guardDisabled Ext_amuse
anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol)
getPosition >>= \pos -> guard (sourceColumn pos == 1)
level <- fmap length $ many1 $ char '*'
@@ -354,6 +376,24 @@ header = try $ do
attr <- registerHeader (anchorId, [], []) (runF content def)
return $ B.headerWith attr level <$> content
+-- | Parse a multi-line heading.
+-- It is a Text::Amuse extension, Emacs Muse does not allow heading to span multiple lines.
+amuseHeadingUntil :: PandocMonad m
+ => MuseParser m a -- ^ Terminator parser
+ -> MuseParser m (F Blocks, a)
+amuseHeadingUntil end = try $ do
+ guardEnabled Ext_amuse
+ anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol)
+ getPosition >>= \pos -> guard (sourceColumn pos == 1)
+ level <- fmap length $ many1 $ char '*'
+ guard $ level <= 5
+ spaceChar
+ (content, e) <- paraContentsUntil end
+ attr <- registerHeader (anchorId, [], []) (runF content def)
+ return (B.headerWith attr level <$> content, e)
+
+-- | Parse an example between @{{{@ and @}}}@.
+-- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation.
example :: PandocMonad m => MuseParser m (F Blocks)
example = try $ do
string "{{{"
@@ -361,27 +401,14 @@ example = try $ do
contents <- manyTill anyChar $ try (optional blankline >> string "}}}")
return $ return $ B.codeBlock contents
--- Trim up to one newline from the beginning of the string.
-lchop :: String -> String
-lchop s = case s of
- '\n':ss -> ss
- _ -> s
-
--- Trim up to one newline from the end of the string.
-rchop :: String -> String
-rchop = reverse . lchop . reverse
-
-dropSpacePrefix :: [String] -> [String]
-dropSpacePrefix lns =
- map (drop maxIndent) lns
- where flns = filter (not . all (== ' ')) lns
- maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns
-
+-- | Parse an @\<example>@ tag.
exampleTag :: PandocMonad m => MuseParser m (F Blocks)
exampleTag = try $ do
(attr, contents) <- htmlBlock "example"
return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents
+-- | Parse a @\<literal>@ tag as a raw block.
+-- For 'RawInline' @\<literal>@ parser, see 'inlineLiteralTag'.
literalTag :: PandocMonad m => MuseParser m (F Blocks)
literalTag = try $ do
many spaceChar
@@ -396,23 +423,41 @@ literalTag = try $ do
format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs
rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content
--- <center> tag is ignored
+-- | Parse @\<center>@ tag.
+-- Currently it is ignored as Pandoc cannot represent centered blocks.
centerTag :: PandocMonad m => MuseParser m (F Blocks)
centerTag = snd <$> parseHtmlContent "center"
--- <right> tag is ignored
+-- | Parse @\<right>@ tag.
+-- Currently it is ignored as Pandoc cannot represent centered blocks.
rightTag :: PandocMonad m => MuseParser m (F Blocks)
rightTag = snd <$> parseHtmlContent "right"
+-- | Parse @\<quote>@ tag.
quoteTag :: PandocMonad m => MuseParser m (F Blocks)
quoteTag = fmap B.blockQuote . snd <$> parseHtmlContent "quote"
--- <div> tag is supported by Emacs Muse, but not Amusewiki 2.025
+-- | Parse @\<div>@ tag.
+-- @\<div>@ tag is supported by Emacs Muse, but not Amusewiki 2.025.
divTag :: PandocMonad m => MuseParser m (F Blocks)
divTag = do
(attrs, content) <- parseHtmlContent "div"
return $ B.divWith attrs <$> content
+-- | Parse @\<biblio>@ tag, the result is the same as @\<div class="biblio">@.
+-- @\<biblio>@ tag is supported only in Text::Amuse mode.
+biblioTag :: PandocMonad m => MuseParser m (F Blocks)
+biblioTag = do
+ guardEnabled Ext_amuse
+ fmap (B.divWith ("", ["biblio"], [])) . snd <$> parseHtmlContent "biblio"
+
+-- | Parse @\<play>@ tag, the result is the same as @\<div class="play">@.
+-- @\<play>@ tag is supported only in Text::Amuse mode.
+playTag :: PandocMonad m => MuseParser m (F Blocks)
+playTag = do
+ guardEnabled Ext_amuse
+ fmap (B.divWith ("", ["play"], [])) . snd <$> parseHtmlContent "play"
+
verseLine :: PandocMonad m => MuseParser m (F Inlines)
verseLine = do
indent <- (B.str <$> many1 (char ' ' >> pure '\160')) <|> pure mempty
@@ -424,25 +469,34 @@ verseLines = do
lns <- many verseLine
return $ B.lineBlock <$> sequence lns
+-- | Parse @\<verse>@ tag.
verseTag :: PandocMonad m => MuseParser m (F Blocks)
verseTag = do
(_, content) <- htmlBlock "verse"
parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content)
+-- | Parse @\<comment>@ tag.
commentTag :: PandocMonad m => MuseParser m (F Blocks)
commentTag = htmlBlock "comment" >> return mempty
--- Indented paragraph is either center, right or quote
+-- | Parse paragraph contents.
+paraContentsUntil :: PandocMonad m
+ => MuseParser m a -- ^ Terminator parser
+ -> MuseParser m (F Inlines, a)
+paraContentsUntil end = do
+ updateState (\st -> st { museInPara = True })
+ (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end)
+ updateState (\st -> st { museInPara = False })
+ return (trimInlinesF $ mconcat l, e)
+
+-- | Parse a paragraph.
paraUntil :: PandocMonad m
- => MuseParser m a
+ => MuseParser m a -- ^ Terminator parser
-> MuseParser m (F Blocks, a)
paraUntil end = do
state <- getState
guard $ not $ museInPara state
- setState $ state{ museInPara = True }
- (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end)
- updateState (\st -> st { museInPara = False })
- return (fmap B.para $ trimInlinesF $ mconcat l, e)
+ first (fmap B.para) <$> paraContentsUntil end
noteMarker :: PandocMonad m => MuseParser m String
noteMarker = try $ do
@@ -461,9 +515,8 @@ amuseNoteBlockUntil end = try $ do
updateState (\st -> st { museInPara = False })
(content, e) <- listItemContentsUntil (sourceColumn pos - 1) (fail "x") end
oldnotes <- museNotes <$> getState
- case M.lookup ref oldnotes of
- Just _ -> logMessage $ DuplicateNoteReference ref pos
- Nothing -> return ()
+ when (M.member ref oldnotes)
+ (logMessage $ DuplicateNoteReference ref pos)
updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes }
return (mempty, e)
@@ -476,9 +529,8 @@ emacsNoteBlock = try $ do
ref <- noteMarker <* skipSpaces
content <- mconcat <$> blocksTillNote
oldnotes <- museNotes <$> getState
- case M.lookup ref oldnotes of
- Just _ -> logMessage $ DuplicateNoteReference ref pos
- Nothing -> return ()
+ when (M.member ref oldnotes)
+ (logMessage $ DuplicateNoteReference ref pos)
updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes }
return mempty
where
@@ -503,29 +555,28 @@ blanklineVerseLine = try $ do
blankline
pure mempty
+-- | Parse a line block indicated by @\'>\'@ characters.
lineBlock :: PandocMonad m => MuseParser m (F Blocks)
lineBlock = try $ do
+ many spaceChar
col <- sourceColumn <$> getPosition
lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1))
return $ B.lineBlock <$> sequence lns
---
--- lists
---
+-- *** List parsers
bulletListItemsUntil :: PandocMonad m
- => Int
- -> MuseParser m a
+ => Int -- ^ Indentation
+ -> MuseParser m a -- ^ Terminator parser
-> MuseParser m ([F Blocks], a)
bulletListItemsUntil indent end = try $ do
char '-'
void spaceChar <|> lookAhead eol
updateState (\st -> st { museInPara = False })
- (x, e) <- listItemContentsUntil (indent + 2) (Right <$> try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (Left <$> end)
- case e of
- Left ee -> return ([x], ee)
- Right (xs, ee) -> return (x:xs, ee)
+ (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (([],) <$> end)
+ return (x:xs, e)
+-- | Parse a bullet list.
bulletListUntil :: PandocMonad m
=> MuseParser m a
-> MuseParser m (F Blocks, a)
@@ -569,11 +620,10 @@ orderedListItemsUntil indent style end =
pos <- getPosition
void spaceChar <|> lookAhead eol
updateState (\st -> st { museInPara = False })
- (x, e) <- listItemContentsUntil (sourceColumn pos) (Right <$> try (optional blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (Left <$> end)
- case e of
- Left ee -> return ([x], ee)
- Right (xs, ee) -> return (x:xs, ee)
+ (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (([],) <$> end)
+ return (x:xs, e)
+-- | Parse an ordered list.
orderedListUntil :: PandocMonad m
=> MuseParser m a
-> MuseParser m (F Blocks, a)
@@ -594,10 +644,8 @@ descriptionsUntil :: PandocMonad m
descriptionsUntil indent end = do
void spaceChar <|> lookAhead eol
updateState (\st -> st { museInPara = False })
- (x, e) <- listItemContentsUntil indent (Right <$> try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (Left <$> end)
- case e of
- Right (xs, ee) -> return (x:xs, ee)
- Left ee -> return ([x], ee)
+ (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (([],) <$> end)
+ return (x:xs, e)
definitionListItemsUntil :: PandocMonad m
=> Int
@@ -609,17 +657,13 @@ definitionListItemsUntil indent end =
continuation = try $ do
pos <- getPosition
term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (try $ string "::")
- (x, e) <- descriptionsUntil (sourceColumn pos) ((Right <$> try (optional blankline >> indentWith indent >> continuation)) <|> (Left <$> end))
- let xx = do
- term' <- term
- x' <- sequence x
- return (term', x')
- case e of
- Left ee -> return ([xx], ee)
- Right (xs, ee) -> return (xx:xs, ee)
+ (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> continuation) <|> (([],) <$> end))
+ let xx = (,) <$> term <*> sequence x
+ return (xx:xs, e)
+-- | Parse a definition list.
definitionListUntil :: PandocMonad m
- => MuseParser m a
+ => MuseParser m a -- ^ Terminator parser
-> MuseParser m (F Blocks, a)
definitionListUntil end = try $ do
many spaceChar
@@ -629,15 +673,14 @@ definitionListUntil end = try $ do
first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end
anyListUntil :: PandocMonad m
- => MuseParser m a
+ => MuseParser m a -- ^ Terminator parser
-> MuseParser m (F Blocks, a)
anyListUntil end =
bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end
---
--- tables
---
+-- *** Table parsers
+-- | Internal Muse table representation.
data MuseTable = MuseTable
{ museTableCaption :: Inlines
, museTableHeaders :: [[Blocks]]
@@ -645,10 +688,10 @@ data MuseTable = MuseTable
, museTableFooters :: [[Blocks]]
}
-data MuseTableElement = MuseHeaderRow (F [Blocks])
- | MuseBodyRow (F [Blocks])
- | MuseFooterRow (F [Blocks])
- | MuseCaption (F Inlines)
+data MuseTableElement = MuseHeaderRow [Blocks]
+ | MuseBodyRow [Blocks]
+ | MuseFooterRow [Blocks]
+ | MuseCaption Inlines
museToPandocTable :: MuseTable -> Blocks
museToPandocTable (MuseTable caption headers body footers) =
@@ -658,69 +701,66 @@ museToPandocTable (MuseTable caption headers body footers) =
headRow = if null headers then [] else head headers
rows = (if null headers then [] else tail headers) ++ body ++ footers
-museAppendElement :: MuseTable
- -> MuseTableElement
- -> F MuseTable
-museAppendElement tbl element =
+museAppendElement :: MuseTableElement
+ -> MuseTable
+ -> MuseTable
+museAppendElement element tbl =
case element of
- MuseHeaderRow row -> do
- row' <- row
- return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] }
- MuseBodyRow row -> do
- row' <- row
- return tbl{ museTableRows = museTableRows tbl ++ [row'] }
- MuseFooterRow row-> do
- row' <- row
- return tbl{ museTableFooters = museTableFooters tbl ++ [row'] }
- MuseCaption inlines -> do
- inlines' <- inlines
- return tbl{ museTableCaption = inlines' }
+ MuseHeaderRow row -> tbl{ museTableHeaders = row : museTableHeaders tbl }
+ MuseBodyRow row -> tbl{ museTableRows = row : museTableRows tbl }
+ MuseFooterRow row -> tbl{ museTableFooters = row : museTableFooters tbl }
+ MuseCaption inlines -> tbl{ museTableCaption = inlines }
tableCell :: PandocMonad m => MuseParser m (F Blocks)
tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd)
where cellEnd = try $ void (many1 spaceChar >> char '|') <|> eol
-tableElements :: PandocMonad m => MuseParser m [MuseTableElement]
-tableElements = tableParseElement `sepEndBy1` eol
+tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement])
+tableElements = sequence <$> (tableParseElement `sepEndBy1` eol)
-elementsToTable :: [MuseTableElement] -> F MuseTable
-elementsToTable = foldM museAppendElement emptyTable
+elementsToTable :: [MuseTableElement] -> MuseTable
+elementsToTable = foldr museAppendElement emptyTable
where emptyTable = MuseTable mempty mempty mempty mempty
+-- | Parse a table.
table :: PandocMonad m => MuseParser m (F Blocks)
-table = try $ fmap museToPandocTable <$> (elementsToTable <$> tableElements)
+table = try $ fmap (museToPandocTable . elementsToTable) <$> tableElements
-tableParseElement :: PandocMonad m => MuseParser m MuseTableElement
+tableParseElement :: PandocMonad m => MuseParser m (F MuseTableElement)
tableParseElement = tableParseHeader
<|> tableParseBody
<|> tableParseFooter
<|> tableParseCaption
-tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks])
+tableParseRow :: PandocMonad m
+ => Int -- ^ Number of separator characters
+ -> MuseParser m (F [Blocks])
tableParseRow n = try $ do
fields <- tableCell `sepBy2` fieldSep
return $ sequence fields
where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p)
fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline))
-tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement
-tableParseHeader = MuseHeaderRow <$> tableParseRow 2
+-- | Parse a table header row.
+tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement)
+tableParseHeader = fmap MuseHeaderRow <$> tableParseRow 2
-tableParseBody :: PandocMonad m => MuseParser m MuseTableElement
-tableParseBody = MuseBodyRow <$> tableParseRow 1
+-- | Parse a table body row.
+tableParseBody :: PandocMonad m => MuseParser m (F MuseTableElement)
+tableParseBody = fmap MuseBodyRow <$> tableParseRow 1
-tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement
-tableParseFooter = MuseFooterRow <$> tableParseRow 3
+-- | Parse a table footer row.
+tableParseFooter :: PandocMonad m => MuseParser m (F MuseTableElement)
+tableParseFooter = fmap MuseFooterRow <$> tableParseRow 3
-tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement
+-- | Parse table caption.
+tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement)
tableParseCaption = try $ do
many spaceChar
string "|+"
- MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|"))
+ fmap MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|"))
---
--- inline parsers
---
+-- ** Inline parsers
inlineList :: PandocMonad m => [MuseParser m (F Inlines)]
inlineList = [ whitespace
@@ -750,17 +790,18 @@ inlineList = [ whitespace
inline :: PandocMonad m => MuseParser m (F Inlines)
inline = endline <|> choice inlineList <?> "inline"
+-- | Parse a soft break.
endline :: PandocMonad m => MuseParser m (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
- returnF B.softbreak
+ return $ return B.softbreak
parseAnchor :: PandocMonad m => MuseParser m String
parseAnchor = try $ do
getPosition >>= \pos -> guard (sourceColumn pos == 1)
char '#'
- (:) <$> letter <*> many (letter <|> digit)
+ (:) <$> letter <*> many (letter <|> digit <|> char '-')
anchor :: PandocMonad m => MuseParser m (F Inlines)
anchor = try $ do
@@ -768,8 +809,11 @@ anchor = try $ do
skipMany spaceChar <|> void newline
return $ return $ B.spanWith (anchorId, [], []) mempty
+-- | Parse a footnote reference.
footnote :: PandocMonad m => MuseParser m (F Inlines)
footnote = try $ do
+ inLink <- museInLink <$> getState
+ guard $ not inLink
ref <- noteMarker
return $ do
notes <- asksF museNotes
@@ -777,7 +821,7 @@ footnote = try $ do
Nothing -> return $ B.str $ "[" ++ ref ++ "]"
Just (_pos, contents) -> do
st <- askF
- let contents' = runF contents st { museNotes = M.empty }
+ let contents' = runF contents st { museNotes = M.delete ref (museNotes st) }
return $ B.note contents'
whitespace :: PandocMonad m => MuseParser m (F Inlines)
@@ -785,6 +829,7 @@ whitespace = try $ do
skipMany1 spaceChar
return $ return B.space
+-- | Parse @\<br>@ tag.
br :: PandocMonad m => MuseParser m (F Inlines)
br = try $ do
string "<br>"
@@ -800,42 +845,54 @@ enclosedInlines :: (PandocMonad m, Show a, Show b)
enclosedInlines start end = try $
trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter))
+-- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
inlineTag :: PandocMonad m
- => String
+ => String -- ^ Tag name
-> MuseParser m (F Inlines)
inlineTag tag = try $ do
htmlTag (~== TagOpen tag [])
mconcat <$> manyTill inline (void $ htmlTag (~== TagClose tag))
-strongTag :: PandocMonad m => MuseParser m (F Inlines)
-strongTag = fmap B.strong <$> inlineTag "strong"
-
+-- | Parse strong inline markup, indicated by @**@.
strong :: PandocMonad m => MuseParser m (F Inlines)
strong = fmap B.strong <$> emphasisBetween (string "**")
+-- | Parse emphasis inline markup, indicated by @*@.
emph :: PandocMonad m => MuseParser m (F Inlines)
emph = fmap B.emph <$> emphasisBetween (char '*')
+-- | Parse underline inline markup, indicated by @_@.
+-- Supported only in Emacs Muse mode, not Text::Amuse.
underlined :: PandocMonad m => MuseParser m (F Inlines)
underlined = do
guardDisabled Ext_amuse -- Supported only by Emacs Muse
fmap underlineSpan <$> emphasisBetween (char '_')
+-- | Parse @\<strong>@ tag.
+strongTag :: PandocMonad m => MuseParser m (F Inlines)
+strongTag = fmap B.strong <$> inlineTag "strong"
+
+-- | Parse @\<em>@ tag.
emphTag :: PandocMonad m => MuseParser m (F Inlines)
emphTag = fmap B.emph <$> inlineTag "em"
+-- | Parse @\<sup>@ tag.
superscriptTag :: PandocMonad m => MuseParser m (F Inlines)
superscriptTag = fmap B.superscript <$> inlineTag "sup"
+-- | Parse @\<sub>@ tag.
subscriptTag :: PandocMonad m => MuseParser m (F Inlines)
subscriptTag = fmap B.subscript <$> inlineTag "sub"
+-- | Parse @\<del>@ tag.
strikeoutTag :: PandocMonad m => MuseParser m (F Inlines)
strikeoutTag = fmap B.strikeout <$> inlineTag "del"
+-- | Parse @\<verbatim>@ tag.
verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
verbatimTag = return . B.text . snd <$> htmlElement "verbatim"
+-- | Parse @\<class>@ tag.
classTag :: PandocMonad m => MuseParser m (F Inlines)
classTag = do
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "class" [])
@@ -843,11 +900,13 @@ classTag = do
let classes = maybe [] words $ lookup "name" attrs
return $ B.spanWith ("", classes, []) <$> mconcat res
+-- | Parse "~~" as nonbreaking space.
nbsp :: PandocMonad m => MuseParser m (F Inlines)
nbsp = try $ do
string "~~"
return $ return $ B.str "\160"
+-- | Parse code markup, indicated by @\'=\'@ characters.
code :: PandocMonad m => MuseParser m (F Inlines)
code = try $ do
atStart $ char '='
@@ -858,13 +917,16 @@ code = try $ do
notFollowedBy $ satisfy isLetter
return $ return $ B.code contents
+-- | Parse @\<code>@ tag.
codeTag :: PandocMonad m => MuseParser m (F Inlines)
codeTag = return . uncurry B.codeWith <$> htmlElement "code"
--- <math> tag is an Emacs Muse extension enabled by (require 'muse-latex2png)
+-- | Parse @\<math>@ tag.
+-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@
mathTag :: PandocMonad m => MuseParser m (F Inlines)
mathTag = return . B.math . snd <$> htmlElement "math"
+-- | Parse inline @\<literal>@ tag as a raw inline.
inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
inlineLiteralTag =
(return . rawInline) <$> htmlElement "literal"
@@ -879,18 +941,19 @@ str = return . B.str <$> many1 alphaNum <* updateLastStrPos
symbol :: PandocMonad m => MuseParser m (F Inlines)
symbol = return . B.str <$> count 1 nonspaceChar
+-- | Parse a link or image.
link :: PandocMonad m => MuseParser m (F Inlines)
link = try $ do
st <- getState
guard $ not $ museInLink st
setState $ st{ museInLink = True }
- (url, title, content) <- linkText
+ (url, content) <- linkText
updateState (\state -> state { museInLink = False })
return $ case stripPrefix "URL:" url of
Nothing -> if isImageUrl url
- then B.image url title <$> fromMaybe (return mempty) content
- else B.link url title <$> fromMaybe (return $ B.str url) content
- Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content
+ then B.image url "" <$> fromMaybe (return mempty) content
+ else B.link url "" <$> fromMaybe (return $ B.str url) content
+ Just url' -> B.link url' "" <$> fromMaybe (return $ B.str url') content
where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
isImageUrl = (`elem` imageExtensions) . takeExtension
@@ -898,10 +961,10 @@ link = try $ do
linkContent :: PandocMonad m => MuseParser m (F Inlines)
linkContent = char '[' >> trimInlinesF . mconcat <$> manyTill inline (string "]")
-linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines))
+linkText :: PandocMonad m => MuseParser m (String, Maybe (F Inlines))
linkText = do
string "[["
- url <- many1Till anyChar $ char ']'
+ url <- manyTill anyChar $ char ']'
content <- optionMaybe linkContent
char ']'
- return (url, "", content)
+ return (url, content)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 3f4586295..1a489ab94 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,4 +1,34 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-
+Copyright (C) 2013-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.OPML
+ Copyright : Copyright (C) 2013-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of OPML to 'Pandoc' document.
+-}
+
module Text.Pandoc.Readers.OPML ( readOPML ) where
import Prelude
import Control.Monad.State.Strict
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 566f9b959..71a38cf82 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -651,11 +651,15 @@ directive' = do
skipMany spaceChar
top <- many $ satisfy (/='\n')
<|> try (char '\n' <*
- notFollowedBy' (rawFieldListItem 3) <*
- count 3 (char ' ') <*
+ notFollowedBy' (rawFieldListItem 1) <*
+ many1 (char ' ') <*
notFollowedBy blankline)
newline
- fields <- many $ rawFieldListItem 3
+ fields <- do
+ fieldIndent <- length <$> lookAhead (many (char ' '))
+ if fieldIndent == 0
+ then return []
+ else many $ rawFieldListItem fieldIndent
body <- option "" $ try $ blanklines >> indentedBlock
optional blanklines
let body' = body ++ "\n\n"
@@ -1086,10 +1090,15 @@ targetURI :: Monad m => ParserT [Char] st m [Char]
targetURI = do
skipSpaces
optional newline
- contents <- many1 (try (many spaceChar >> newline >>
- many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
+ contents <- trim <$>
+ many1 (satisfy (/='\n')
+ <|> try (newline >> many1 spaceChar >> noneOf " \t\n"))
blanklines
- return $ escapeURI $ trim contents
+ case reverse contents of
+ -- strip backticks
+ '_':'`':xs -> return (dropWhile (=='`') (reverse xs) ++ "_")
+ '_':_ -> return contents
+ _ -> return (escapeURI contents)
substKey :: PandocMonad m => RSTParser m ()
substKey = try $ do