aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/BibTeX.hs24
-rw-r--r--src/Text/Pandoc/Readers/CSV.hs18
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs70
-rw-r--r--src/Text/Pandoc/Readers/Creole.hs11
-rw-r--r--src/Text/Pandoc/Readers/CslJson.hs11
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs189
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs101
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs12
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs334
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse/Styles.hs57
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs54
-rw-r--r--src/Text/Pandoc/Readers/DokuWiki.hs19
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs80
-rw-r--r--src/Text/Pandoc/Readers/FB2.hs108
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs411
-rw-r--r--src/Text/Pandoc/Readers/HTML/Parsing.hs26
-rw-r--r--src/Text/Pandoc/Readers/HTML/Table.hs57
-rw-r--r--src/Text/Pandoc/Readers/HTML/TagCategories.hs2
-rw-r--r--src/Text/Pandoc/Readers/HTML/Types.hs2
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs18
-rw-r--r--src/Text/Pandoc/Readers/Ipynb.hs10
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs88
-rw-r--r--src/Text/Pandoc/Readers/Jira.hs24
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs1685
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Citation.hs210
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Inline.hs397
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Lang.hs321
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Macro.hs184
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Math.hs221
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs273
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/SIunitx.hs223
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Table.hs379
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs17
-rw-r--r--src/Text/Pandoc/Readers/Man.hs29
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs531
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs35
-rw-r--r--src/Text/Pandoc/Readers/Metadata.hs63
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs19
-rw-r--r--src/Text/Pandoc/Readers/Native.hs14
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs58
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs53
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/State.hs4
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs32
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs3
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs33
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs27
-rw-r--r--src/Text/Pandoc/Readers/Odt/Namespaces.hs11
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs38
-rw-r--r--src/Text/Pandoc/Readers/Org.hs13
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs2
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs80
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs2
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs2
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs26
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs16
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs2
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs12
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs2
-rw-r--r--src/Text/Pandoc/Readers/RST.hs205
-rw-r--r--src/Text/Pandoc/Readers/Roff.hs34
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs34
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs186
-rw-r--r--src/Text/Pandoc/Readers/TikiWiki.hs12
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs70
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs19
65 files changed, 4123 insertions, 3180 deletions
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 6c96ab30a..318afda85 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.BibTeX
- Copyright : Copyright (C) 2020 John MacFarlane
+ Copyright : Copyright (C) 2020-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -23,41 +23,47 @@ where
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, cite, str)
-import Data.Text (Text)
import Citeproc (Lang(..), parseLang)
import Citeproc.Locale (getLocale)
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Class (PandocMonad, lookupEnv)
import Text.Pandoc.Citeproc.BibTeX as BibTeX
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Text.Pandoc.Sources (ToSources(..))
import Control.Monad.Except (throwError)
-- | Read BibTeX from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
readBibTeX = readBibTeX' BibTeX.Bibtex
-- | Read BibLaTeX from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
readBibLaTeX = readBibTeX' BibTeX.Biblatex
-readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' :: (PandocMonad m, ToSources a)
+ => Variant -> ReaderOptions -> a -> m Pandoc
readBibTeX' variant _opts t = do
- lang <- maybe (Lang "en" (Just "US")) parseLang
- <$> lookupEnv "LANG"
+ mblangEnv <- lookupEnv "LANG"
+ let defaultLang = Lang "en" Nothing (Just "US") [] [] []
+ let lang = case mblangEnv of
+ Nothing -> defaultLang
+ Just l -> either (const defaultLang) id $ parseLang l
locale <- case getLocale lang of
Left e ->
- case getLocale (Lang "en" (Just "US")) of
+ case getLocale (Lang "en" Nothing (Just "US") [] [] []) of
Right l -> return l
Left _ -> throwError $ PandocCiteprocError e
Right l -> return l
case BibTeX.readBibtexString variant locale (const True) t of
- Left e -> throwError $ PandocParsecError t e
+ Left e -> throwError $ PandocParsecError (toSources t) e
Right refs -> return $ setMeta "references"
(map referenceToMetaValue refs)
. setMeta "nocite"
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index f0edcaa16..eca8f9425 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -2,8 +2,8 @@
{-# LANGUAGE ScopedTypeVariables #-}
{- |
- Module : Text.Pandoc.Readers.RST
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Module : Text.Pandoc.Readers.CSV
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -13,23 +13,23 @@
Conversion from CSV to a 'Pandoc' table.
-}
module Text.Pandoc.Readers.CSV ( readCSV ) where
-import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.Pandoc.Options (ReaderOptions)
import Control.Monad.Except (throwError)
-readCSV :: PandocMonad m
+readCSV :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ Text to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
-readCSV _opts s =
- case parseCSV defaultCSVOptions (crFilter s) of
+readCSV _opts s = do
+ let txt = sourcesToText $ toSources s
+ case parseCSV defaultCSVOptions txt of
Right (r:rs) -> return $ B.doc $ B.table capt
(zip aligns widths)
(TableHead nullAttr hdrs)
@@ -45,4 +45,4 @@ readCSV _opts s =
aligns = replicate numcols AlignDefault
widths = replicate numcols ColWidthDefault
Right [] -> return $ B.doc mempty
- Left e -> throwError $ PandocParsecError s e
+ Left e -> throwError $ PandocParsecError (toSources [("",txt)]) e
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index c1773eaab..411d64278 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -1,8 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Readers.CommonMark
- Copyright : Copyright (C) 2015-2020 John MacFarlane
+ Copyright : Copyright (C) 2015-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -25,17 +26,66 @@ import Text.Pandoc.Definition
import Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Error
+import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
import Control.Monad.Except
import Data.Functor.Identity (runIdentity)
+import Data.Typeable
+import Text.Pandoc.Parsing (runParserT, getInput,
+ runF, defaultParserState, option, many1, anyChar,
+ Sources(..), ToSources(..), ParserT, Future,
+ sourceName)
+import qualified Data.Text as T
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMark opts s = do
- let res = runIdentity $
- commonmarkWith (foldr ($) defaultSyntaxSpec exts) "input" s
- case res of
- Left err -> throwError $ PandocParsecError s err
- Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+readCommonMark :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
+readCommonMark opts s
+ | isEnabled Ext_yaml_metadata_block opts = do
+ let sources = toSources s
+ let toks = concatMap sourceToToks (unSources sources)
+ res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
+ rest <- getInput
+ return (meta, rest))
+ defaultParserState "YAML metadata" (toSources s)
+ case res of
+ Left _ -> readCommonMarkBody opts sources toks
+ Right (meta, rest) -> do
+ -- strip off metadata section and parse body
+ let body = concatMap sourceToToks (unSources rest)
+ Pandoc _ bs <- readCommonMarkBody opts sources body
+ return $ Pandoc (runF meta defaultParserState) bs
+ | otherwise = do
+ let sources = toSources s
+ let toks = concatMap sourceToToks (unSources sources)
+ readCommonMarkBody opts sources toks
+
+sourceToToks :: (SourcePos, Text) -> [Tok]
+sourceToToks (pos, s) = tokenize (sourceName pos) s
+
+metaValueParser :: Monad m
+ => ReaderOptions -> ParserT Sources st m (Future st MetaValue)
+metaValueParser opts = do
+ inp <- option "" $ T.pack <$> many1 anyChar
+ let toks = concatMap sourceToToks (unSources (toSources inp))
+ case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+ Left _ -> mzero
+ Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
+readCommonMarkBody opts s toks
+ | isEnabled Ext_sourcepos opts =
+ case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+ Left err -> throwError $ PandocParsecError s err
+ Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
+ | otherwise =
+ case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+ Left err -> throwError $ PandocParsecError s err
+ Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+
+specFor :: (Monad m, Typeable m, Typeable a,
+ Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))
+ => ReaderOptions -> SyntaxSpec m (Cm a Inlines) (Cm a Blocks)
+specFor opts = foldr ($) defaultSyntaxSpec exts
where
exts = [ (hardLineBreaksSpec <>) | isEnabled Ext_hard_line_breaks opts ] ++
[ (smartPunctuationSpec <>) | isEnabled Ext_smart opts ] ++
@@ -62,5 +112,7 @@ readCommonMark opts s = do
| isEnabled Ext_implicit_header_references opts ] ++
[ (footnoteSpec <>) | isEnabled Ext_footnotes opts ] ++
[ (definitionListSpec <>) | isEnabled Ext_definition_lists opts ] ++
- [ (taskListSpec <>) | isEnabled Ext_task_lists opts ]
+ [ (taskListSpec <>) | isEnabled Ext_task_lists opts ] ++
+ [ (rebaseRelativePathsSpec <>)
+ | isEnabled Ext_rebase_relative_paths opts ]
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 2658dfea2..ad848ada7 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed)
-import Text.Pandoc.Shared (crFilter)
-
-- | Read creole from an input string and return a Pandoc document.
-readCreole :: PandocMonad m
+readCreole :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readCreole opts s = do
- res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
+ let sources = ensureFinalNewlines 2 (toSources s)
+ res <- readWithM parseCreole def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
-type CRLParser = ParserT Text ParserState
+type CRLParser = ParserT Sources ParserState
--
-- Utility functions
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 377186b1e..a0af5c325 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.CslJson
- Copyright : Copyright (C) 2020 John MacFarlane
+ Copyright : Copyright (C) 2020-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -24,21 +24,22 @@ import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, cite, str)
import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
import Control.Monad.Except (throwError)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-- | Read CSL JSON from an input string and return a Pandoc document.
-- The document will have only metadata, with an empty body.
-- The metadata will contain a `references` field with the
-- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCslJson _opts t =
- case cslJsonToReferences (UTF8.fromText t) of
+readCslJson :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
+readCslJson _opts x =
+ case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
Left e -> throwError $ PandocParseError $ T.pack e
Right refs -> return $ setMeta "references"
(map referenceToMetaValue refs)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index b0846e345..c49b82ccf 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.DocBook
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -12,23 +12,28 @@ Conversion of DocBook XML to 'Pandoc' document.
-}
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Control.Monad.State.Strict
-import Data.Char (isSpace, toUpper)
+import Data.Char (isSpace, isLetter)
import Data.Default
import Data.Either (rights)
import Data.Foldable (asum)
import Data.Generics
import Data.List (intersperse,elemIndex)
+import Data.List.NonEmpty (nonEmpty)
import Data.Maybe (fromMaybe,mapMaybe)
import Data.Text (Text)
import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Control.Monad.Except (throwError)
import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
import Text.Pandoc.Options
import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
{-
@@ -92,7 +97,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] chapterinfo - Meta-information for a Chapter
[ ] citation - An inline bibliographic reference to another published work
[ ] citebiblioid - A citation of a bibliographic identifier
-[ ] citerefentry - A citation to a reference page
+[x] citerefentry - A citation to a reference page
[ ] citetitle - The title of a cited work
[ ] city - The name of a city in an address
[x] classname - The name of a class, in the object-oriented programming sense
@@ -129,6 +134,7 @@ List of all DocBook tags, with [x] indicating implemented,
[ ] corpcredit - A corporation or organization credited in a document
[ ] corpname - The name of a corporation
[ ] country - The name of a country
+[x] danger - An admonition set off from the text indicating hazardous situation
[ ] database - The name of a database, or part of a database
[x] date - The date of publication or revision of a document
[ ] dedication - A wrapper for the dedication section of a book
@@ -206,7 +212,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] info - A wrapper for information about a component or other block. (DocBook v5)
[x] informalequation - A displayed mathematical equation without a title
[x] informalexample - A displayed example without a title
-[ ] informalfigure - A untitled figure
+[x] informalfigure - An untitled figure
[ ] informaltable - A table without a title
[ ] initializer - The initializer for a FieldSynopsis
[x] inlineequation - A mathematical equation or expression occurring inline
@@ -535,24 +541,32 @@ instance Default DBState where
, dbContent = [] }
-readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readDocBook _ inp = do
- let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp
+ let sources = toSources inp
+ tree <- either (throwError . PandocXMLError "") return $
+ parseXMLContents
+ (TL.fromStrict . handleInstructions . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
--- We treat <?asciidoc-br?> specially (issue #1236), converting it
--- to <br/>, since xml-light doesn't parse the instruction correctly.
--- Other xml instructions are simply removed from the input stream.
+-- We treat certain processing instructions by converting them to tags
+-- beginning "pi-".
handleInstructions :: Text -> Text
-handleInstructions = T.pack . handleInstructions' . T.unpack
-
-handleInstructions' :: String -> String
-handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs
-handleInstructions' xs = case break (=='<') xs of
- (ys, []) -> ys
- ([], '<':zs) -> '<' : handleInstructions' zs
- (ys, zs) -> ys ++ handleInstructions' zs
+handleInstructions t =
+ let (x,y) = T.breakOn "<?" t
+ in if T.null y
+ then x
+ else
+ let (w,z) = T.breakOn "?>" y
+ in (if T.takeWhile (\c -> isLetter c || c == '-')
+ (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"]
+ then x <> "<pi-" <> T.drop 2 w <> "/>"
+ else x <> w <> T.take 2 z) <>
+ handleInstructions (T.drop 2 z)
getFigure :: PandocMonad m => Element -> DB m Blocks
getFigure e = do
@@ -565,32 +579,14 @@ getFigure e = do
modify $ \st -> st{ dbFigureTitle = mempty, dbFigureId = mempty }
return res
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
- where go :: [Content] -> [Content]
- go (Text (CData CDataRaw _ _):xs) = xs
- go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
- Text (CData CDataText (s1 ++ s2) z):xs
- go (Text (CData CDataText s1 z):CRef r:xs) =
- Text (CData CDataText (s1 ++ convertEntity r) z):xs
- go (CRef r:Text (CData CDataText s1 z):xs) =
- Text (CData CDataText (convertEntity r ++ s1) z):xs
- go (CRef r1:CRef r2:xs) =
- Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
- go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
attrValue attr elt =
- maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+ fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-- convenience function
named :: Text -> Element -> Bool
-named s e = qName (elName e) == T.unpack s
+named s e = qName (elName e) == s
--
@@ -605,16 +601,24 @@ addMetadataFromElement e = do
Nothing -> return ()
Just z -> addMetaField "author" z
addMetaField "subtitle" e
- addMetaField "author" e
+ addAuthor e
addMetaField "date" e
addMetaField "release" e
addMetaField "releaseinfo" e
return mempty
- where addMetaField fieldname elt =
- case filterChildren (named fieldname) elt of
- [] -> return ()
- [z] -> getInlines z >>= addMeta fieldname
- zs -> mapM getInlines zs >>= addMeta fieldname
+ where
+ addAuthor elt =
+ case filterChildren (named "author") elt of
+ [] -> return ()
+ [z] -> fromAuthor z >>= addMeta "author"
+ zs -> mapM fromAuthor zs >>= addMeta "author"
+ fromAuthor elt =
+ mconcat . intersperse space <$> mapM getInlines (elChildren elt)
+ addMetaField fieldname elt =
+ case filterChildren (named fieldname) elt of
+ [] -> return ()
+ [z] -> getInlines z >>= addMeta fieldname
+ zs -> mapM getInlines zs >>= addMeta fieldname
addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m ()
addMeta field val = modify (setMeta field val)
@@ -627,7 +631,7 @@ isBlockElement :: Content -> Bool
isBlockElement (Elem e) = qName (elName e) `elem` blockTags
isBlockElement _ = False
-blockTags :: [String]
+blockTags :: [Text]
blockTags =
[ "abstract"
, "ackno"
@@ -669,6 +673,7 @@ blockTags =
, "index"
, "info"
, "informalexample"
+ , "informalfigure"
, "informaltable"
, "itemizedlist"
, "linegroup"
@@ -713,8 +718,8 @@ blockTags =
, "variablelist"
] ++ admonitionTags
-admonitionTags :: [String]
-admonitionTags = ["important","caution","note","tip","warning"]
+admonitionTags :: [Text]
+admonitionTags = ["caution","danger","important","note","tip","warning"]
-- Trim leading and trailing newline characters
trimNl :: Text -> Text
@@ -736,9 +741,9 @@ getMediaobject e = do
figTitle <- gets dbFigureTitle
ident <- gets dbFigureId
(imageUrl, attr) <-
- case filterChild (named "imageobject") e of
- Nothing -> return (mempty, nullAttr)
- Just z -> case filterChild (named "imagedata") z of
+ case filterElements (named "imageobject") e of
+ [] -> return (mempty, nullAttr)
+ (z:_) -> case filterChild (named "imagedata") z of
Nothing -> return (mempty, nullAttr)
Just i -> let atVal a = attrValue a i
w = case atVal "width" of
@@ -771,10 +776,10 @@ getBlocks e = mconcat <$>
parseBlock :: PandocMonad m => Content -> DB m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
then return mempty
- else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+ else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
parseBlock (Elem e) =
case qName (elName e) of
"toc" -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -829,7 +834,7 @@ parseBlock (Elem e) =
"refsect2" -> sect 2
"refsect3" -> sect 3
"refsection" -> gets dbSectionLevel >>= sect . (+1)
- l | l `elem` admonitionTags -> parseAdmonition $ T.pack l
+ l | l `elem` admonitionTags -> parseAdmonition l
"area" -> skip
"areaset" -> skip
"areaspec" -> skip
@@ -855,6 +860,7 @@ parseBlock (Elem e) =
"variablelist" -> definitionList <$> deflistitems
"procedure" -> bulletList <$> steps
"figure" -> getFigure e
+ "informalfigure" -> getFigure e
"mediaobject" -> para <$> getMediaobject e
"caption" -> skip
"info" -> addMetadataFromElement e
@@ -890,7 +896,11 @@ parseBlock (Elem e) =
"subtitle" -> return mempty -- handled in parent element
_ -> skip >> getBlocks e
where skip = do
- lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+ let qn = qName $ elName e
+ let name = if "pi-" `T.isPrefixOf` qn
+ then "<?" <> qn <> "?>"
+ else qn
+ lift $ report $ IgnoredElement name
return mempty
codeBlockWithLang = do
@@ -898,7 +908,7 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ T.pack $ strContentRecursive e
+ $ trimNl $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -952,17 +962,16 @@ parseBlock (Elem e) =
w <- findAttr (unqual "colwidth") c
n <- safeRead $ "0" <> T.filter (\x ->
(x >= '0' && x <= '9')
- || x == '.') (T.pack w)
+ || x == '.') w
if n > 0 then Just n else Nothing
- let numrows = case bodyrows of
- [] -> 0
- xs -> maximum $ map length xs
+ let numrows = maybe 0 maximum $ nonEmpty
+ $ map length bodyrows
let aligns = case colspecs of
[] -> replicate numrows AlignDefault
cs -> map toAlignment cs
let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9')
|| x == '.') s)
- let textWidth = case filterChild (named "?dbfo") e of
+ let textWidth = case filterChild (named "pi-dbfo") e of
Just d -> case attrValue "table-width" d of
"" -> 1.0
w -> fromMaybe 100.0 (parseWidth w) / 100.0
@@ -1035,12 +1044,12 @@ parseMixed container conts = do
x <- parseMixed container rs
return $ p <> b <> x
-parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell]
+parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell]
parseRow cn = do
let isEntry x = named "entry" x || named "td" x || named "th" x
mapM (parseEntry cn) . filterChildren isEntry
-parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell
+parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell
parseEntry cn el = do
let colDistance sa ea = do
let iStrt = elemIndex sa cn
@@ -1062,7 +1071,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines
getInlines e' = trimInlines . mconcat <$>
mapM parseInline (elContent e')
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
strContentRecursive = strContent .
(\e' -> e'{ elContent = map elementToStr $ elContent e' })
@@ -1071,16 +1080,16 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
+parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
- return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+ return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref)
parseInline (Elem e) =
case qName (elName e) of
"anchor" -> do
return $ spanWith (attrValue "id" e, [], []) mempty
"phrase" -> do
let ident = attrValue "id" e
- let classes = T.words $ attrValue "class" e
+ let classes = T.words $ attrValue "role" e
if ident /= "" || classes /= []
then innerInlines (spanWith (ident,classes,[]))
else innerInlines id
@@ -1103,6 +1112,10 @@ parseInline (Elem e) =
"segmentedlist" -> segmentedList
"classname" -> codeWithLang
"code" -> codeWithLang
+ "citerefentry" -> do
+ let title = maybe mempty strContent $ filterChild (named "refentrytitle") e
+ let manvolnum = maybe mempty (\el -> "(" <> strContent el <> ")") $ filterChild (named "manvolnum") e
+ return $ codeWith ("",["citerefentry"],[]) (title <> manvolnum)
"filename" -> codeWithLang
"envar" -> codeWithLang
"literal" -> codeWithLang
@@ -1125,7 +1138,7 @@ parseInline (Elem e) =
"userinput" -> codeWithLang
"systemitem" -> codeWithLang
"varargs" -> return $ code "(...)"
- "keycap" -> return (str $ T.pack $ strContent e)
+ "keycap" -> return (str $ strContent e)
"keycombo" -> keycombo <$>
mapM parseInline (elContent e)
"menuchoice" -> menuchoice <$>
@@ -1137,17 +1150,17 @@ parseInline (Elem e) =
let title = case attrValue "endterm" e of
"" -> maybe "???" xrefTitleByElem
(findElementById linkend content)
- endterm -> maybe "???" (T.pack . strContent)
+ endterm -> maybe "???" strContent
(findElementById endterm content)
return $ link ("#" <> linkend) "" (text title)
- "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
- $ str $ T.pack $ strContent e
- "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
+ "email" -> return $ link ("mailto:" <> strContent e) ""
+ $ str $ strContent e
+ "uri" -> return $ link (strContent e) "" $ str $ strContent e
"ulink" -> innerInlines (link (attrValue "url" e) "")
"link" -> do
ils <- innerInlines id
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> T.pack h
+ Just h -> h
_ -> "#" <> attrValue "linkend" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
@@ -1163,12 +1176,15 @@ parseInline (Elem e) =
"title" -> return mempty
"affiliation" -> skip
-- Note: this isn't a real docbook tag; it's what we convert
- -- <?asciidor-br?> to in handleInstructions, above. A kludge to
- -- work around xml-light's inability to parse an instruction.
- "br" -> return linebreak
+ -- <?asciidor-br?> to in handleInstructions, above.
+ "pi-asciidoc-br" -> return linebreak
_ -> skip >> innerInlines id
where skip = do
- lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+ let qn = qName $ elName e
+ let name = if "pi-" `T.isPrefixOf` qn
+ then "<?" <> qn <> "?>"
+ else qn
+ lift $ report $ IgnoredElement name
return mempty
innerInlines f = extractSpaces f . mconcat <$>
@@ -1177,7 +1193,7 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines
(filterChildren (named "member") e)
segmentedList = do
@@ -1218,10 +1234,10 @@ parseInline (Elem e) =
"sect5" -> descendantContent "title" el
"cmdsynopsis" -> descendantContent "command" el
"funcsynopsis" -> descendantContent "function" el
- _ -> T.pack $ qName (elName el) ++ "_title"
+ _ -> qName (elName el) <> "_title"
where
xrefLabel = attrValue "xreflabel" el
- descendantContent name = maybe "???" (T.pack . strContent)
+ descendantContent name = maybe "???" strContent
. filterElementName (\n -> qName n == name)
-- | Extract a math equation from an element
@@ -1241,8 +1257,9 @@ equation e constructor =
where
mathMLEquations :: [Text]
mathMLEquations = map writeTeX $ rights $ readMath
- (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
- (readMathML . T.pack . showElement)
+ (\x -> qName (elName x) == "math" &&
+ qURI (elName x) == Just "http://www.w3.org/1998/Math/MathML")
+ (readMathML . showElement)
latexEquations :: [Text]
latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
@@ -1256,8 +1273,8 @@ equation e constructor =
-- | Get the actual text stored in a CData block. 'showContent'
-- returns the text still surrounded by the [[CDATA]] tags.
showVerbatimCData :: Content -> Text
-showVerbatimCData (Text (CData _ d _)) = T.pack d
-showVerbatimCData c = T.pack $ showContent c
+showVerbatimCData (Text (CData _ d _)) = d
+showVerbatimCData c = showContent c
-- | Set the prefix of a name to 'Nothing'
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 00de6a0cd..c06adf7e3 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -61,13 +61,14 @@ module Text.Pandoc.Readers.Docx
import Codec.Archive.Zip
import Control.Monad.Reader
import Control.Monad.State.Strict
+import Data.Bifunctor (bimap, first)
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
-import Data.List (delete, intersect)
+import Data.List (delete, intersect, foldl')
import Data.Char (isSpace)
import qualified Data.Map as M
import qualified Data.Text as T
-import Data.Maybe (isJust, fromMaybe)
+import Data.Maybe (catMaybes, isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
@@ -85,6 +86,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import qualified Text.Pandoc.Class.PandocMonad as P
import Text.Pandoc.Error
import Text.Pandoc.Logging
+import Data.List.NonEmpty (nonEmpty)
readDocx :: PandocMonad m
=> ReaderOptions
@@ -112,6 +114,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
-- restarting
, docxListState :: M.Map (T.Text, T.Text) Integer
, docxPrevPara :: Inlines
+ , docxTableCaptions :: [Blocks]
}
instance Default DState where
@@ -122,6 +125,7 @@ instance Default DState where
, docxDropCap = mempty
, docxListState = M.empty
, docxPrevPara = mempty
+ , docxTableCaptions = []
}
data DEnv = DEnv { docxOptions :: ReaderOptions
@@ -490,15 +494,32 @@ singleParaToPlain blks
singleton $ Plain ils
singleParaToPlain blks = blks
-cellToBlocks :: PandocMonad m => Docx.Cell -> DocxContext m Blocks
-cellToBlocks (Docx.Cell bps) = do
+cellToCell :: PandocMonad m => RowSpan -> Docx.Cell -> DocxContext m Pandoc.Cell
+cellToCell rowSpan (Docx.Cell gridSpan _ bps) = do
blks <- smushBlocks <$> mapM bodyPartToBlocks bps
- return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+ let blks' = singleParaToPlain $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+ return (cell AlignDefault rowSpan (ColSpan (fromIntegral gridSpan)) blks')
+
+rowsToRows :: PandocMonad m => [Docx.Row] -> DocxContext m [Pandoc.Row]
+rowsToRows rows = do
+ let rowspans = (fmap . fmap) (first RowSpan) (Docx.rowsToRowspans rows)
+ cells <- traverse (traverse (uncurry cellToCell)) rowspans
+ return (fmap (Pandoc.Row nullAttr) cells)
+
+splitHeaderRows :: Bool -> [Docx.Row] -> ([Docx.Row], [Docx.Row])
+splitHeaderRows hasFirstRowFormatting rs = bimap reverse reverse $ fst
+ $ if hasFirstRowFormatting
+ then foldl' f ((take 1 rs, []), True) (drop 1 rs)
+ else foldl' f (([], []), False) rs
+ where
+ f ((headerRows, bodyRows), previousRowWasHeader) r@(Docx.Row h cs)
+ | h == HasTblHeader || (previousRowWasHeader && any isContinuationCell cs)
+ = ((r : headerRows, bodyRows), True)
+ | otherwise
+ = ((headerRows, r : bodyRows), False)
+
+ isContinuationCell (Docx.Cell _ vm _) = vm == Docx.Continue
-rowToBlocksList :: PandocMonad m => Docx.Row -> DocxContext m [Blocks]
-rowToBlocksList (Docx.Row cells) = do
- blksList <- mapM cellToBlocks cells
- return $ map singleParaToPlain blksList
-- like trimInlines, but also take out linebreaks
trimSps :: Inlines -> Inlines
@@ -545,6 +566,11 @@ normalizeToClassName = T.map go . fromStyleName
where go c | isSpace c = '-'
| otherwise = c
+bodyPartToTableCaption :: PandocMonad m => BodyPart -> DocxContext m (Maybe Blocks)
+bodyPartToTableCaption (TblCaption pPr parparts) =
+ Just <$> bodyPartToBlocks (Paragraph pPr parparts)
+bodyPartToTableCaption _ = pure Nothing
+
bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
| Just True <- pBidi pPr = do
@@ -636,54 +662,43 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
in
bodyPartToBlocks $ Paragraph pPr' parparts
+bodyPartToBlocks (TblCaption _ _) =
+ return $ para mempty -- collected separately
bodyPartToBlocks (Tbl _ _ _ []) =
return $ para mempty
-bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
- let cap' = simpleCaption $ plain $ text cap
- (hdr, rows) = case firstRowFormatting look of
- True | null rs -> (Nothing, [r])
- | otherwise -> (Just r, rs)
- False -> (Nothing, r:rs)
-
- cells <- mapM rowToBlocksList rows
+bodyPartToBlocks (Tbl cap grid look parts) = do
+ captions <- gets docxTableCaptions
+ fullCaption <- case captions of
+ c : cs -> do
+ modify (\s -> s { docxTableCaptions = cs })
+ return c
+ [] -> return $ if T.null cap then mempty else plain (text cap)
+ let shortCaption = if T.null cap then Nothing else Just (toList (text cap))
+ cap' = caption shortCaption fullCaption
+ (hdr, rows) = splitHeaderRows (firstRowFormatting look) parts
let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
- -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out
- -- our own, see
- -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155
- nonEmpty [] = Nothing
- nonEmpty l = Just l
rowLength :: Docx.Row -> Int
- rowLength (Docx.Row c) = length c
+ rowLength (Docx.Row _ c) = sum (fmap (\(Docx.Cell gridSpan _ _) -> fromIntegral gridSpan) c)
- let toRow = Pandoc.Row nullAttr . map simpleCell
- toHeaderRow l = [toRow l | not (null l)]
+ headerCells <- rowsToRows hdr
+ bodyCells <- rowsToRows rows
- -- pad cells. New Text.Pandoc.Builder will do that for us,
- -- so this is for compatibility while we switch over.
- let cells' = map (\row -> toRow $ take width (row ++ repeat mempty)) cells
-
- hdrCells <- case hdr of
- Just r' -> toHeaderRow <$> rowToBlocksList r'
- Nothing -> return []
-
- -- The two following variables (horizontal column alignment and
- -- relative column widths) go to the default at the
- -- moment. Width information is in the TblGrid field of the Tbl,
- -- so should be possible. Alignment might be more difficult,
- -- since there doesn't seem to be a column entity in docx.
+ -- Horizontal column alignment goes to the default at the moment. Getting
+ -- it might be difficult, since there doesn't seem to be a column entity
+ -- in docx.
let alignments = replicate width AlignDefault
- widths = replicate width ColWidthDefault
+ totalWidth = sum grid
+ widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
return $ table cap'
(zip alignments widths)
- (TableHead nullAttr hdrCells)
- [TableBody nullAttr 0 [] cells']
+ (TableHead nullAttr headerCells)
+ [TableBody nullAttr 0 [] bodyCells]
(TableFoot nullAttr [])
bodyPartToBlocks (OMathPara e) =
return $ para $ displayMath (writeTeX e)
-
-- replace targets with generated anchors.
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
@@ -719,6 +734,8 @@ bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
+ captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps
+ modify (\s -> s { docxTableCaptions = captions })
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
blks'' <- removeOrphanAnchors blks'
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 46112af19..6e4faa639 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -2,7 +2,7 @@
{- |
Module : Text.Pandoc.Readers.Docx.Combine
Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
- 2014-2020 John MacFarlane <jgm@berkeley.edu>,
+ 2014-2021 John MacFarlane <jgm@berkeley.edu>,
2020 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above
@@ -61,7 +61,7 @@ import Data.List
import Data.Bifunctor
import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl
, (><), (|>) )
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder as B
data Modifier a = Modifier (a -> a)
| AttrModifier (Attr -> a -> a) Attr
@@ -116,12 +116,12 @@ ilModifierAndInnards ils = case viewl $ unMany ils of
inlinesL :: Inlines -> (Inlines, Inlines)
inlinesL ils = case viewl $ unMany ils of
- (s :< sq) -> (singleton s, Many sq)
+ (s :< sq) -> (B.singleton s, Many sq)
_ -> (mempty, ils)
inlinesR :: Inlines -> (Inlines, Inlines)
inlinesR ils = case viewr $ unMany ils of
- (sq :> s) -> (Many sq, singleton s)
+ (sq :> s) -> (Many sq, B.singleton s)
_ -> (ils, mempty)
combineInlines :: Inlines -> Inlines -> Inlines
@@ -182,7 +182,7 @@ isAttrModifier _ = False
smushInlines :: [Inlines] -> Inlines
smushInlines xs = combineInlines xs' mempty
- where xs' = foldl combineInlines mempty xs
+ where xs' = foldl' combineInlines mempty xs
smushBlocks :: [Blocks] -> Blocks
-smushBlocks xs = foldl combineBlocks mempty xs
+smushBlocks xs = foldl' combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index fdcffcc3f..dbb16a821 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -33,7 +33,9 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, ParStyle
, CharStyle(cStyleData)
, Row(..)
+ , TblHeader(..)
, Cell(..)
+ , VMerge(..)
, TrackedChange(..)
, ChangeType(..)
, ChangeInfo(..)
@@ -50,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, pHeading
, constructBogusParStyleData
, leftBiasedMergeRunStyle
+ , rowsToRowspans
) where
import Text.Pandoc.Readers.Docx.Parse.Styles
import Codec.Archive.Zip
@@ -63,6 +66,7 @@ import Data.Char (chr, ord, readLitChar)
import Data.List
import qualified Data.Map as M
import qualified Data.Text as T
+import Data.Text (Text)
import Data.Maybe
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
@@ -72,8 +76,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
import Text.TeXMath (Exp)
import Text.TeXMath.Readers.OMML (readOMML)
import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
-import Text.XML.Light
-import qualified Text.XML.Light.Cursor as XMLC
+import Text.Pandoc.XML.Light
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envComments :: Comments
@@ -127,37 +130,23 @@ mapD f xs =
in
concatMapM handler xs
-unwrap :: NameSpaces -> Content -> [Content]
-unwrap ns (Elem element)
+unwrapElement :: NameSpaces -> Element -> [Element]
+unwrapElement ns element
| isElem ns "w" "sdt" element
, Just sdtContent <- findChildByName ns "w" "sdtContent" element
- = concatMap (unwrap ns . Elem) (elChildren sdtContent)
+ = concatMap (unwrapElement ns) (elChildren sdtContent)
| isElem ns "w" "smartTag" element
- = concatMap (unwrap ns . Elem) (elChildren element)
-unwrap _ content = [content]
+ = concatMap (unwrapElement ns) (elChildren element)
+ | otherwise
+ = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }]
-unwrapChild :: NameSpaces -> Content -> Content
-unwrapChild ns (Elem element) =
- Elem $ element { elContent = concatMap (unwrap ns) (elContent element) }
-unwrapChild _ content = content
+unwrapContent :: NameSpaces -> Content -> [Content]
+unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element
+unwrapContent _ content = [content]
-walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor
-walkDocument' ns cur =
- let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur
- in
- case XMLC.nextDF modifiedCur of
- Just cur' -> walkDocument' ns cur'
- Nothing -> XMLC.root modifiedCur
-
-walkDocument :: NameSpaces -> Element -> Maybe Element
+walkDocument :: NameSpaces -> Element -> Element
walkDocument ns element =
- let cur = XMLC.fromContent (Elem element)
- cur' = walkDocument' ns cur
- in
- case XMLC.toTree cur' of
- Elem element' -> Just element'
- _ -> Nothing
-
+ element{ elContent = concatMap (unwrapContent ns) (elContent element) }
newtype Docx = Docx Document
deriving Show
@@ -239,6 +228,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
data BodyPart = Paragraph ParagraphStyle [ParPart]
| ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
| Tbl T.Text TblGrid TblLook [Row]
+ | TblCaption ParagraphStyle [ParPart]
| OMathPara [Exp]
deriving Show
@@ -250,12 +240,61 @@ newtype TblLook = TblLook {firstRowFormatting::Bool}
defaultTblLook :: TblLook
defaultTblLook = TblLook{firstRowFormatting = False}
-newtype Row = Row [Cell]
- deriving Show
+data Row = Row TblHeader [Cell] deriving Show
-newtype Cell = Cell [BodyPart]
+data TblHeader = HasTblHeader | NoTblHeader deriving (Show, Eq)
+
+data Cell = Cell GridSpan VMerge [BodyPart]
deriving Show
+type GridSpan = Integer
+
+data VMerge = Continue
+ -- ^ This cell should be merged with the one above it
+ | Restart
+ -- ^ This cell should not be merged with the one above it
+ deriving (Show, Eq)
+
+rowsToRowspans :: [Row] -> [[(Int, Cell)]]
+rowsToRowspans rows = let
+ removeMergedCells = fmap (filter (\(_, Cell _ vmerge _) -> vmerge == Restart))
+ in removeMergedCells (foldr f [] rows)
+ where
+ f :: Row -> [[(Int, Cell)]] -> [[(Int, Cell)]]
+ f (Row _ cells) acc = let
+ spans = g cells Nothing (listToMaybe acc)
+ in spans : acc
+
+ g ::
+ -- | The current row
+ [Cell] ->
+ -- | Number of columns left below
+ Maybe Integer ->
+ -- | (rowspan so far, cell) for the row below this one
+ Maybe [(Int, Cell)] ->
+ -- | (rowspan so far, cell) for this row
+ [(Int, Cell)]
+ g cells _ Nothing = zip (repeat 1) cells
+ g cells columnsLeftBelow (Just rowBelow) =
+ case cells of
+ [] -> []
+ thisCell@(Cell thisGridSpan _ _) : restOfRow -> case rowBelow of
+ [] -> zip (repeat 1) cells
+ (spanSoFarBelow, Cell gridSpanBelow vmerge _) : _ ->
+ let spanSoFar = case vmerge of
+ Restart -> 1
+ Continue -> 1 + spanSoFarBelow
+ columnsToDrop = thisGridSpan + (gridSpanBelow - fromMaybe gridSpanBelow columnsLeftBelow)
+ (newColumnsLeftBelow, restOfRowBelow) = dropColumns columnsToDrop rowBelow
+ in (spanSoFar, thisCell) : g restOfRow (Just newColumnsLeftBelow) (Just restOfRowBelow)
+
+ dropColumns :: Integer -> [(a, Cell)] -> (Integer, [(a, Cell)])
+ dropColumns n [] = (n, [])
+ dropColumns n cells@((_, Cell gridSpan _ _) : otherCells) =
+ if n < gridSpan
+ then (gridSpan - n, cells)
+ else dropColumns (n - gridSpan) otherCells
+
leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle
leftBiasedMergeRunStyle a b = RunStyle
{ isBold = isBold a <|> isBold b
@@ -343,10 +382,16 @@ archiveToDocxWithWarnings archive = do
Right doc -> Right (Docx doc, stateWarnings st)
Left e -> Left e
+parseXMLFromEntry :: Entry -> Maybe Element
+parseXMLFromEntry entry =
+ case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of
+ Left _ -> Nothing
+ Right el -> Just el
+
getDocumentXmlPath :: Archive -> Maybe FilePath
getDocumentXmlPath zf = do
entry <- findEntryByPath "_rels/.rels" zf
- relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+ relsElem <- parseXMLFromEntry entry
let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem
rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e ==
Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
@@ -354,18 +399,18 @@ getDocumentXmlPath zf = do
fp <- findAttr (QName "Target" Nothing Nothing) rel
-- sometimes there will be a leading slash, which windows seems to
-- have trouble with.
- return $ case fp of
+ return $ case T.unpack fp of
'/' : fp' -> fp'
- _ -> fp
+ fp' -> fp'
archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
docPath <- asks envDocXmlPath
entry <- maybeToD $ findEntryByPath docPath zf
- docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+ docElem <- maybeToD $ parseXMLFromEntry entry
let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
- let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
+ let bodyElem' = walkDocument namespaces bodyElem
body <- elemToBody namespaces bodyElem'
return $ Document namespaces body
@@ -401,23 +446,24 @@ constructBogusParStyleData stName = ParStyle
archiveToNotes :: Archive -> Notes
archiveToNotes zf =
let fnElem = findEntryByPath "word/footnotes.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ >>= parseXMLFromEntry
enElem = findEntryByPath "word/endnotes.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- fn_namespaces = maybe [] elemToNameSpaces fnElem
- en_namespaces = maybe [] elemToNameSpaces enElem
- ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
- fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote"
- en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote"
+ >>= parseXMLFromEntry
+ fn_namespaces = maybe mempty elemToNameSpaces fnElem
+ en_namespaces = maybe mempty elemToNameSpaces enElem
+ ns = M.union fn_namespaces en_namespaces
+ fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
+ en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
in
Notes ns fn en
archiveToComments :: Archive -> Comments
archiveToComments zf =
let cmtsElem = findEntryByPath "word/comments.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
- cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
+ >>= parseXMLFromEntry
+ cmts_namespaces = maybe mempty elemToNameSpaces cmtsElem
+ cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
+ cmtsElem
in
case cmts of
Just c -> Comments cmts_namespaces c
@@ -433,20 +479,26 @@ filePathToRelType path docXmlPath =
then Just InDocument
else Nothing
-relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
-relElemToRelationship relType element | qName (elName element) == "Relationship" =
+relElemToRelationship :: FilePath -> DocumentLocation -> Element
+ -> Maybe Relationship
+relElemToRelationship fp relType element | qName (elName element) == "Relationship" =
do
- relId <- findAttrText (QName "Id" Nothing Nothing) element
- target <- findAttrText (QName "Target" Nothing Nothing) element
- return $ Relationship relType relId target
-relElemToRelationship _ _ = Nothing
+ relId <- findAttr (QName "Id" Nothing Nothing) element
+ target <- findAttr (QName "Target" Nothing Nothing) element
+ -- target may be relative (media/image1.jpeg) or absolute
+ -- (/word/media/image1.jpeg); we need to relativize it (see #7374)
+ let frontOfFp = T.pack $ takeWhile (/= '_') fp
+ let target' = fromMaybe target $
+ T.stripPrefix frontOfFp $ T.dropWhile (== '/') target
+ return $ Relationship relType relId target'
+relElemToRelationship _ _ _ = Nothing
filePathToRelationships :: Archive -> FilePath -> FilePath -> [Relationship]
filePathToRelationships ar docXmlPath fp
| Just relType <- filePathToRelType fp docXmlPath
, Just entry <- findEntryByPath fp ar
- , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
- mapMaybe (relElemToRelationship relType) $ elChildren relElems
+ , Just relElems <- parseXMLFromEntry entry =
+ mapMaybe (relElemToRelationship fp relType) $ elChildren relElems
filePathToRelationships _ _ _ = []
archiveToRelationships :: Archive -> FilePath -> [Relationship]
@@ -478,10 +530,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
loElemToLevelOverride ns element
| isElem ns "w" "lvlOverride" element = do
- ilvl <- findAttrTextByName ns "w" "ilvl" element
+ ilvl <- findAttrByName ns "w" "ilvl" element
let startOverride = findChildByName ns "w" "startOverride" element
>>= findAttrByName ns "w" "val"
- >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+ >>= stringToInteger
lvl = findChildByName ns "w" "lvl" element
>>= levelElemToLevel ns
return $ LevelOverride ilvl startOverride lvl
@@ -490,9 +542,9 @@ loElemToLevelOverride _ _ = Nothing
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element
| isElem ns "w" "num" element = do
- numId <- findAttrTextByName ns "w" "numId" element
+ numId <- findAttrByName ns "w" "numId" element
absNumId <- findChildByName ns "w" "abstractNumId" element
- >>= findAttrTextByName ns "w" "val"
+ >>= findAttrByName ns "w" "val"
let lvlOverrides = mapMaybe
(loElemToLevelOverride ns)
(findChildrenByName ns "w" "lvlOverride" element)
@@ -502,7 +554,7 @@ numElemToNum _ _ = Nothing
absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
absNumElemToAbsNum ns element
| isElem ns "w" "abstractNum" element = do
- absNumId <- findAttrTextByName ns "w" "abstractNumId" element
+ absNumId <- findAttrByName ns "w" "abstractNumId" element
let levelElems = findChildrenByName ns "w" "lvl" element
levels = mapMaybe (levelElemToLevel ns) levelElems
return $ AbstractNumb absNumId levels
@@ -511,23 +563,23 @@ absNumElemToAbsNum _ _ = Nothing
levelElemToLevel :: NameSpaces -> Element -> Maybe Level
levelElemToLevel ns element
| isElem ns "w" "lvl" element = do
- ilvl <- findAttrTextByName ns "w" "ilvl" element
+ ilvl <- findAttrByName ns "w" "ilvl" element
fmt <- findChildByName ns "w" "numFmt" element
- >>= findAttrTextByName ns "w" "val"
+ >>= findAttrByName ns "w" "val"
txt <- findChildByName ns "w" "lvlText" element
- >>= findAttrTextByName ns "w" "val"
+ >>= findAttrByName ns "w" "val"
let start = findChildByName ns "w" "start" element
>>= findAttrByName ns "w" "val"
- >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+ >>= stringToInteger
return (Level ilvl fmt txt start)
levelElemToLevel _ _ = Nothing
archiveToNumbering' :: Archive -> Maybe Numbering
archiveToNumbering' zf =
case findEntryByPath "word/numbering.xml" zf of
- Nothing -> Just $ Numbering [] [] []
+ Nothing -> Just $ Numbering mempty [] []
Just entry -> do
- numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+ numberingElem <- parseXMLFromEntry entry
let namespaces = elemToNameSpaces numberingElem
numElems = findChildrenByName namespaces "w" "num" numberingElem
absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
@@ -537,13 +589,13 @@ archiveToNumbering' zf =
archiveToNumbering :: Archive -> Numbering
archiveToNumbering archive =
- fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
+ fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive)
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
+elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
elemToNotes ns notetype element
| isElem ns "w" (notetype <> "s") element =
let pairs = mapMaybe
- (\e -> findAttrTextByName ns "w" "id" e >>=
+ (\e -> findAttrByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" notetype element)
in
@@ -555,7 +607,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
elemToComments ns element
| isElem ns "w" "comments" element =
let pairs = mapMaybe
- (\e -> findAttrTextByName ns "w" "id" e >>=
+ (\e -> findAttrByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" "comment" element)
in
@@ -570,7 +622,7 @@ elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
let cols = findChildrenByName ns "w" "gridCol" element
in
- mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
+ mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger))
cols
elemToTblGrid _ _ = throwError WrongElem
@@ -594,14 +646,31 @@ elemToRow ns element | isElem ns "w" "tr" element =
do
let cellElems = findChildrenByName ns "w" "tc" element
cells <- mapD (elemToCell ns) cellElems
- return $ Row cells
+ let hasTblHeader = maybe NoTblHeader (const HasTblHeader)
+ (findChildByName ns "w" "trPr" element
+ >>= findChildByName ns "w" "tblHeader")
+ return $ Row hasTblHeader cells
elemToRow _ _ = throwError WrongElem
elemToCell :: NameSpaces -> Element -> D Cell
elemToCell ns element | isElem ns "w" "tc" element =
do
+ let properties = findChildByName ns "w" "tcPr" element
+ let gridSpan = properties
+ >>= findChildByName ns "w" "gridSpan"
+ >>= findAttrByName ns "w" "val"
+ >>= stringToInteger
+ let vMerge = case properties >>= findChildByName ns "w" "vMerge" of
+ Nothing -> Restart
+ Just e ->
+ fromMaybe Continue $ do
+ s <- findAttrByName ns "w" "val" e
+ case s of
+ "continue" -> Just Continue
+ "restart" -> Just Restart
+ _ -> Nothing
cellContents <- mapD (elemToBodyPart ns) (elChildren element)
- return $ Cell cellContents
+ return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents
elemToCell _ _ = throwError WrongElem
elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
@@ -615,12 +684,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
stringToInteger
, hangingParIndent =
findAttrByName ns "w" "hanging" element >>=
- stringToInteger}
+ stringToInteger }
elemToParIndentation _ _ = Nothing
-testBitMask :: String -> Int -> Bool
+testBitMask :: Text -> Int -> Bool
testBitMask bitMaskS n =
- case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+ case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of
[] -> False
((n', _) : _) -> (n' .|. n) /= 0
@@ -633,10 +702,9 @@ pNumInfo = getParStyleField numInfo . pStyle
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
elemToBodyPart ns element
| isElem ns "w" "p" element
- , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
- do
- expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
- return $ OMathPara expsLst
+ , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do
+ expsLst <- eitherToD $ readOMML $ showElement c
+ return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
, Just (numId, lvl) <- getNumInfo ns element = do
@@ -654,13 +722,31 @@ elemToBodyPart ns element
Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
levelInfo <- lookupLevel numId lvl <$> asks envNumbering
return $ ListItem parstyle numId lvl levelInfo parparts
- _ -> return $ Paragraph parstyle parparts
+ _ -> let
+ hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
+
+ hasSimpleTableField = fromMaybe False $ do
+ fldSimple <- findChildByName ns "w" "fldSimple" element
+ instr <- findAttrByName ns "w" "instr" fldSimple
+ pure ("Table" `elem` T.words instr)
+
+ hasComplexTableField = fromMaybe False $ do
+ instrText <- findElementByName ns "w" "instrText" element
+ pure ("Table" `elem` T.words (strContent instrText))
+
+ in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
+ then return $ TblCaption parstyle parparts
+ else return $ Paragraph parstyle parparts
+
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
- let caption' = findChildByName ns "w" "tblPr" element
+ let tblProperties = findChildByName ns "w" "tblPr" element
+ caption = fromMaybe "" $ tblProperties
>>= findChildByName ns "w" "tblCaption"
- >>= findAttrTextByName ns "w" "val"
- caption = fromMaybe "" caption'
+ >>= findAttrByName ns "w" "val"
+ description = fromMaybe "" $ tblProperties
+ >>= findChildByName ns "w" "tblDescription"
+ >>= findAttrByName ns "w" "val"
grid' = case findChildByName ns "w" "tblGrid" element of
Just g -> elemToTblGrid ns g
Nothing -> return []
@@ -673,7 +759,7 @@ elemToBodyPart ns element
grid <- grid'
tblLook <- tblLook'
rows <- mapD (elemToRow ns) (elChildren element)
- return $ Tbl caption grid tblLook rows
+ return $ Tbl (caption <> description) grid tblLook rows
elemToBodyPart _ _ = throwError WrongElem
lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
@@ -698,8 +784,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
getTitleAndAlt ns element =
let mbDocPr = findChildByName ns "wp" "inline" element >>=
findChildByName ns "wp" "docPr"
- title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
- alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
+ title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
+ alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
in (title, alt)
elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -711,22 +797,29 @@ elemToParPart ns element
= let (title, alt) = getTitleAndAlt ns drawingElem
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrTextByName ns "r" "embed"
+ >>= findAttrByName ns "r" "embed"
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
Nothing -> throwError WrongElem
--- The below is an attempt to deal with images in deprecated vml format.
+-- The two cases below are an attempt to deal with images in deprecated vml format.
+-- Todo: check out title and attr for deprecated format.
elemToParPart ns element
| isElem ns "w" "r" element
, Just _ <- findChildByName ns "w" "pict" element =
let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttrTextByName ns "r" "id"
+ >>= findAttrByName ns "r" "id"
in
case drawing of
- -- Todo: check out title and attr for deprecated format.
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
Nothing -> throwError WrongElem
+elemToParPart ns element
+ | isElem ns "w" "r" element
+ , Just objectElem <- findChildByName ns "w" "object" element
+ , Just shapeElem <- findChildByName ns "v" "shape" objectElem
+ , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
+ , Just drawingId <- findAttrByName ns "r" "id" imagedataElem
+ = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
-- Chart
elemToParPart ns element
| isElem ns "w" "r" element
@@ -790,7 +883,7 @@ elemToParPart ns element
fldCharState <- gets stateFldCharState
case fldCharState of
FldCharOpen -> do
- info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
+ info <- eitherToD $ parseFieldInfo $ strContent instrText
modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
return NullParPart
_ -> return NullParPart
@@ -811,48 +904,48 @@ elemToParPart ns element
return $ ChangedRuns change runs
elemToParPart ns element
| isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttrTextByName ns "w" "id" element
- , Just bmName <- findAttrTextByName ns "w" "name" element =
+ , Just bmId <- findAttrByName ns "w" "id" element
+ , Just bmName <- findAttrByName ns "w" "name" element =
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttrTextByName ns "r" "id" element = do
+ , Just relId <- findAttrByName ns "r" "id" element = do
location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
Just target ->
- case findAttrTextByName ns "w" "anchor" element of
+ case findAttrByName ns "w" "anchor" element of
Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
Nothing -> return $ ExternalHyperLink target runs
Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
+ , Just anchor <- findAttrByName ns "w" "anchor" element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttrTextByName ns "w" "id" element = do
+ , Just cmtId <- findAttrByName ns "w" "id" element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttrTextByName ns "w" "id" element =
+ , Just cmtId <- findAttrByName ns "w" "id" element =
return $ CommentEnd cmtId
elemToParPart ns element
| isElem ns "m" "oMath" element =
- fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
+ fmap PlainOMath (eitherToD $ readOMML $ showElement element)
elemToParPart _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
| isElem ns "w" "comment" element
- , Just cmtId <- findAttrTextByName ns "w" "id" element
- , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
- , cmtDate <- findAttrTextByName ns "w" "date" element = do
+ , Just cmtId <- findAttrByName ns "w" "id" element
+ , Just cmtAuthor <- findAttrByName ns "w" "author" element
+ , cmtDate <- findAttrByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem
@@ -871,7 +964,7 @@ elemToExtent drawingElem =
where
wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
- >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
+ >>= findAttr (QName at Nothing Nothing) >>= safeRead
childElemToRun :: NameSpaces -> Element -> D Run
@@ -882,7 +975,7 @@ childElemToRun ns element
= let (title, alt) = getTitleAndAlt ns element
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
+ >>= findAttr (QName "embed" (M.lookup "r" ns) (Just "r"))
in
case drawing of
Just s -> expandDrawingId s >>=
@@ -895,7 +988,7 @@ childElemToRun ns element
= return InlineChart
childElemToRun ns element
| isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttrTextByName ns "w" "id" element = do
+ , Just fnId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes
case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -903,7 +996,7 @@ childElemToRun ns element
Nothing -> return $ Footnote []
childElemToRun ns element
| isElem ns "w" "endnoteReference" element
- , Just enId <- findAttrTextByName ns "w" "id" element = do
+ , Just enId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes
case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -956,15 +1049,15 @@ getParStyleField _ _ = Nothing
getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
getTrackedChange ns element
| isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttrTextByName ns "w" "id" element
- , Just cAuthor <- findAttrTextByName ns "w" "author" element
- , mcDate <- findAttrTextByName ns "w" "date" element =
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , mcDate <- findAttrByName ns "w" "date" element =
Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate)
getTrackedChange ns element
| isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttrTextByName ns "w" "id" element
- , Just cAuthor <- findAttrTextByName ns "w" "author" element
- , mcDate <- findAttrTextByName ns "w" "date" element =
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , mcDate <- findAttrByName ns "w" "date" element =
Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate)
getTrackedChange _ _ = Nothing
@@ -973,7 +1066,7 @@ elemToParagraphStyle ns element sty
| Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (fmap ParaStyleId . findAttrTextByName ns "w" "val")
+ (fmap ParaStyleId . findAttrByName ns "w" "val")
(findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
{pStyle = mapMaybe (`M.lookup` sty) style
@@ -1005,7 +1098,7 @@ elemToRunStyleD ns element
charStyles <- asks envCharStyles
let parentSty =
findChildByName ns "w" "rStyle" rPr >>=
- findAttrTextByName ns "w" "val" >>=
+ findAttrByName ns "w" "val" >>=
flip M.lookup charStyles . CharStyleId
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
@@ -1015,7 +1108,7 @@ elemToRunElem ns element
| isElem ns "w" "t" element
|| isElem ns "w" "delText" element
|| isElem ns "m" "t" element = do
- let str = T.pack $ strContent element
+ let str = strContent element
font <- asks envFont
case font of
Nothing -> return $ TextRun str
@@ -1037,14 +1130,14 @@ getSymChar :: NameSpaces -> Element -> RunElem
getSymChar ns element
| Just s <- lowerFromPrivate <$> getCodepoint
, Just font <- getFont =
- case readLitChar ("\\x" ++ s) of
+ case readLitChar ("\\x" ++ T.unpack s) of
[(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
_ -> TextRun ""
where
getCodepoint = findAttrByName ns "w" "char" element
- getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
- lowerFromPrivate ('F':xs) = '0':xs
- lowerFromPrivate xs = xs
+ getFont = textToFont =<< findAttrByName ns "w" "font" element
+ lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t
+ | otherwise = t
getSymChar _ _ = TextRun ""
elemToRunElems :: NameSpaces -> Element -> D [RunElem]
@@ -1054,8 +1147,9 @@ elemToRunElems ns element
let qualName = elemName ns "w"
let font = do
fontElem <- findElement (qualName "rFonts") element
- textToFont . T.pack =<<
- foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
+ textToFont =<<
+ foldr ((<|>) . (flip findAttr fontElem . qualName))
+ Nothing ["ascii", "hAnsi"]
local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
elemToRunElems _ _ = throwError WrongElem
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index 236167187..0d7271d6a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -48,11 +48,13 @@ import Data.Function (on)
import Data.String (IsString(..))
import qualified Data.Map as M
import qualified Data.Text as T
+import qualified Data.Text.Read
+import Data.Text (Text)
import Data.Maybe
import Data.Coerce
import Text.Pandoc.Readers.Docx.Util
import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light
+import Text.Pandoc.XML.Light
newtype CharStyleId = CharStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -108,7 +110,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
, isRTL :: Maybe Bool
, isForceCTL :: Maybe Bool
, rVertAlign :: Maybe VertAlign
- , rUnderline :: Maybe String
+ , rUnderline :: Maybe Text
, rParentStyle :: Maybe CharStyle
}
deriving Show
@@ -135,19 +137,22 @@ defaultRunStyle = RunStyle { isBold = Nothing
, rParentStyle = Nothing
}
-archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
- (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles'
+ :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2)
+ => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
archiveToStyles' conv1 conv2 zf =
- let stylesElem = findEntryByPath "word/styles.xml" zf >>=
- (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- in
- case stylesElem of
- Nothing -> (M.empty, M.empty)
- Just styElem ->
- let namespaces = elemToNameSpaces styElem
- in
- ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
- M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
+ case findEntryByPath "word/styles.xml" zf of
+ Nothing -> (M.empty, M.empty)
+ Just entry ->
+ case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+ Left _ -> (M.empty, M.empty)
+ Right styElem ->
+ let namespaces = elemToNameSpaces styElem
+ in
+ ( M.fromList $ map (\r -> (conv1 r, r)) $
+ buildBasedOnList namespaces styElem Nothing,
+ M.fromList $ map (\p -> (conv2 p, p)) $
+ buildBasedOnList namespaces styElem Nothing)
isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
@@ -155,7 +160,7 @@ isBasedOnStyle ns element parentStyle
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
- findAttrTextByName ns "w" "val"
+ findAttrByName ns "w" "val"
, Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
@@ -165,7 +170,7 @@ isBasedOnStyle ns element parentStyle
| otherwise = False
class HasStyleId a => ElemToStyle a where
- cStyleType :: Maybe a -> String
+ cStyleType :: Maybe a -> Text
elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
class FromStyleId (StyleId a) => HasStyleId a where
@@ -222,8 +227,10 @@ buildBasedOnList ns element rootStyle =
stys -> stys ++
concatMap (buildBasedOnList ns element . Just) stys
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+stringToInteger :: Text -> Maybe Integer
+stringToInteger s = case Data.Text.Read.decimal s of
+ Right (x,_) -> Just x
+ Left _ -> Nothing
checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
checkOnOff ns rPr tag
@@ -243,7 +250,7 @@ checkOnOff _ _ _ = Nothing
elemToCharStyle :: NameSpaces
-> Element -> Maybe CharStyle -> Maybe CharStyle
elemToCharStyle ns element parentStyle
- = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
+ = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
<*> getElementStyleName ns element
<*> Just (elemToRunStyle ns element parentStyle)
@@ -277,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle
getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
| Just styleName <- getElementStyleName ns element
- , Just n <- stringToInteger . T.unpack =<<
+ , Just n <- stringToInteger =<<
(T.stripPrefix "heading " . T.toLower $
fromStyleName styleName)
, n > 0 = Just (styleName, fromInteger n)
@@ -285,8 +292,8 @@ getHeaderLevel _ _ = Nothing
getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
getElementStyleName ns el = coerce <$>
- ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
- <|> findAttrTextByName ns "w" "styleId" el)
+ ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+ <|> findAttrByName ns "w" "styleId" el)
getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
getNumInfo ns element = do
@@ -294,15 +301,15 @@ getNumInfo ns element = do
findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
findChildByName ns "w" "ilvl" >>=
- findAttrTextByName ns "w" "val")
+ findAttrByName ns "w" "val")
numId <- numPr >>=
findChildByName ns "w" "numId" >>=
- findAttrTextByName ns "w" "val"
+ findAttrByName ns "w" "val"
return (numId, lvl)
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
elemToParStyleData ns element parentStyle
- | Just styleId <- findAttrTextByName ns "w" "styleId" element
+ | Just styleId <- findAttrByName ns "w" "styleId" element
, Just styleName <- getElementStyleName ns element
= Just $ ParStyle
{
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index a573344ff..970697a2d 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,7 +1,8 @@
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.StyleMaps
Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
- 2014-2020 John MacFarlane <jgm@berkeley.edu>,
+ 2014-2021 John MacFarlane <jgm@berkeley.edu>,
2015 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above
@@ -18,51 +19,52 @@ module Text.Pandoc.Readers.Docx.Util (
, elemToNameSpaces
, findChildByName
, findChildrenByName
- , findAttrText
+ , findElementByName
, findAttrByName
- , findAttrTextByName
) where
-import Data.Maybe (mapMaybe)
import qualified Data.Text as T
-import Text.XML.Light
+import Data.Text (Text)
+import Text.Pandoc.XML.Light
+import qualified Data.Map as M
-type NameSpaces = [(String, String)]
+type NameSpaces = M.Map Text Text
elemToNameSpaces :: Element -> NameSpaces
-elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+elemToNameSpaces = foldr (\(Attr qn val) ->
+ case qn of
+ QName s _ (Just "xmlns") -> M.insert s val
+ _ -> id) mempty . elAttribs
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
-
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
elemName ns prefix name =
- QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+ QName name (M.lookup prefix ns)
+ (if T.null prefix then Nothing else Just prefix)
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
isElem ns prefix name element =
- let ns' = ns ++ elemToNameSpaces element
+ let ns' = ns <> elemToNameSpaces element
in qName (elName element) == name &&
- qURI (elName element) == lookup prefix ns'
+ qURI (elName element) == M.lookup prefix ns'
-findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
+findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
+ let ns' = ns <> elemToNameSpaces el
in findChild (elemName ns' pref name) el
-findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
+findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
+ let ns' = ns <> elemToNameSpaces el
in findChildren (elemName ns' pref name) el
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText x = fmap T.pack . findAttr x
+-- | Like 'findChildrenByName', but searches descendants.
+findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
+findElementByName ns pref name el =
+ let ns' = ns <> elemToNameSpaces el
+ in findElement (elemName ns' pref name) el
-findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
+findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
+ let ns' = ns <> elemToNameSpaces el
in findAttr (elemName ns' pref name) el
-findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
-findAttrTextByName a b c = fmap T.pack . findAttrByName a b c
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index 336be09e5..db98ac8de 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -29,26 +29,27 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
+import Text.Pandoc.Shared (trim, stringify, tshow)
-- | Read DokuWiki from an input string and return a Pandoc document.
-readDokuWiki :: PandocMonad m
+readDokuWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readDokuWiki opts s = do
- let input = crFilter s
- res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+ let sources = toSources s
+ res <- runParserT parseDokuWiki def {stateOptions = opts }
+ (initialSourceName sources) sources
case res of
- Left e -> throwError $ PandocParsecError input e
+ Left e -> throwError $ PandocParsecError sources e
Right d -> return d
-type DWParser = ParserT Text ParserState
+type DWParser = ParserT Sources ParserState
-- * Utility functions
-- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
eol = void newline <|> eof
nested :: PandocMonad m => DWParser m a -> DWParser m a
@@ -317,7 +318,7 @@ interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page
interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page
interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page
interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page
-interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky"
+interwikiToUrl unknown page = unknown <> ">" <> page
linkText :: PandocMonad m => DWParser m B.Inlines
linkText = parseLink fromRaw "[[" "]]"
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 5e3326e6d..eb8d2405d 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -17,14 +17,14 @@ module Text.Pandoc.Readers.EPUB
(readEPUB)
where
-import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry,
+import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
toArchiveOrFail)
import Control.DeepSeq (NFData, deepseq)
import Control.Monad (guard, liftM, liftM2, mplus)
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.List (isInfixOf)
import qualified Data.Text as T
+import Data.Text (Text)
import qualified Data.Map as M (Map, elems, fromList, lookup)
import Data.Maybe (mapMaybe)
import qualified Data.Text.Lazy as TL
@@ -40,12 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
import Text.Pandoc.MIME (MimeType)
import Text.Pandoc.Options (ReaderOptions (..))
import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
-import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow)
+import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
import Text.Pandoc.Walk (query, walk)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
-type Items = M.Map String (FilePath, MimeType)
+type Items = M.Map Text (FilePath, MimeType)
readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
readEPUB opts bytes = case toArchiveOrFail bytes of
@@ -125,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty
imageMimes :: [MimeType]
imageMimes = ["image/gif", "image/jpeg", "image/png"]
-type CoverId = String
+type CoverId = Text
type CoverImage = FilePath
-parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
+parseManifest :: (PandocMonad m)
+ => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
parseManifest content coverId = do
manifest <- findElementE (dfName "manifest") content
let items = findChildren (dfName "item") manifest
r <- mapM parseItem items
let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
- return (cover `mplus` coverId, M.fromList r)
+ return (T.unpack <$> (cover `mplus` coverId), M.fromList r)
where
- findCover e = maybe False (isInfixOf "cover-image")
+ findCover e = maybe False (T.isInfixOf "cover-image")
(findAttr (emptyName "properties") e)
|| Just True == liftM2 (==) coverId (findAttr (emptyName "id") e)
parseItem e = do
uid <- findAttrE (emptyName "id") e
href <- findAttrE (emptyName "href") e
mime <- findAttrE (emptyName "media-type") e
- return (uid, (href, T.pack mime))
+ return (uid, (T.unpack href, mime))
parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
@@ -172,25 +173,25 @@ parseMeta content = do
-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem e@(stripNamespace . elName -> field) meta =
- addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta
+ addMetaField (renameMeta field) (B.str $ strContent e) meta
-renameMeta :: String -> T.Text
+renameMeta :: Text -> Text
renameMeta "creator" = "author"
-renameMeta s = T.pack s
+renameMeta s = s
getManifest :: PandocMonad m => Archive -> m (String, Element)
getManifest archive = do
metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
- docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+ docElem <- parseXMLDocE metaEntry
let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
as <- fmap (map attrToPair . elAttribs)
(findElementE (QName "rootfile" (Just ns) Nothing) docElem)
- manifestFile <- mkE "Root not found" (lookup "full-path" as)
+ manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as)
let rootdir = dropFileName manifestFile
--mime <- lookup "media-type" as
manifest <- findEntryByPathE manifestFile archive
- (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+ (rootdir,) <$> parseXMLDocE manifest
-- Fixup
@@ -200,7 +201,8 @@ fixInternalReferences pathToFile =
. walk (fixBlockIRs filename)
. walk (fixInlineIRs filename)
where
- (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile
+ (root, T.unpack . escapeURI . T.pack -> filename) =
+ splitFileName pathToFile
fixInlineIRs :: String -> Inline -> Inline
fixInlineIRs s (Span as v) =
@@ -213,7 +215,7 @@ fixInlineIRs s (Link as is t) =
Link (fixAttrs s as) is t
fixInlineIRs _ v = v
-prependHash :: [T.Text] -> Inline -> Inline
+prependHash :: [Text] -> Inline -> Inline
prependHash ps l@(Link attr is (url, tit))
| or [s `T.isPrefixOf` url | s <- ps] =
Link attr is ("#" <> url, tit)
@@ -230,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) =
fixBlockIRs _ b = b
fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
+fixAttrs s (ident, cs, kvs) =
+ (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
-addHash :: String -> T.Text -> T.Text
+addHash :: FilePath -> Text -> Text
addHash _ "" = ""
addHash s ident = T.pack (takeFileName s) <> "#" <> ident
-removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
-isEPUBAttr :: (T.Text, a) -> Bool
+isEPUBAttr :: (Text, a) -> Bool
isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k
-- Library
@@ -256,39 +259,44 @@ uncurry3 f (a, b, c) = f a b c
-- Utility
-stripNamespace :: QName -> String
+stripNamespace :: QName -> Text
stripNamespace (QName v _ _) = v
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
attrToNSPair _ = Nothing
-attrToPair :: Attr -> (String, String)
+attrToPair :: Attr -> (Text, Text)
attrToPair (Attr (QName name _ _) val) = (name, val)
-defaultNameSpace :: Maybe String
+defaultNameSpace :: Maybe Text
defaultNameSpace = Just "http://www.idpf.org/2007/opf"
-dfName :: String -> QName
+dfName :: Text -> QName
dfName s = QName s defaultNameSpace Nothing
-emptyName :: String -> QName
+emptyName :: Text -> QName
emptyName s = QName s Nothing Nothing
-- Convert Maybe interface to Either
-findAttrE :: PandocMonad m => QName -> Element -> m String
+findAttrE :: PandocMonad m => QName -> Element -> m Text
findAttrE q e = mkE "findAttr" $ findAttr q e
findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
findEntryByPathE (normalise . unEscapeString -> path) a =
- mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+ mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a
-parseXMLDocE :: PandocMonad m => String -> m Element
-parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+parseXMLDocE :: PandocMonad m => Entry -> m Element
+parseXMLDocE entry =
+ either (throwError . PandocXMLError fp) return $ parseXMLElement doc
+ where
+ doc = UTF8.toTextLazy . fromEntry $ entry
+ fp = T.pack $ eRelativePath entry
findElementE :: PandocMonad m => QName -> Element -> m Element
-findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+findElementE e x =
+ mkE ("Unable to find element: " <> tshow e) $ findElement e x
-mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ T.pack s) return
+mkE :: PandocMonad m => Text -> Maybe a -> m a
+mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b0d2f092b..84e5278db 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -25,13 +25,13 @@ TODO:
module Text.Pandoc.Readers.FB2 ( readFB2 ) where
import Control.Monad.Except (throwError)
import Control.Monad.State.Strict
-import Data.ByteString.Lazy.Char8 ( pack )
import Data.ByteString.Base64.Lazy
import Data.Functor
import Data.List (intersperse)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
import Data.Default
import Data.Maybe
import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -40,8 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
type FB2 m = StateT FB2State m
@@ -62,12 +63,15 @@ instance HasMeta FB2State where
setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
-readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readFB2 _ inp =
- case parseXMLDoc $ crFilter inp of
- Nothing -> throwError $ PandocParseError "Not an XML document"
- Just e -> do
- (bs, st) <- runStateT (parseRootElement e) def
+ case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
+ Left msg -> throwError $ PandocXMLError "" msg
+ Right el -> do
+ (bs, st) <- runStateT (parseRootElement el) def
let authors = if null $ fb2Authors st
then id
else setMeta "author" (map text $ reverse $ fb2Authors st)
@@ -83,12 +87,12 @@ removeHash t = case T.uncons t of
Just ('#', xs) -> xs
_ -> t
-convertEntity :: String -> Text
-convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e)
parseInline :: PandocMonad m => Content -> FB2 m Inlines
parseInline (Elem e) =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -96,12 +100,12 @@ parseInline (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ T.pack $ strContent e
+ "code" -> pure $ code $ strContent e
"image" -> parseInlineImageElement e
name -> do
report $ IgnoredElement name
pure mempty
-parseInline (Text x) = pure $ text $ T.pack $ cdData x
+parseInline (Text x) = pure $ text $ cdData x
parseInline (CRef r) = pure $ str $ convertEntity r
parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -111,7 +115,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
parseRootElement e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"FictionBook" -> do
-- Parse notes before parsing the rest of the content.
case filterChild isNotesBody e of
@@ -144,7 +148,7 @@ parseNote e =
Just sectionId -> do
content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
oldNotes <- gets fb2Notes
- modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
+ modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes }
pure ()
where
isTitle x = qName (elName x) == "title"
@@ -156,7 +160,7 @@ parseNote e =
-- | Parse a child of @\<FictionBook>@ element.
parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
parseFictionBookChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"stylesheet" -> pure mempty -- stylesheet is ignored
"description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
"body" -> if isNotesBody e
@@ -168,7 +172,7 @@ parseFictionBookChild e =
-- | Parse a child of @\<description>@ element.
parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
parseDescriptionChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"title-info" -> mapM_ parseTitleInfoChild (elChildren e)
"src-title-info" -> pure () -- ignore
"document-info" -> pure ()
@@ -182,7 +186,7 @@ parseDescriptionChild e =
-- | Parse a child of @\<body>@ element.
parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
parseBodyChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"image" -> parseImageElement e
"title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
"epigraph" -> parseEpigraph e
@@ -196,7 +200,10 @@ parseBinaryElement e =
(Nothing, _) -> report $ IgnoredElement "binary without id attribute"
(Just _, Nothing) ->
report $ IgnoredElement "binary without content-type attribute"
- (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
+ (Just filename, contentType) ->
+ insertMedia (T.unpack filename) contentType
+ (decodeLenient
+ (UTF8.fromTextLazy . TL.fromStrict . strContent $ e))
-- * Type parsers
@@ -206,13 +213,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
parseAuthorChild e =
- case T.pack $ qName $ elName e of
- "first-name" -> pure $ Just $ T.pack $ strContent e
- "middle-name" -> pure $ Just $ T.pack $ strContent e
- "last-name" -> pure $ Just $ T.pack $ strContent e
- "nickname" -> pure $ Just $ T.pack $ strContent e
- "home-page" -> pure $ Just $ T.pack $ strContent e
- "email" -> pure $ Just $ T.pack $ strContent e
+ case qName $ elName e of
+ "first-name" -> pure $ Just $ strContent e
+ "middle-name" -> pure $ Just $ strContent e
+ "last-name" -> pure $ Just $ strContent e
+ "nickname" -> pure $ Just $ strContent e
+ "home-page" -> pure $ Just $ strContent e
+ "email" -> pure $ Just $ strContent e
name -> do
report $ IgnoredElement $ name <> " in author"
pure Nothing
@@ -236,13 +243,13 @@ parseTitleContent _ = pure Nothing
parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
parseImageElement e =
case href of
- Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
+ Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
Nothing -> do
report $ IgnoredElement " image without href"
pure mempty
- where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
- title = maybe "" T.pack $ findAttr (unqual "title") e
- imgId = maybe "" T.pack $ findAttr (unqual "id") e
+ where alt = maybe mempty str $ findAttr (unqual "alt") e
+ title = fromMaybe "" $ findAttr (unqual "title") e
+ imgId = fromMaybe "" $ findAttr (unqual "id") e
href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
-- | Parse @pType@
@@ -256,7 +263,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
-- | Parse @citeType@ child
parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
parseCiteChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"empty-line" -> pure horizontalRule
@@ -271,13 +278,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
parsePoemChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"epigraph" -> parseEpigraph e
"stanza" -> parseStanza e
"text-author" -> para <$> parsePType e
- "date" -> pure $ para $ text $ T.pack $ strContent e
+ "date" -> pure $ para $ text $ strContent e
name -> report (UnexpectedXmlElement name "poem") $> mempty
parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -290,7 +297,7 @@ joinLineBlocks [] = []
parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
parseStanzaChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"v" -> lineBlock . (:[]) <$> parsePType e
@@ -300,11 +307,11 @@ parseStanzaChild e =
parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraph e =
divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
- where divId = maybe "" T.pack $ findAttr (unqual "id") e
+ where divId = fromMaybe "" $ findAttr (unqual "id") e
parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraphChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -318,7 +325,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
parseAnnotationChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -332,14 +339,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
parseSection e = do
n <- gets fb2SectionLevel
modify $ \st -> st{ fb2SectionLevel = n + 1 }
- let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
+ let sectionId = fromMaybe "" $ findAttr (unqual "id") e
bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
modify $ \st -> st{ fb2SectionLevel = n }
pure bs
parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
parseSectionChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"title" -> parseBodyChild e
"epigraph" -> parseEpigraph e
"image" -> parseImageElement e
@@ -361,16 +368,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
parseNamedStyle e = do
content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
- let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
+ let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
case findAttr (unqual "name") e of
- Just name -> pure $ spanWith ("", [T.pack name], lang) content
+ Just name -> pure $ spanWith ("", [name], lang) content
Nothing -> do
report $ IgnoredElement "link without required name"
pure mempty
parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
parseNamedStyleChild (Elem e) =
- case T.pack $ qName (elName e) of
+ case qName (elName e) of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -378,7 +385,7 @@ parseNamedStyleChild (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ T.pack $ strContent e
+ "code" -> pure $ code $ strContent e
"image" -> parseInlineImageElement e
name -> do
report $ IgnoredElement $ name <> " in style"
@@ -390,7 +397,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
parseLinkType e = do
content <- mconcat <$> mapM parseStyleLinkType (elContent e)
notes <- gets fb2Notes
- case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+ case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
Just href -> case findAttr (QName "type" Nothing Nothing) e of
Just "note" -> case M.lookup href notes of
Nothing -> pure $ link href "" content
@@ -417,15 +424,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
-- | Parse @title-infoType@
parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
parseTitleInfoChild e =
- case T.pack $ qName (elName e) of
+ case qName (elName e) of
"genre" -> pure ()
"author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
- "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
+ "book-title" -> modify (setMeta "title" (text $ strContent e))
"annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
"keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
- $ T.pack
$ strContent e))
- "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
+ "date" -> modify (setMeta "date" (text $ strContent e))
"coverpage" -> parseCoverPage e
"lang" -> pure ()
"src-lang" -> pure ()
@@ -439,7 +445,7 @@ parseCoverPage e =
Just img -> case href of
Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
Nothing -> pure ()
- where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+ where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
Nothing -> pure ()
-- | Parse @inlineImageType@ element
@@ -452,5 +458,5 @@ parseInlineImageElement e =
Nothing -> do
report $ IgnoredElement "inline image without href"
pure mempty
- where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
- href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ where alt = maybe mempty str $ findAttr (unqual "alt") e
+ href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index eb78979a3..fdf4f28e0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -5,7 +5,7 @@
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.HTML
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -19,21 +19,20 @@ module Text.Pandoc.Readers.HTML ( readHtml
, htmlInBalanced
, isInlineTag
, isBlockTag
- , NamedTag(..)
, isTextTag
, isCommentTag
) where
import Control.Applicative ((<|>))
-import Control.Arrow (first)
import Control.Monad (guard, msum, mzero, unless, void)
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
import Data.ByteString.Base64 (encode)
import Data.Char (isAlphaNum, isLetter)
import Data.Default (Default (..), def)
import Data.Foldable (for_)
import Data.List.Split (splitWhen)
+import Data.List (foldl')
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Monoid (First (..))
@@ -62,21 +61,22 @@ import Text.Pandoc.Options (
ReaderOptions (readerExtensions, readerStripComments),
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
-import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
- extractSpaces, htmlSpanLikeElements, safeRead, tshow)
+import Text.Pandoc.Shared (
+ addMetaField, blocksToInlines', escapeURI, extractSpaces,
+ htmlSpanLikeElements, renderTags', safeRead, tshow)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
-- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assumes @'\n'@ line endings)
+ -> a -- ^ Input to parse
-> m Pandoc
readHtml opts inp = do
- let tags = stripPrefixes . canonicalizeTags $
+ let tags = stripPrefixes $ canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True }
- (crFilter inp)
+ (sourcesToText $ toSources inp)
parseDoc = do
blocks <- fixPlains False . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
@@ -95,6 +95,15 @@ readHtml opts inp = do
Right doc -> return doc
Left err -> throwError $ PandocParseError $ T.pack $ getError err
+-- Strip namespace prefixes on tags (not attributes)
+stripPrefixes :: [Tag Text] -> [Tag Text]
+stripPrefixes = map stripPrefix
+
+stripPrefix :: Tag Text -> Tag Text
+stripPrefix (TagOpen s as) = TagOpen (T.takeWhileEnd (/=':') s) as
+stripPrefix (TagClose s) = TagClose (T.takeWhileEnd (/=':') s)
+stripPrefix x = x
+
replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
replaceNotes bs = do
st <- getState
@@ -112,14 +121,18 @@ setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
setInPlain = local (\s -> s {inPlain = True})
pHtml :: PandocMonad m => TagParser m Blocks
-pHtml = try $ do
+pHtml = do
(TagOpen "html" attr) <- lookAhead pAny
- for_ (lookup "lang" attr) $
+ for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
updateState . B.setMeta "lang" . B.text
pInTags "html" block
pBody :: PandocMonad m => TagParser m Blocks
-pBody = pInTags "body" block
+pBody = do
+ (TagOpen "body" attr) <- lookAhead pAny
+ for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
+ updateState . B.setMeta "lang" . B.text
+ pInTags "body" block
pHead :: PandocMonad m => TagParser m Blocks
pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
@@ -145,32 +158,65 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
return mempty
block :: PandocMonad m => TagParser m Blocks
-block = do
- res <- choice
- [ eSection
- , eSwitch B.para block
- , mempty <$ eFootnote
- , mempty <$ eTOC
- , mempty <$ eTitlePage
- , pPara
- , pHeader
- , pBlockQuote
- , pCodeBlock
- , pList
- , pHrule
- , pTable block
- , pHtml
- , pHead
- , pBody
- , pLineBlock
- , pDiv
- , pPlain
- , pFigure
- , pIframe
- , pRawHtmlBlock
- ]
- trace (T.take 60 $ tshow $ B.toList res)
- return res
+block = ((do
+ tag <- lookAhead (pSatisfy isBlockTag)
+ exts <- getOption readerExtensions
+ case tag of
+ TagOpen name attr ->
+ let type' = fromMaybe "" $
+ lookup "type" attr <|> lookup "epub:type" attr
+ epubExts = extensionEnabled Ext_epub_html_exts exts
+ in
+ case name of
+ _ | name `elem` sectioningContent
+ , epubExts
+ , "chapter" `T.isInfixOf` type'
+ -> eSection
+ _ | epubExts
+ , type' `elem` ["footnote", "rearnote"]
+ -> mempty <$ eFootnote
+ _ | epubExts
+ , type' == "toc"
+ -> mempty <$ eTOC
+ _ | "titlepage" `T.isInfixOf` type'
+ , name `elem` ("section" : groupingContent)
+ -> mempty <$ eTitlePage
+ "p" -> pPara
+ "h1" -> pHeader
+ "h2" -> pHeader
+ "h3" -> pHeader
+ "h4" -> pHeader
+ "h5" -> pHeader
+ "h6" -> pHeader
+ "blockquote" -> pBlockQuote
+ "pre" -> pCodeBlock
+ "ul" -> pBulletList
+ "ol" -> pOrderedList
+ "dl" -> pDefinitionList
+ "table" -> pTable block
+ "hr" -> pHrule
+ "html" -> pHtml
+ "head" -> pHead
+ "body" -> pBody
+ "div"
+ | extensionEnabled Ext_line_blocks exts
+ , Just "line-block" <- lookup "class" attr
+ -> pLineBlock
+ | otherwise
+ -> pDiv
+ "section" -> pDiv
+ "header" -> pDiv
+ "main" -> pDiv
+ "figure" -> pFigure
+ "iframe" -> pIframe
+ "style" -> pRawHtmlBlock
+ "textarea" -> pRawHtmlBlock
+ "switch"
+ | epubExts
+ -> eSwitch B.para block
+ _ -> mzero
+ _ -> mzero) <|> pPlain <|> pRawHtmlBlock) >>= \res ->
+ res <$ trace (T.take 60 $ tshow $ B.toList res)
namespaces :: PandocMonad m => [(Text, TagParser m Inlines)]
namespaces = [(mathMLNamespace, pMath True)]
@@ -243,9 +289,6 @@ eTOC = try $ do
guard $ (lookup "type" attr <|> lookup "epub:type" attr) == Just "toc"
void (pInTags tag block)
-pList :: PandocMonad m => TagParser m Blocks
-pList = pBulletList <|> pOrderedList <|> pDefinitionList
-
pBulletList :: PandocMonad m => TagParser m Blocks
pBulletList = try $ do
pSatisfy (matchTagOpen "ul" [])
@@ -319,7 +362,10 @@ pDefListItem = try $ do
terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
skipMany nonItem
- let term = foldl1 (\x y -> x <> B.linebreak <> y) $ map trimInlines terms
+ let term = foldl' (\x y -> if null x
+ then trimInlines y
+ else x <> B.linebreak <> trimInlines y)
+ mempty terms
return (term, map (fixPlains True) defs)
fixPlains :: Bool -> Blocks -> Blocks
@@ -356,13 +402,16 @@ pLineBlock = try $ do
B.toList ils
return $ B.lineBlock lns
+isDivLike :: Text -> Bool
+isDivLike "div" = True
+isDivLike "section" = True
+isDivLike "header" = True
+isDivLike "main" = True
+isDivLike _ = False
+
pDiv :: PandocMonad m => TagParser m Blocks
pDiv = try $ do
guardEnabled Ext_native_divs
- let isDivLike "div" = True
- isDivLike "section" = True
- isDivLike "main" = True
- isDivLike _ = False
TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
let (ident, classes, kvs) = toAttr attr'
contents <- pInTags tag block
@@ -380,11 +429,17 @@ pIframe = try $ do
tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
pCloses "iframe" <|> eof
url <- canonicalizeUrl $ fromAttrib "src" tag
- (bs, _) <- openURL url
- let inp = UTF8.toText bs
- opts <- readerOpts <$> getState
- Pandoc _ contents <- readHtml opts inp
- return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
+ if T.null url
+ then ignore $ renderTags' [tag, TagClose "iframe"]
+ else catchError
+ (do (bs, _) <- openURL url
+ let inp = UTF8.toText bs
+ opts <- readerOpts <$> getState
+ Pandoc _ contents <- readHtml opts inp
+ return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
+ (\e -> do
+ logMessage $ CouldNotFetchResource url (renderError e)
+ ignore $ renderTags' [tag, TagClose "iframe"])
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do
@@ -446,17 +501,13 @@ pHeader = try $ do
tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
(const True)
let attr = toStringAttr attr'
- let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text)
- [("class","title")]
level <- headerLevel tagtype
contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] T.words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
attr'' <- registerHeader (ident, classes, keyvals) contents
- return $ if bodyTitle
- then mempty -- skip a representation of the title in the body
- else B.headerWith attr'' level contents
+ return $ B.headerWith attr'' level contents
pHrule :: PandocMonad m => TagParser m Blocks
pHrule = do
@@ -506,7 +557,18 @@ pFigure = try $ do
pCodeBlock :: PandocMonad m => TagParser m Blocks
pCodeBlock = try $ do
TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
- let attr = toAttr attr'
+ -- if the `pre` has no attributes, try if it is followed by a `code`
+ -- element and use those attributes if possible.
+ attr <- case attr' of
+ _:_ -> pure (toAttr attr')
+ [] -> option nullAttr $ do
+ TagOpen _ codeAttr <- pSatisfy (matchTagOpen "code" [])
+ pure $ toAttr
+ [ (k, v') | (k, v) <- codeAttr
+ -- strip language from class
+ , let v' = if k == "class"
+ then fromMaybe v (T.stripPrefix "language-" v)
+ else v ]
contents <- manyTill pAny (pCloses "pre" <|> eof)
let rawText = T.concat $ map tagToText contents
-- drop leading newline if any
@@ -525,31 +587,47 @@ tagToText (TagOpen "br" _) = "\n"
tagToText _ = ""
inline :: PandocMonad m => TagParser m Inlines
-inline = choice
- [ eNoteref
- , eSwitch id inline
- , pTagText
- , pQ
- , pEmph
- , pStrong
- , pSuperscript
- , pSubscript
- , pSpanLike
- , pSmall
- , pStrikeout
- , pUnderline
- , pLineBreak
- , pLink
- , pImage
- , pSvg
- , pBdo
- , pCode
- , pCodeWithClass [("samp","sample"),("var","variable")]
- , pSpan
- , pMath False
- , pScriptMath
- , pRawHtmlInline
- ]
+inline = pTagText <|> do
+ tag <- lookAhead (pSatisfy isInlineTag)
+ exts <- getOption readerExtensions
+ case tag of
+ TagOpen name attr ->
+ case name of
+ "a" | extensionEnabled Ext_epub_html_exts exts
+ , Just "noteref" <- lookup "type" attr <|> lookup "epub:type" attr
+ , Just ('#',_) <- lookup "href" attr >>= T.uncons
+ -> eNoteref
+ | otherwise -> pLink
+ "switch" -> eSwitch id inline
+ "q" -> pQ
+ "em" -> pEmph
+ "i" -> pEmph
+ "strong" -> pStrong
+ "b" -> pStrong
+ "sup" -> pSuperscript
+ "sub" -> pSubscript
+ "small" -> pSmall
+ "s" -> pStrikeout
+ "strike" -> pStrikeout
+ "del" -> pStrikeout
+ "u" -> pUnderline
+ "ins" -> pUnderline
+ "br" -> pLineBreak
+ "img" -> pImage
+ "svg" -> pSvg
+ "bdo" -> pBdo
+ "code" -> pCode
+ "samp" -> pCodeWithClass "samp" "sample"
+ "var" -> pCodeWithClass "var" "variable"
+ "span" -> pSpan
+ "math" -> pMath False
+ "script"
+ | Just x <- lookup "type" attr
+ , "math/tex" `T.isPrefixOf` x -> pScriptMath
+ _ | name `elem` htmlSpanLikeElements -> pSpanLike
+ _ -> pRawHtmlInline
+ TagText _ -> pTagText
+ _ -> pRawHtmlInline
pSelfClosing :: PandocMonad m
=> (Text -> Bool) -> ([Attribute Text] -> Bool)
@@ -560,27 +638,25 @@ pSelfClosing f g = do
return open
pQ :: PandocMonad m => TagParser m Inlines
-pQ = choice $ map try [citedQuote, normalQuote]
- where citedQuote = do
- tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
-
- url <- canonicalizeUrl $ fromAttrib "cite" tag
- let uid = fromMaybe (fromAttrib "name" tag) $
- maybeFromAttrib "id" tag
- let cls = T.words $ fromAttrib "class" tag
-
- makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
- normalQuote = do
- pSatisfy $ tagOpenLit "q" (const True)
- makeQuote id
- makeQuote wrapper = do
- ctx <- asks quoteContext
- let (constructor, innerContext) = case ctx of
- InDoubleQuote -> (B.singleQuoted, InSingleQuote)
- _ -> (B.doubleQuoted, InDoubleQuote)
-
- content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q"))
- return $ extractSpaces (constructor . wrapper) content
+pQ = do
+ TagOpen _ attrs <- pSatisfy $ tagOpenLit "q" (const True)
+ case lookup "cite" attrs of
+ Just url -> do
+ let uid = fromMaybe mempty $
+ lookup "name" attrs <> lookup "id" attrs
+ let cls = maybe [] T.words $ lookup "class" attrs
+ url' <- canonicalizeUrl url
+ makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')])
+ Nothing -> makeQuote id
+ where
+ makeQuote wrapper = do
+ ctx <- asks quoteContext
+ let (constructor, innerContext) = case ctx of
+ InDoubleQuote -> (B.singleQuoted, InSingleQuote)
+ _ -> (B.doubleQuoted, InDoubleQuote)
+ content <- withQuoteContext innerContext
+ (mconcat <$> manyTill inline (pCloses "q"))
+ return $ extractSpaces (constructor . wrapper) content
pEmph :: PandocMonad m => TagParser m Inlines
pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
@@ -646,17 +722,12 @@ pLink = try $ do
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
- tag <- pSelfClosing (=="img") (isJust . lookup "src")
+ tag@(TagOpen _ attr') <- pSelfClosing (=="img") (isJust . lookup "src")
url <- canonicalizeUrl $ fromAttrib "src" tag
let title = fromAttrib "title" tag
let alt = fromAttrib "alt" tag
- let uid = fromAttrib "id" tag
- let cls = T.words $ fromAttrib "class" tag
- let getAtt k = case fromAttrib k tag of
- "" -> []
- v -> [(k, v)]
- let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
- return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
+ let attr = toAttr $ filter (\(k,_) -> k /= "alt" && k /= "title" && k /= "src") attr'
+ return $ B.imageWith attr (escapeURI url) title (B.text alt)
pSvg :: PandocMonad m => TagParser m Inlines
pSvg = do
@@ -671,13 +742,12 @@ pSvg = do
UTF8.toText (encode $ UTF8.fromText rawText)
return $ B.imageWith (ident,cls,[]) svgData mempty mempty
-pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines
-pCodeWithClass elemToClass = try $ do
- let tagTest = flip elem . fmap fst $ elemToClass
- TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True)
+pCodeWithClass :: PandocMonad m => Text -> Text -> TagParser m Inlines
+pCodeWithClass name class' = try $ do
+ TagOpen open attr' <- pSatisfy $ tagOpen (== name) (const True)
result <- manyTill pAny (pCloses open)
let (ids,cs,kvs) = toAttr attr'
- cs' = maybe cs (:cs) . lookup open $ elemToClass
+ cs' = class' : cs
return . B.codeWith (ids,cs',kvs) .
T.unwords . T.lines . innerText $ result
@@ -764,17 +834,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
pTagText :: PandocMonad m => TagParser m Inlines
pTagText = try $ do
+ pos <- getPosition
(TagText str) <- pSatisfy isTagText
st <- getState
qu <- ask
parsed <- lift $ lift $
- flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+ flip runReaderT qu $ runParserT (many pTagContents) st "text"
+ (Sources [(pos, str)])
case parsed of
Left _ -> throwError $ PandocParseError $
"Could not parse `" <> str <> "'"
Right result -> return $ mconcat result
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
pTagContents :: PandocMonad m => InlinesParser m Inlines
pTagContents =
@@ -868,27 +940,23 @@ pSpace = many1 (satisfy isSpace) >>= \xs ->
then return B.softbreak
else return B.space
-class NamedTag a where
- getTagName :: a -> Maybe Text
-
-instance NamedTag (Tag Text) where
- getTagName (TagOpen t _) = Just t
- getTagName (TagClose t) = Just t
- getTagName _ = Nothing
-
-instance NamedTag (Tag String) where
- getTagName (TagOpen t _) = Just (T.pack t)
- getTagName (TagClose t) = Just (T.pack t)
- getTagName _ = Nothing
-
-isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
-isInlineTag t =
- isCommentTag t || case getTagName t of
- Nothing -> False
- Just x -> x `Set.notMember` blockTags ||
- T.take 1 x == "?" -- processing instr.
-
-isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
+getTagName :: Tag Text -> Maybe Text
+getTagName (TagOpen t _) = Just t
+getTagName (TagClose t) = Just t
+getTagName _ = Nothing
+
+isInlineTag :: Tag Text -> Bool
+isInlineTag t = isCommentTag t || case t of
+ TagOpen "script" _ -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
+ TagClose "script" -> True
+ TagOpen name _ -> isInlineTagName name
+ TagClose name -> isInlineTagName name
+ _ -> False
+ where isInlineTagName x =
+ x `Set.notMember` blockTags ||
+ T.take 1 x == "?" -- processing instr.
+
+isBlockTag :: Tag Text -> Bool
isBlockTag t = isBlockTagName || isTagComment t
where isBlockTagName =
case getTagName t of
@@ -899,10 +967,10 @@ isBlockTag t = isBlockTagName || isTagComment t
|| x `Set.member` eitherBlockOrInline
Nothing -> False
-isTextTag :: Tag a -> Bool
+isTextTag :: Tag Text -> Bool
isTextTag = tagText (const True)
-isCommentTag :: Tag a -> Bool
+isCommentTag :: Tag Text -> Bool
isCommentTag = tagComment (const True)
--- parsers for use in markdown, textile readers
@@ -910,13 +978,14 @@ isCommentTag = tagComment (const True)
-- | Matches a stretch of HTML in balanced tags.
htmlInBalanced :: Monad m
=> (Tag Text -> Bool)
- -> ParserT Text st m Text
+ -> ParserT Sources st m Text
htmlInBalanced f = try $ do
lookAhead (char '<')
- inp <- getInput
- let ts = canonicalizeTags $
- parseTagsOptions parseOptions{ optTagWarning = True,
- optTagPosition = True } inp
+ sources <- getInput
+ let ts = canonicalizeTags
+ $ parseTagsOptions parseOptions{ optTagWarning = True,
+ optTagPosition = True }
+ $ sourcesToText sources
case ts of
(TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
guard $ f t
@@ -951,22 +1020,24 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
go n (t:ts') = (t :) <$> go n ts'
go _ [] = mzero
-hasTagWarning :: [Tag a] -> Bool
+hasTagWarning :: [Tag Text] -> Bool
hasTagWarning (TagWarning _:_) = True
hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
htmlTag :: (HasReaderOptions st, Monad m)
=> (Tag Text -> Bool)
- -> ParserT Text st m (Tag Text, Text)
+ -> ParserT Sources st m (Tag Text, Text)
htmlTag f = try $ do
lookAhead (char '<')
startpos <- getPosition
- inp <- getInput
+ sources <- getInput
+ let inp = sourcesToText sources
let ts = canonicalizeTags $ parseTagsOptions
parseOptions{ optTagWarning = False
, optTagPosition = True }
- (inp <> " ") -- add space to ensure that
+ (inp <> " ")
+ -- add space to ensure that
-- we get a TagPosition after the tag
(next, ln, col) <- case ts of
(TagPosition{} : next : TagPosition ln col : _)
@@ -1024,21 +1095,6 @@ htmlTag f = try $ do
handleTag tagname
_ -> mzero
--- Strip namespace prefixes
-stripPrefixes :: [Tag Text] -> [Tag Text]
-stripPrefixes = map stripPrefix
-
-stripPrefix :: Tag Text -> Tag Text
-stripPrefix (TagOpen s as) =
- TagOpen (stripPrefix' s) (map (first stripPrefix') as)
-stripPrefix (TagClose s) = TagClose (stripPrefix' s)
-stripPrefix x = x
-
-stripPrefix' :: Text -> Text
-stripPrefix' s =
- if T.null t then s else T.drop 1 t
- where (_, t) = T.span (/= ':') s
-
-- Utilities
-- | Adjusts a url according to the document's base URL.
@@ -1048,26 +1104,3 @@ canonicalizeUrl url = do
return $ case (parseURIReference (T.unpack url), mbBaseHref) of
(Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
_ -> url
-
--- For now we need a special version here; the one in Shared has String type
-renderTags' :: [Tag Text] -> Text
-renderTags' = renderTagsOptions
- renderOptions{ optMinimize = matchTags ["hr", "br", "img",
- "meta", "link"]
- , optRawTag = matchTags ["script", "style"] }
- where matchTags tags = flip elem tags . T.toLower
-
-
--- EPUB Specific
---
---
-{-
-
-types :: [(String, ([String], Int))]
-types = -- Document divisions
- map (\s -> (s, (["section", "body"], 0)))
- ["volume", "part", "chapter", "division"]
- <> -- Document section and components
- [
- ("abstract", ([], 0))]
--}
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index 2d58319da..bd8d7c96c 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -2,7 +2,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.HTML.Parsing
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -161,10 +161,12 @@ _ `closes` "html" = False
"li" `closes` "li" = True
"th" `closes` t | t `elem` ["th","td"] = True
"td" `closes` t | t `elem` ["th","td"] = True
-"tr" `closes` t | t `elem` ["th","td","tr"] = True
+"tr" `closes` t | t `elem` ["th","td","tr","colgroup"] = True
"dd" `closes` t | t `elem` ["dt", "dd"] = True
"dt" `closes` t | t `elem` ["dt","dd"] = True
"rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True
+"col" `closes` "col" = True
+"colgroup" `closes` "col" = True
"optgroup" `closes` "optgroup" = True
"optgroup" `closes` "option" = True
"option" `closes` "option" = True
@@ -193,14 +195,20 @@ t1 `closes` t2 |
_ `closes` _ = False
toStringAttr :: [(Text, Text)] -> [(Text, Text)]
-toStringAttr = map go
+toStringAttr = foldr go []
where
- go (x,y) =
- case T.stripPrefix "data-" x of
- Just x' | x' `Set.notMember` (html5Attributes <>
- html4Attributes <> rdfaAttributes)
- -> (x',y)
- _ -> (x,y)
+ go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
+ -- treat xml:lang as lang
+ go ("xml:lang",y) ats = go ("lang",y) ats
+ -- prevent duplicate attributes
+ go (x,y) ats
+ | any (\(x',_) -> x == x') ats = ats
+ | otherwise =
+ case T.stripPrefix "data-" x of
+ Just x' | x' `Set.notMember` (html5Attributes <>
+ html4Attributes <> rdfaAttributes)
+ -> go (x',y) ats
+ _ -> (x,y):ats
-- Unlike fromAttrib from tagsoup, this distinguishes
-- between a missing attribute and an attribute with empty content.
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 5a783988f..6e62e12f5 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -3,8 +3,8 @@
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.HTML.Table
- Copyright : © 2006-2020 John MacFarlane,
- 2020 Albert Krewinkel
+ Copyright : © 2006-2021 John MacFarlane,
+ 2020-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <albert@zeitkraut.de>
@@ -17,6 +17,8 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where
import Control.Applicative ((<|>))
import Data.Maybe (fromMaybe)
+import Data.Either (lefts, rights)
+import Data.List.NonEmpty (nonEmpty)
import Data.Text (Text)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks)
@@ -32,34 +34,51 @@ import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
--- | Parses a @<col>@ element, returning the column's width. Defaults to
--- @'ColWidthDefault'@ if the width is not set or cannot be determined.
-pCol :: PandocMonad m => TagParser m ColWidth
+-- | Parses a @<col>@ element, returning the column's width.
+-- An Either value is used: Left i means a "relative length" with
+-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6);
+-- Right w means a regular width. Defaults to @'Right ColWidthDefault'@
+-- if the width is not set or cannot be determined.
+pCol :: PandocMonad m => TagParser m (Either Int ColWidth)
pCol = try $ do
TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
let attribs = toStringAttr attribs'
skipMany pBlank
optional $ pSatisfy (matchTagClose "col")
skipMany pBlank
- let width = case lookup "width" attribs of
+ return $ case lookup "width" attribs of
Nothing -> case lookup "style" attribs of
Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
- fromMaybe 0.0 $ safeRead (T.filter
- (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
- _ -> 0.0
+ maybe (Right ColWidthDefault) (Right . ColWidth . (/ 100.0))
+ $ safeRead (T.filter
+ (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+ _ -> Right ColWidthDefault
+ Just (T.unsnoc -> Just (xs, '*')) ->
+ maybe (Left 1) Left $ safeRead xs
Just (T.unsnoc -> Just (xs, '%')) ->
- fromMaybe 0.0 $ safeRead xs
- _ -> 0.0
- if width > 0.0
- then return $ ColWidth $ width / 100.0
- else return ColWidthDefault
+ maybe (Right ColWidthDefault)
+ (Right . ColWidth . (/ 100.0)) $ safeRead xs
+ _ -> Right ColWidthDefault
-pColgroup :: PandocMonad m => TagParser m [ColWidth]
+pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth]
pColgroup = try $ do
pSatisfy (matchTagOpen "colgroup" [])
skipMany pBlank
manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
+resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth]
+resolveRelativeLengths ws =
+ let remaining = 1 - sum (map getColWidth $ rights ws)
+ relatives = sum $ lefts ws
+ relUnit = remaining / fromIntegral relatives
+ toColWidth (Right x) = x
+ toColWidth (Left i) = ColWidth (fromIntegral i * relUnit)
+ in map toColWidth ws
+
+getColWidth :: ColWidth -> Double
+getColWidth ColWidthDefault = 0
+getColWidth (ColWidth w) = w
+
data CellType
= HeaderCell
| BodyCell
@@ -181,7 +200,8 @@ pTable :: PandocMonad m
pTable block = try $ do
TagOpen _ attribs <- pSatisfy (matchTagOpen "table" []) <* skipMany pBlank
caption <- option mempty $ pInTags "caption" block <* skipMany pBlank
- widths <- ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
+ widths <- resolveRelativeLengths <$>
+ ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
thead <- pTableHead block <* skipMany pBlank
topfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank
tbodies <- many (pTableBody block) <* skipMany pBlank
@@ -214,8 +234,9 @@ normalize :: [ColWidth] -> TableHead -> [TableBody] -> TableFoot
-> Either String ([ColSpec], TableHead, [TableBody], TableFoot)
normalize widths head' bodies foot = do
let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot
- let rowLength = length . rowCells
- let ncols = maximum (map rowLength rows)
+ let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs
+ let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells
+ let ncols = maybe 0 maximum $ nonEmpty $ map rowLength rows
let tblType = tableType (map rowCells rows)
-- fail on empty table
if null rows
diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index 4f82a1831..b7bd40fee 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.HTML.TagCategories
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs
index a94eeb828..12c519ad6 100644
--- a/src/Text/Pandoc/Readers/HTML/Types.hs
+++ b/src/Text/Pandoc/Readers/HTML/Types.hs
@@ -2,7 +2,7 @@
{-# LANGUAGE MultiParamTypeClasses #-}
{- |
Module : Text.Pandoc.Readers.HTML.Types
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 25d69f040..67b3af2d3 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -17,8 +17,9 @@ module Text.Pandoc.Readers.Haddock
import Control.Monad.Except (throwError)
import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
import Data.Maybe (fromMaybe)
-import Data.Text (Text, unpack)
+import Data.Text (unpack)
import qualified Data.Text as T
import Documentation.Haddock.Parser
import Documentation.Haddock.Types as H
@@ -28,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Shared (splitTextBy, trim)
-- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
+readHaddock :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+readHaddock opts s = case readHaddockEither opts
+ (unpack . sourcesToText . toSources $ s) of
Right result -> return result
Left e -> throwError e
@@ -92,7 +95,7 @@ docHToBlocks d' =
then ([], map toCells bodyRows)
else (toCells (head headerRows),
map toCells (tail headerRows ++ bodyRows))
- colspecs = replicate (maximum (map length body))
+ colspecs = replicate (maybe 0 maximum (nonEmpty (map length body)))
(AlignDefault, ColWidthDefault)
in B.table B.emptyCaption
colspecs
@@ -128,7 +131,8 @@ docHToInlines isCode d' =
DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
_ -> mempty
DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
- DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s
+ DocModule s -> B.codeWith ("",["haskell","module"],[]) $
+ T.pack (modLinkName s)
DocWarning _ -> mempty -- TODO
DocEmphasis d -> B.emph (docHToInlines isCode d)
DocMonospaced (DocString s) -> B.code $ T.pack s
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index a866e6ec3..cd1093109 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -4,7 +4,7 @@
{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Readers.Ipynb
- Copyright : Copyright (C) 2019-2020 John MacFarlane
+ Copyright : Copyright (C) 2019-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -39,10 +39,12 @@ import Data.Aeson as Aeson
import Control.Monad.Except (throwError)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readIpynb opts t = do
- let src = BL.fromStrict (TE.encodeUtf8 t)
+readIpynb :: (PandocMonad m, ToSources a)
+ => ReaderOptions -> a -> m Pandoc
+readIpynb opts x = do
+ let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
case eitherDecode src of
Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
Left _ ->
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c638da519..9cdbf1611 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -14,7 +14,9 @@ Conversion of JATS XML to 'Pandoc' document.
module Text.Pandoc.Readers.JATS ( readJATS ) where
import Control.Monad.State.Strict
-import Data.Char (isDigit, isSpace, toUpper)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(..))
+import Data.Char (isDigit, isSpace)
import Data.Default
import Data.Generics
import Data.List (foldl', intersperse)
@@ -22,15 +24,17 @@ import qualified Data.Map as Map
import Data.Maybe (maybeToList, fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
type JATS m = StateT JATSState m
@@ -49,42 +53,28 @@ instance Default JATSState where
, jatsContent = [] }
-readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readJATS _ inp = do
- let tree = normalizeTree . parseXML
- $ T.unpack $ crFilter inp
+ let sources = toSources inp
+ tree <- either (throwError . PandocXMLError "") return $
+ parseXMLContents (TL.fromStrict . sourcesToText $ sources)
(bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
- where go :: [Content] -> [Content]
- go (Text (CData CDataRaw _ _):xs) = xs
- go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
- Text (CData CDataText (s1 ++ s2) z):xs
- go (Text (CData CDataText s1 z):CRef r:xs) =
- Text (CData CDataText (s1 ++ convertEntity r) z):xs
- go (CRef r:Text (CData CDataText s1 z):xs) =
- Text (CData CDataText (convertEntity r ++ s1) z):xs
- go (CRef r1:CRef r2:xs) =
- Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
- go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
attrValue attr =
fromMaybe "" . maybeAttrValue attr
-maybeAttrValue :: String -> Element -> Maybe Text
+maybeAttrValue :: Text -> Element -> Maybe Text
maybeAttrValue attr elt =
- T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+ lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
-- convenience function
-named :: String -> Element -> Bool
+named :: Text -> Element -> Bool
named s e = qName (elName e) == s
--
@@ -150,10 +140,10 @@ getBlocks e = mconcat <$>
parseBlock :: PandocMonad m => Content -> JATS m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
then return mempty
- else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+ else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
parseBlock (Elem e) =
case qName (elName e) of
"p" -> parseMixed para (elContent e)
@@ -202,7 +192,7 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ textContentRecursive e
+ $ trimNl $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -266,7 +256,7 @@ parseBlock (Elem e) =
Just "center" -> AlignCenter
_ -> AlignDefault
let toWidth c = do
- w <- findAttrText (unqual "colwidth") c
+ w <- findAttr (unqual "colwidth") c
n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
if n > 0 then Just n else Nothing
let numrows = foldl' max 0 $ map length bodyrows
@@ -437,16 +427,10 @@ parseRef e = do
Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
-- TODO handle mixed-citation
-findAttrText :: QName -> Element -> Maybe Text
-findAttrText x = fmap T.pack . findAttr x
-
textContent :: Element -> Text
-textContent = T.pack . strContent
-
-textContentRecursive :: Element -> Text
-textContentRecursive = T.pack . strContentRecursive
+textContent = strContent
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
strContentRecursive = strContent .
(\e' -> e'{ elContent = map elementToStr $ elContent e' })
@@ -455,9 +439,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> JATS m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
-parseInline (CRef ref) =
- return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (CRef ref) = return $ maybe (text $ T.toUpper ref) (text . T.pack)
+ $ lookupEntity (T.unpack ref)
parseInline (Elem e) =
case qName (elName e) of
"italic" -> innerInlines emph
@@ -502,9 +486,9 @@ parseInline (Elem e) =
else linkWith attr ("#" <> rid) "" ils
"ext-link" -> do
ils <- innerInlines id
- let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> T.pack h
+ Just h -> h
_ -> "#" <> attrValue "rid" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, [], [])
@@ -512,7 +496,8 @@ parseInline (Elem e) =
"disp-formula" -> formula displayMath
"inline-formula" -> formula math
- "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e
+ "math" | qURI (elName e) == Just "http://www.w3.org/1998/Math/MathML"
+ -> return . math $ mathML e
"tex-math" -> return . math $ textContent e
"email" -> return $ link ("mailto:" <> textContent e) ""
@@ -524,7 +509,7 @@ parseInline (Elem e) =
where innerInlines f = extractSpaces f . mconcat <$>
mapM parseInline (elContent e)
mathML x =
- case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of
+ case readMathML . showElement $ everywhere (mkT removePrefix) x of
Left _ -> mempty
Right m -> writeTeX m
formula constructor = do
@@ -535,11 +520,12 @@ parseInline (Elem e) =
filterChildren isMathML whereToLook
return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs
- isMathML x = qName (elName x) == "math" &&
- qPrefix (elName x) == Just "mml"
+ isMathML x = qName (elName x) == "math" &&
+ qURI (elName x) ==
+ Just "http://www.w3.org/1998/Math/MathML"
removePrefix elname = elname { qPrefix = Nothing }
codeWithLang = do
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 9266ce10d..cf111f173 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -2,7 +2,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org
- Copyright : © 2019-2020 Albert Krewinkel
+ Copyright : © 2019-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
import Text.Pandoc.Error (PandocError (PandocParseError))
import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Shared (stringify)
-
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
import qualified Text.Jira.Markup as Jira
-- | Read Jira wiki markup.
-readJira :: PandocMonad m
+readJira :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
-readJira _opts s = case parse s of
- Right d -> return $ jiraToPandoc d
- Left e -> throwError . PandocParseError $
- "Jira parse error" `append` pack (show e)
+readJira _opts inp = do
+ let sources = toSources inp
+ case parse (sourcesToText sources) of
+ Right d -> return $ jiraToPandoc d
+ Left e -> throwError . PandocParseError $
+ "Jira parse error" `append` pack (show e)
jiraToPandoc :: Jira.Doc -> Pandoc
jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
@@ -71,10 +73,10 @@ toPandocCodeBlocks langMay params txt =
Nothing -> []
in codeBlockWith ("", classes, map paramToPair params) txt
--- | Create a pandoc @'Div'@
+-- | Create a pandoc @'Div'@ from a panel.
toPandocDiv :: [Jira.Parameter] -> [Jira.Block] -> Blocks
toPandocDiv params =
- divWith ("", [], map paramToPair params) . foldMap jiraToPandocBlocks
+ divWith ("", ["panel"], map paramToPair params) . foldMap jiraToPandocBlocks
paramToPair :: Jira.Parameter -> (Text, Text)
paramToPair (Jira.Parameter key value) = (key, value)
@@ -170,6 +172,8 @@ jiraLinkToPandoc linkType alias url =
Jira.Email -> link ("mailto:" <> url') "" alias'
Jira.Attachment -> linkWith ("", ["attachment"], []) url' "" alias'
Jira.User -> linkWith ("", ["user-account"], []) url' "" alias'
+ Jira.SmartCard -> linkWith ("", ["smart-card"], []) url' "" alias'
+ Jira.SmartLink -> linkWith ("", ["smart-link"], []) url' "" alias'
-- | Get unicode representation of a Jira icon.
iconUnicode :: Jira.Icon -> Text
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index cdccaa535..27c018e73 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,14 +1,10 @@
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE CPP #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.LaTeX
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -22,50 +18,58 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
applyMacros,
rawLaTeXInline,
rawLaTeXBlock,
- inlineCommand,
- tokenize,
- untokenize
+ inlineCommand
) where
import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
-import Data.Char (isDigit, isLetter, toUpper, chr)
+import Data.Char (isDigit, isLetter, isAlphaNum, toUpper, chr)
import Data.Default
-import Data.Functor (($>))
import Data.List (intercalate)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, maybeToList)
import qualified Data.Set as Set
-import qualified Data.Sequence as Seq
import Data.Text (Text)
import qualified Data.Text as T
+import Skylighting (defaultSyntaxMap)
import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (Lang (..), renderLang)
-import Text.Pandoc.Builder
+import Text.Collate.Lang (renderLang)
+import Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocPure (PandocPure)
import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
- readFileFromDirs, report, setResourcePath,
- setTranslations, translateTerm)
+ readFileFromDirs, report,
+ setResourcePath)
import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
-import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
+import Text.Pandoc.Highlighting (languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
optional, space, spaces, withRaw, (<|>))
-import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
- ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
- babelLangToBCP47)
-import Text.Pandoc.Readers.LaTeX.SIunitx
+import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
+import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
+ inlineEnvironment,
+ mathDisplay, mathInline,
+ newtheorem, theoremstyle, proof,
+ theoremEnvironment)
+import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
+import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
+import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
+ enquoteCommands,
+ babelLangToBCP47, setDefaultLanguage)
+import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
+import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
+ nameCommands, charCommands,
+ accentCommands,
+ biblatexInlineCommands,
+ verbCommands, rawInlineOr,
+ listingsLanguage)
import Text.Pandoc.Shared
-import qualified Text.Pandoc.Translations as Translations
import Text.Pandoc.Walk
-import qualified Text.Pandoc.Builder as B
-import qualified Data.Text.Normalize as Normalize
-import Safe
+import Data.List.NonEmpty (nonEmpty)
-- for debugging:
-- import Text.Pandoc.Extensions (getDefaultExtensions)
@@ -73,16 +77,17 @@ import Safe
-- import Debug.Trace (traceShowId)
-- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
+readLaTeX :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assumes @'\n'@ line endings)
+ -> a -- ^ Input to parse
-> m Pandoc
readLaTeX opts ltx = do
+ let sources = toSources ltx
parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
- (tokenize "source" (crFilter ltx))
+ (tokenizeSources sources)
case parsed of
Right result -> return result
- Left e -> throwError $ PandocParsecError ltx e
+ Left e -> throwError $ PandocParsecError sources e
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
@@ -93,11 +98,7 @@ parseLaTeX = do
let doc' = doc bs
let headerLevel (Header n _ _) = [n]
headerLevel _ = []
-#if MIN_VERSION_safe(0,3,18)
- let bottomLevel = minimumBound 1 $ query headerLevel doc'
-#else
- let bottomLevel = minimumDef 1 $ query headerLevel doc'
-#endif
+ let bottomLevel = maybe 1 minimum $ nonEmpty $ query headerLevel doc'
let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
adjustHeaders _ x = x
let (Pandoc _ bs') =
@@ -132,11 +133,10 @@ resolveRefs _ x = x
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT Text s m Text
+ => ParserT Sources s m Text
rawLaTeXBlock = do
lookAhead (try (char '\\' >> letter))
- inp <- getInput
- let toks = tokenize "source" inp
+ toks <- getInputTokens
snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
<|> rawLaTeXParser toks True
(do choice (map controlSeq
@@ -163,14 +163,13 @@ beginOrEndCommand = try $ do
(txt <> untokenize rawargs)
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT Text s m Text
+ => ParserT Sources s m Text
rawLaTeXInline = do
lookAhead (try (char '\\' >> letter))
- inp <- getInput
- let toks = tokenize "source" inp
+ toks <- getInputTokens
raw <- snd <$>
( rawLaTeXParser toks True
- (mempty <$ (controlSeq "input" >> skipMany opt >> braced))
+ (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
inlines
<|> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
inlines
@@ -178,11 +177,10 @@ rawLaTeXInline = do
finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
return $ raw <> T.pack finalbraces
-inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter))
- inp <- getInput
- let toks = tokenize "source" inp
+ toks <- getInputTokens
fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
inlines
@@ -191,12 +189,6 @@ inlineCommand = do
word :: PandocMonad m => LP m Inlines
word = str . untoken <$> satisfyTok isWordTok
-regularSymbol :: PandocMonad m => LP m Inlines
-regularSymbol = str . untoken <$> satisfyTok isRegularSymbol
- where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
- isRegularSymbol _ = False
- isSpecial c = c `Set.member` specialChars
-
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
ils <- grouped inline
@@ -237,19 +229,6 @@ mkImage options (T.unpack -> src) = do
_ -> return src
return $ imageWith attr (T.pack src') "" alt
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
- (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
- where startsWithLetter (Tok _ Word t) =
- case T.uncons t of
- Just (c, _) | isLetter c -> True
- _ -> False
- startsWithLetter _ = False
-
-
-lit :: Text -> LP m Inlines
-lit = pure . str
-
removeDoubleQuotes :: Text -> Text
removeDoubleQuotes t =
Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -296,23 +275,14 @@ quoted' f starter ender = do
cs -> cs)
else lit startchs
-enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
-enquote starred mblang = do
- skipopts
- let lang = mblang >>= babelLangToBCP47
- let langspan = case lang of
- Nothing -> id
- Just l -> spanWith ("",[],[("lang", renderLang l)])
- quoteContext <- sQuoteContext <$> getState
- if starred || quoteContext == InDoubleQuote
- then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
- else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+lit :: Text -> LP m Inlines
+lit = pure . str
blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
blockquote cvariant mblang = do
citepar <- if cvariant
then (\xs -> para (cite xs mempty))
- <$> cites NormalCitation False
+ <$> cites inline NormalCitation False
else option mempty $ para <$> bracketed inline
let lang = mblang >>= babelLangToBCP47
let langdiv = case lang of
@@ -323,224 +293,13 @@ blockquote cvariant mblang = do
optional $ symbolIn (".:;?!" :: [Char]) -- currently ignored
return $ blockQuote . langdiv $ (bs <> citepar)
-doAcronym :: PandocMonad m => Text -> LP m Inlines
-doAcronym form = do
- acro <- braced
- return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
- ("acronym-form", "singular+" <> form)])
- $ str $ untokenize acro]
-
-doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
-doAcronymPlural form = do
- acro <- braced
- plural <- lit "s"
- return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
- ("acronym-form", "plural+" <> form)]) $
- mconcat [str $ untokenize acro, plural]]
-
-doverb :: PandocMonad m => LP m Inlines
-doverb = do
- Tok _ Symbol t <- anySymbol
- marker <- case T.uncons t of
- Just (c, ts) | T.null ts -> return c
- _ -> mzero
- withVerbatimMode $
- code . untokenize <$>
- manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
-
-verbTok :: PandocMonad m => Char -> LP m Tok
-verbTok stopchar = do
- t@(Tok pos toktype txt) <- anyTok
- case T.findIndex (== stopchar) txt of
- Nothing -> return t
- Just i -> do
- let (t1, t2) = T.splitAt i txt
- inp <- getInput
- setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
- : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
- return $ Tok pos toktype t1
-
-listingsLanguage :: [(Text, Text)] -> Maybe Text
-listingsLanguage opts =
- case lookup "language" opts of
- Nothing -> Nothing
- Just l -> fromListingsLanguage l `mplus` Just l
-
-dolstinline :: PandocMonad m => LP m Inlines
-dolstinline = do
- options <- option [] keyvals
- let classes = maybeToList $ listingsLanguage options
- doinlinecode classes
-
-domintinline :: PandocMonad m => LP m Inlines
-domintinline = do
- skipopts
- cls <- untokenize <$> braced
- doinlinecode [cls]
-
-doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
-doinlinecode classes = do
- Tok _ Symbol t <- anySymbol
- marker <- case T.uncons t of
- Just (c, ts) | T.null ts -> return c
- _ -> mzero
- let stopchar = if marker == '{' then '}' else marker
- withVerbatimMode $
- codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
- manyTill (verbTok stopchar) (symbol stopchar)
-
-nlToSpace :: Char -> Char
-nlToSpace '\n' = ' '
-nlToSpace x = x
-
-mathDisplay :: Text -> Inlines
-mathDisplay = displayMath . trimMath
-
-mathInline :: Text -> Inlines
-mathInline = math . trimMath
-
-dollarsMath :: PandocMonad m => LP m Inlines
-dollarsMath = do
- symbol '$'
- display <- option False (True <$ symbol '$')
- (do contents <- try $ untokenize <$> pDollarsMath 0
- if display
- then mathDisplay contents <$ symbol '$'
- else return $ mathInline contents)
- <|> (guard display >> return (mathInline ""))
-
--- Int is number of embedded groupings
-pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
-pDollarsMath n = do
- tk@(Tok _ toktype t) <- anyTok
- case toktype of
- Symbol | t == "$"
- , n == 0 -> return []
- | t == "\\" -> do
- tk' <- anyTok
- (tk :) . (tk' :) <$> pDollarsMath n
- | t == "{" -> (tk :) <$> pDollarsMath (n+1)
- | t == "}" ->
- if n > 0
- then (tk :) <$> pDollarsMath (n-1)
- else mzero
- _ -> (tk :) <$> pDollarsMath n
-
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
- let k = last ks
- in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
- first <- optionMaybe $ toList <$> opt
- second <- optionMaybe $ toList <$> opt
- keys <- try $ bgroup *> manyTill citationLabel egroup
- let (pre, suf) = case (first , second ) of
- (Just s , Nothing) -> (mempty, s )
- (Just s , Just t ) -> (s , t )
- _ -> (mempty, mempty)
- conv k = Citation { citationId = k
- , citationPrefix = []
- , citationSuffix = []
- , citationMode = NormalCitation
- , citationHash = 0
- , citationNoteNum = 0
- }
- return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m Text
-citationLabel = do
- sp
- untokenize <$>
- (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
- <* sp
- <* optional (symbol ',')
- <* sp)
- where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
-
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
- cits <- if multi
- then do
- multiprenote <- optionMaybe $ toList <$> paropt
- multipostnote <- optionMaybe $ toList <$> paropt
- let (pre, suf) = case (multiprenote, multipostnote) of
- (Just s , Nothing) -> (mempty, s)
- (Nothing , Just t) -> (mempty, t)
- (Just s , Just t ) -> (s, t)
- _ -> (mempty, mempty)
- tempCits <- many1 simpleCiteArgs
- case tempCits of
- (k:ks) -> case ks of
- (_:_) -> return $ (addMprenote pre k : init ks) ++
- [addMpostnote suf (last ks)]
- _ -> return [addMprenote pre (addMpostnote suf k)]
- _ -> return [[]]
- else count 1 simpleCiteArgs
- let cs = concat cits
- return $ case mode of
- AuthorInText -> case cs of
- (c:rest) -> c {citationMode = mode} : rest
- [] -> []
- _ -> map (\a -> a {citationMode = mode}) cs
- where mprenote (k:ks) = (k:ks) ++ [Space]
- mprenote _ = mempty
- mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
- mpostnote _ = mempty
- addMprenote mpn (k:ks) =
- let mpnfinal = case citationPrefix k of
- (_:_) -> mprenote mpn
- _ -> mpn
- in addPrefix mpnfinal (k:ks)
- addMprenote _ _ = []
- addMpostnote = addSuffix . mpostnote
-
-citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
- (c,raw) <- withRaw $ cites mode multi
- return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
-
-handleCitationPart :: Inlines -> [Citation]
-handleCitationPart ils =
- let isCite Cite{} = True
- isCite _ = False
- (pref, rest) = break isCite (toList ils)
- in case rest of
- (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
- _ -> []
-
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
- (cs, raw) <-
- withRaw $ concat <$> do
- bgroup
- items <- mconcat <$>
- many1 (notFollowedBy (symbol ';') >> inline)
- `sepBy1` symbol ';'
- egroup
- return $ map handleCitationPart items
- case cs of
- [] -> mzero
- (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
- (rawInline "latex" $ "\\citetext" <> untokenize raw)
-
-inNote :: Inlines -> Inlines
-inNote ils =
- note $ para $ ils <> str "."
-
inlineCommand' :: PandocMonad m => LP m Inlines
inlineCommand' = try $ do
Tok _ (CtrlSeq name) cmd <- anyControlSeq
guard $ name /= "begin" && name /= "end" && name /= "and"
- star <- option "" ("*" <$ symbol '*' <* sp)
+ star <- if T.all isAlphaNum name
+ then option "" ("*" <$ symbol '*' <* sp)
+ else pure ""
overlay <- option "" overlaySpecification
let name' = name <> star <> overlay
let names = ordNub [name', name] -- check non-starred as fallback
@@ -551,28 +310,8 @@ inlineCommand' = try $ do
<|> ignore rawcommand
lookupListDefault raw names inlineCommands
-
tok :: PandocMonad m => LP m Inlines
-tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
- where singleChar' = do
- Tok _ _ t <- singleChar
- return $ str t
-
-opt :: PandocMonad m => LP m Inlines
-opt = do
- toks <- try (sp *> bracketedToks <* sp)
- -- now parse the toks as inlines
- st <- getState
- parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
- case parsed of
- Right result -> return result
- Left e -> throwError $ PandocParsecError (untokenize toks) e
-
-paropt :: PandocMonad m => LP m Inlines
-paropt = parenWrapped inline
-
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
+tok = tokWith inline
unescapeURL :: Text -> Text
unescapeURL = T.concat . go . T.splitOn "\\"
@@ -585,381 +324,109 @@ unescapeURL = T.concat . go . T.splitOn "\\"
, isEscapable c = t
| otherwise = "\\" <> t
-mathEnvWith :: PandocMonad m
- => (Inlines -> a) -> Maybe Text -> Text -> LP m a
-mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
- where inner x = case innerEnv of
- Nothing -> x
- Just y -> "\\begin{" <> y <> "}\n" <> x <>
- "\\end{" <> y <> "}"
-
-mathEnv :: PandocMonad m => Text -> LP m Text
-mathEnv name = do
- skipopts
- optional blankline
- res <- manyTill anyTok (end_ name)
- return $ stripTrailingNewlines $ untokenize res
-
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
- controlSeq "begin"
- name <- untokenize <$> braced
- M.findWithDefault mzero name inlineEnvironments
-
-inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineEnvironments = M.fromList [
- ("displaymath", mathEnvWith id Nothing "displaymath")
- , ("math", math <$> mathEnv "math")
- , ("equation", mathEnvWith id Nothing "equation")
- , ("equation*", mathEnvWith id Nothing "equation*")
- , ("gather", mathEnvWith id (Just "gathered") "gather")
- , ("gather*", mathEnvWith id (Just "gathered") "gather*")
- , ("multline", mathEnvWith id (Just "gathered") "multline")
- , ("multline*", mathEnvWith id (Just "gathered") "multline*")
- , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
- , ("align", mathEnvWith id (Just "aligned") "align")
- , ("align*", mathEnvWith id (Just "aligned") "align*")
- , ("alignat", mathEnvWith id (Just "aligned") "alignat")
- , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
- , ("dmath", mathEnvWith id Nothing "dmath")
- , ("dmath*", mathEnvWith id Nothing "dmath*")
- , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
- , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
- , ("darray", mathEnvWith id (Just "aligned") "darray")
- , ("darray*", mathEnvWith id (Just "aligned") "darray*")
- ]
-
inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands = M.union inlineLanguageCommands $ M.fromList
- [ ("emph", extractSpaces emph <$> tok)
- , ("textit", extractSpaces emph <$> tok)
- , ("textsl", extractSpaces emph <$> tok)
- , ("textsc", extractSpaces smallcaps <$> tok)
- , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
- , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
- , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
- , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
- , ("texttt", ttfamily)
- , ("sout", extractSpaces strikeout <$> tok)
- , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
- , ("lq", return (str "‘"))
- , ("rq", return (str "’"))
- , ("textquoteleft", return (str "‘"))
- , ("textquoteright", return (str "’"))
- , ("textquotedblleft", return (str "“"))
- , ("textquotedblright", return (str "”"))
- , ("textsuperscript", extractSpaces superscript <$> tok)
- , ("textsubscript", extractSpaces subscript <$> tok)
- , ("textbackslash", lit "\\")
- , ("backslash", lit "\\")
- , ("slash", lit "/")
- , ("textbf", extractSpaces strong <$> tok)
- , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
- , ("underline", underline <$> tok)
- , ("ldots", lit "…")
- , ("vdots", lit "\8942")
- , ("dots", lit "…")
- , ("mdots", lit "…")
- , ("sim", lit "~")
- , ("sep", lit ",")
- , ("label", rawInlineOr "label" dolabel)
- , ("ref", rawInlineOr "ref" $ doref "ref")
- , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty
- , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty
- , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty
- , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
- , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
- , ("lettrine", rawInlineOr "lettrine" lettrine)
- , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
- , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
- , ("ensuremath", mathInline . untokenize <$> braced)
- , ("texorpdfstring", const <$> tok <*> tok)
- , ("P", lit "¶")
- , ("S", lit "§")
- , ("$", lit "$")
- , ("%", lit "%")
- , ("&", lit "&")
- , ("#", lit "#")
- , ("_", lit "_")
- , ("{", lit "{")
- , ("}", lit "}")
- , ("qed", lit "\a0\x25FB")
- -- old TeX commands
- , ("em", extractSpaces emph <$> inlines)
- , ("it", extractSpaces emph <$> inlines)
- , ("sl", extractSpaces emph <$> inlines)
- , ("bf", extractSpaces strong <$> inlines)
- , ("tt", code . stringify . toList <$> inlines)
- , ("rm", inlines)
- , ("itshape", extractSpaces emph <$> inlines)
- , ("slshape", extractSpaces emph <$> inlines)
- , ("scshape", extractSpaces smallcaps <$> inlines)
- , ("bfseries", extractSpaces strong <$> inlines)
- , ("MakeUppercase", makeUppercase <$> tok)
- , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
- , ("uppercase", makeUppercase <$> tok)
- , ("MakeLowercase", makeLowercase <$> tok)
- , ("MakeTextLowercase", makeLowercase <$> tok)
- , ("lowercase", makeLowercase <$> tok)
- , ("/", pure mempty) -- italic correction
- , ("aa", lit "å")
- , ("AA", lit "Å")
- , ("ss", lit "ß")
- , ("o", lit "ø")
- , ("O", lit "Ø")
- , ("L", lit "Ł")
- , ("l", lit "ł")
- , ("ae", lit "æ")
- , ("AE", lit "Æ")
- , ("oe", lit "œ")
- , ("OE", lit "Œ")
- , ("pounds", lit "£")
- , ("euro", lit "€")
- , ("copyright", lit "©")
- , ("textasciicircum", lit "^")
- , ("textasciitilde", lit "~")
- , ("H", accent '\779' Nothing) -- hungarumlaut
- , ("`", accent '\768' (Just '`')) -- grave
- , ("'", accent '\769' (Just '\'')) -- acute
- , ("^", accent '\770' (Just '^')) -- circ
- , ("~", accent '\771' (Just '~')) -- tilde
- , ("\"", accent '\776' Nothing) -- umlaut
- , (".", accent '\775' Nothing) -- dot
- , ("=", accent '\772' Nothing) -- macron
- , ("|", accent '\781' Nothing) -- vertical line above
- , ("b", accent '\817' Nothing) -- macron below
- , ("c", accent '\807' Nothing) -- cedilla
- , ("G", accent '\783' Nothing) -- doublegrave
- , ("h", accent '\777' Nothing) -- hookabove
- , ("d", accent '\803' Nothing) -- dotbelow
- , ("f", accent '\785' Nothing) -- inverted breve
- , ("r", accent '\778' Nothing) -- ringabove
- , ("t", accent '\865' Nothing) -- double inverted breve
- , ("U", accent '\782' Nothing) -- double vertical line above
- , ("v", accent '\780' Nothing) -- hacek
- , ("u", accent '\774' Nothing) -- breve
- , ("k", accent '\808' Nothing) -- ogonek
- , ("textogonekcentered", accent '\808' Nothing) -- ogonek
- , ("i", lit "ı") -- dotless i
- , ("j", lit "ȷ") -- dotless j
- , ("newtie", accent '\785' Nothing) -- inverted breve
- , ("textcircled", accent '\8413' Nothing) -- combining circle
- , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
- guard $ not inTableCell
- optional opt
- spaces))
- , (",", lit "\8198")
- , ("@", pure mempty)
- , (" ", lit "\160")
- , ("ps", pure $ str "PS." <> space)
- , ("TeX", lit "TeX")
- , ("LaTeX", lit "LaTeX")
- , ("bar", lit "|")
- , ("textless", lit "<")
- , ("textgreater", lit ">")
- , ("thanks", skipopts >> note <$> grouped block)
- , ("footnote", skipopts >> note <$> grouped block)
- , ("passthrough", tok) -- \passthrough macro used by latex writer
- -- for listings
- , ("verb", doverb)
- , ("lstinline", dolstinline)
- , ("mintinline", domintinline)
- , ("Verb", doverb)
- , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
- bracedUrl)
- , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
- , ("href", do url <- bracedUrl
- sp
- link (unescapeURL $ untokenize url) "" <$> tok)
- , ("includegraphics", do options <- option [] keyvals
- src <- braced
- mkImage options . unescapeURL . removeDoubleQuotes $
- untokenize src)
- , ("enquote*", enquote True Nothing)
- , ("enquote", enquote False Nothing)
- -- foreignquote is supposed to use native quote marks
- , ("foreignquote*", braced >>= enquote True . Just . untokenize)
- , ("foreignquote", braced >>= enquote False . Just . untokenize)
- -- hypehnquote uses regular quotes
- , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
- , ("hyphenquote", braced >>= enquote False . Just . untokenize)
- , ("figurename", doTerm Translations.Figure)
- , ("prefacename", doTerm Translations.Preface)
- , ("refname", doTerm Translations.References)
- , ("bibname", doTerm Translations.Bibliography)
- , ("chaptername", doTerm Translations.Chapter)
- , ("partname", doTerm Translations.Part)
- , ("contentsname", doTerm Translations.Contents)
- , ("listfigurename", doTerm Translations.ListOfFigures)
- , ("listtablename", doTerm Translations.ListOfTables)
- , ("indexname", doTerm Translations.Index)
- , ("abstractname", doTerm Translations.Abstract)
- , ("tablename", doTerm Translations.Table)
- , ("enclname", doTerm Translations.Encl)
- , ("ccname", doTerm Translations.Cc)
- , ("headtoname", doTerm Translations.To)
- , ("pagename", doTerm Translations.Page)
- , ("seename", doTerm Translations.See)
- , ("seealsoname", doTerm Translations.SeeAlso)
- , ("proofname", doTerm Translations.Proof)
- , ("glossaryname", doTerm Translations.Glossary)
- , ("lstlistingname", doTerm Translations.Listing)
- , ("cite", citation "cite" NormalCitation False)
- , ("Cite", citation "Cite" NormalCitation False)
- , ("citep", citation "citep" NormalCitation False)
- , ("citep*", citation "citep*" NormalCitation False)
- , ("citeal", citation "citeal" NormalCitation False)
- , ("citealp", citation "citealp" NormalCitation False)
- , ("citealp*", citation "citealp*" NormalCitation False)
- , ("autocite", citation "autocite" NormalCitation False)
- , ("smartcite", citation "smartcite" NormalCitation False)
- , ("footcite", inNote <$> citation "footcite" NormalCitation False)
- , ("parencite", citation "parencite" NormalCitation False)
- , ("supercite", citation "supercite" NormalCitation False)
- , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
- , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
- , ("citeyear", citation "citeyear" SuppressAuthor False)
- , ("autocite*", citation "autocite*" SuppressAuthor False)
- , ("cite*", citation "cite*" SuppressAuthor False)
- , ("parencite*", citation "parencite*" SuppressAuthor False)
- , ("textcite", citation "textcite" AuthorInText False)
- , ("citet", citation "citet" AuthorInText False)
- , ("citet*", citation "citet*" AuthorInText False)
- , ("citealt", citation "citealt" AuthorInText False)
- , ("citealt*", citation "citealt*" AuthorInText False)
- , ("textcites", citation "textcites" AuthorInText True)
- , ("cites", citation "cites" NormalCitation True)
- , ("autocites", citation "autocites" NormalCitation True)
- , ("footcites", inNote <$> citation "footcites" NormalCitation True)
- , ("parencites", citation "parencites" NormalCitation True)
- , ("supercites", citation "supercites" NormalCitation True)
- , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
- , ("Autocite", citation "Autocite" NormalCitation False)
- , ("Smartcite", citation "Smartcite" NormalCitation False)
- , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
- , ("Parencite", citation "Parencite" NormalCitation False)
- , ("Supercite", citation "Supercite" NormalCitation False)
- , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
- , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
- , ("Citeyear", citation "Citeyear" SuppressAuthor False)
- , ("Autocite*", citation "Autocite*" SuppressAuthor False)
- , ("Cite*", citation "Cite*" SuppressAuthor False)
- , ("Parencite*", citation "Parencite*" SuppressAuthor False)
- , ("Textcite", citation "Textcite" AuthorInText False)
- , ("Textcites", citation "Textcites" AuthorInText True)
- , ("Cites", citation "Cites" NormalCitation True)
- , ("Autocites", citation "Autocites" NormalCitation True)
- , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
- , ("Parencites", citation "Parencites" NormalCitation True)
- , ("Supercites", citation "Supercites" NormalCitation True)
- , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
- , ("citetext", complexNatbibCitation NormalCitation)
- , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
- complexNatbibCitation AuthorInText)
- <|> citation "citeauthor" AuthorInText False)
- , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
- addMeta "nocite"))
- , ("hyperlink", hyperlink)
- , ("hypertarget", hypertargetInline)
- -- glossaries package
- , ("gls", doAcronym "short")
- , ("Gls", doAcronym "short")
- , ("glsdesc", doAcronym "long")
- , ("Glsdesc", doAcronym "long")
- , ("GLSdesc", doAcronym "long")
- , ("acrlong", doAcronym "long")
- , ("Acrlong", doAcronym "long")
- , ("acrfull", doAcronym "full")
- , ("Acrfull", doAcronym "full")
- , ("acrshort", doAcronym "abbrv")
- , ("Acrshort", doAcronym "abbrv")
- , ("glspl", doAcronymPlural "short")
- , ("Glspl", doAcronymPlural "short")
- , ("glsdescplural", doAcronymPlural "long")
- , ("Glsdescplural", doAcronymPlural "long")
- , ("GLSdescplural", doAcronymPlural "long")
- -- acronyms package
- , ("ac", doAcronym "short")
- , ("acf", doAcronym "full")
- , ("acs", doAcronym "abbrv")
- , ("acl", doAcronym "long")
- , ("acp", doAcronymPlural "short")
- , ("acfp", doAcronymPlural "full")
- , ("acsp", doAcronymPlural "abbrv")
- , ("aclp", doAcronymPlural "long")
- , ("Ac", doAcronym "short")
- , ("Acf", doAcronym "full")
- , ("Acs", doAcronym "abbrv")
- , ("Acl", doAcronym "long")
- , ("Acp", doAcronymPlural "short")
- , ("Acfp", doAcronymPlural "full")
- , ("Acsp", doAcronymPlural "abbrv")
- , ("Aclp", doAcronymPlural "long")
- -- siuntix
- , ("si", skipopts *> dosi tok)
- , ("SI", doSI tok)
- , ("SIrange", doSIrange True tok)
- , ("numrange", doSIrange False tok)
- , ("numlist", doSInumlist)
- , ("num", doSInum)
- , ("ang", doSIang)
- -- hyphenat
- , ("bshyp", lit "\\\173")
- , ("fshyp", lit "/\173")
- , ("dothyp", lit ".\173")
- , ("colonhyp", lit ":\173")
- , ("hyp", lit "-")
- , ("nohyphens", tok)
- , ("textnhtt", ttfamily)
- , ("nhttfamily", ttfamily)
- -- LaTeX colors
- , ("textcolor", coloredInline "color")
- , ("colorbox", coloredInline "background-color")
- -- fontawesome
- , ("faCheck", lit "\10003")
- , ("faClose", lit "\10007")
- -- xspace
- , ("xspace", doxspace)
- -- etoolbox
- , ("ifstrequal", ifstrequal)
- , ("newtoggle", braced >>= newToggle)
- , ("toggletrue", braced >>= setToggle True)
- , ("togglefalse", braced >>= setToggle False)
- , ("iftoggle", try $ ifToggle >> inline)
- -- biblatex misc
- , ("RN", romanNumeralUpper)
- , ("Rn", romanNumeralLower)
- -- babel
- , ("foreignlanguage", foreignlanguage)
- -- include
- , ("input", rawInlineOr "input" $ include "input")
- -- soul package
- , ("ul", underline <$> tok)
- -- ulem package
- , ("uline", underline <$> tok)
- -- plain tex stuff that should just be passed through as raw tex
- , ("ifdim", ifdim)
- -- stackengine
- , ("addstackgap", skipopts *> tok)
- ]
-
-accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
-accent combiningAccent fallBack = try $ do
- ils <- tok
- case toList ils of
- (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
- -- try to normalize to the combined character:
- Str (Normalize.normalize Normalize.NFC
- (T.pack [x, combiningAccent]) <> xs) : ys
- [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
- [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
- _ -> return ils
-
+inlineCommands = M.unions
+ [ accentCommands tok
+ , citationCommands inline
+ , siunitxCommands tok
+ , acronymCommands
+ , refCommands
+ , nameCommands
+ , verbCommands
+ , charCommands
+ , enquoteCommands tok
+ , inlineLanguageCommands tok
+ , biblatexInlineCommands tok
+ , rest ]
+ where
+ rest = M.fromList
+ [ ("emph", extractSpaces emph <$> tok)
+ , ("textit", extractSpaces emph <$> tok)
+ , ("textsl", extractSpaces emph <$> tok)
+ , ("textsc", extractSpaces smallcaps <$> tok)
+ , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+ , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+ , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+ , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+ , ("texttt", ttfamily)
+ , ("sout", extractSpaces strikeout <$> tok)
+ , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+ , ("textsuperscript", extractSpaces superscript <$> tok)
+ , ("textsubscript", extractSpaces subscript <$> tok)
+ , ("textbf", extractSpaces strong <$> tok)
+ , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+ , ("underline", underline <$> tok)
+ , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
+ , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
+ , ("lettrine", rawInlineOr "lettrine" lettrine)
+ , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
+ , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
+ , ("ensuremath", mathInline . untokenize <$> braced)
+ , ("texorpdfstring", const <$> tok <*> tok)
+ -- old TeX commands
+ , ("em", extractSpaces emph <$> inlines)
+ , ("it", extractSpaces emph <$> inlines)
+ , ("sl", extractSpaces emph <$> inlines)
+ , ("bf", extractSpaces strong <$> inlines)
+ , ("tt", code . stringify . toList <$> inlines)
+ , ("rm", inlines)
+ , ("itshape", extractSpaces emph <$> inlines)
+ , ("slshape", extractSpaces emph <$> inlines)
+ , ("scshape", extractSpaces smallcaps <$> inlines)
+ , ("bfseries", extractSpaces strong <$> inlines)
+ , ("MakeUppercase", makeUppercase <$> tok)
+ , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
+ , ("uppercase", makeUppercase <$> tok)
+ , ("MakeLowercase", makeLowercase <$> tok)
+ , ("MakeTextLowercase", makeLowercase <$> tok)
+ , ("lowercase", makeLowercase <$> tok)
+ , ("thanks", skipopts >> note <$> grouped block)
+ , ("footnote", skipopts >> note <$> grouped block)
+ , ("passthrough", tok) -- \passthrough macro used by latex writer
+ -- for listings
+ , ("includegraphics", do options <- option [] keyvals
+ src <- braced
+ mkImage options .
+ unescapeURL .
+ removeDoubleQuotes $ untokenize src)
+ -- hyperref
+ , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
+ bracedUrl)
+ , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
+ , ("href", do url <- bracedUrl
+ sp
+ link (unescapeURL $ untokenize url) "" <$> tok)
+ , ("hyperlink", hyperlink)
+ , ("hyperref", hyperref)
+ , ("hypertarget", hypertargetInline)
+ -- hyphenat
+ , ("nohyphens", tok)
+ , ("textnhtt", ttfamily)
+ , ("nhttfamily", ttfamily)
+ -- LaTeX colors
+ , ("textcolor", coloredInline "color")
+ , ("colorbox", coloredInline "background-color")
+ -- etoolbox
+ , ("ifstrequal", ifstrequal)
+ , ("newtoggle", braced >>= newToggle)
+ , ("toggletrue", braced >>= setToggle True)
+ , ("togglefalse", braced >>= setToggle False)
+ , ("iftoggle", try $ ifToggle >> inline)
+ -- include
+ , ("input", rawInlineOr "input" $ include "input")
+ -- soul package
+ , ("ul", underline <$> tok)
+ -- ulem package
+ , ("uline", underline <$> tok)
+ -- plain tex stuff that should just be passed through as raw tex
+ , ("ifdim", ifdim)
+ -- stackengine
+ , ("addstackgap", skipopts *> tok)
+ ]
lettrine :: PandocMonad m => LP m Inlines
lettrine = do
- optional opt
+ optional rawopt
x <- tok
y <- tok
return $ extractSpaces (spanWith ("",["lettrine"],[])) x <> smallcaps y
@@ -979,32 +446,18 @@ alterStr :: (Text -> Text) -> Inline -> Inline
alterStr f (Str xs) = Str (f xs)
alterStr _ x = x
-foreignlanguage :: PandocMonad m => LP m Inlines
-foreignlanguage = do
- babelLang <- untokenize <$> braced
- case babelLangToBCP47 babelLang of
- Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok
- _ -> tok
-
-inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47
- where
- mk (polyglossia, bcp47Func) =
- ("text" <> polyglossia, inlineLanguage bcp47Func)
-
-inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines
-inlineLanguage bcp47Func = do
- o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
- <$> rawopt
- let lang = renderLang $ bcp47Func o
- extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
-
hyperlink :: PandocMonad m => LP m Inlines
hyperlink = try $ do
src <- untokenize <$> braced
lab <- tok
return $ link ("#" <> src) "" lab
+hyperref :: PandocMonad m => LP m Inlines
+hyperref = try $ do
+ url <- (("#" <>) . untokenize <$> try (sp *> bracketedToks <* sp))
+ <|> untokenize <$> (bracedUrl <* bracedUrl <* bracedUrl)
+ link url "" <$> tok
+
hypertargetBlock :: PandocMonad m => LP m Blocks
hypertargetBlock = try $ do
ref <- untokenize <$> braced
@@ -1019,31 +472,6 @@ hypertargetInline = try $ do
ils <- grouped inline
return $ spanWith (ref, [], []) ils
-romanNumeralUpper :: (PandocMonad m) => LP m Inlines
-romanNumeralUpper =
- str . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralLower :: (PandocMonad m) => LP m Inlines
-romanNumeralLower =
- str . T.toLower . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralArg :: (PandocMonad m) => LP m Int
-romanNumeralArg = spaces *> (parser <|> inBraces)
- where
- inBraces = do
- symbol '{'
- spaces
- res <- parser
- spaces
- symbol '}'
- return res
- parser = do
- Tok _ Word s <- satisfyTok isWordTok
- let (digits, rest) = T.span isDigit s
- unless (T.null rest) $
- Prelude.fail "Non-digits in argument to \\Rn or \\RN"
- safeRead digits
-
newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a
newToggle name = do
updateState $ \st ->
@@ -1074,9 +502,6 @@ ifToggle = do
report $ UndefinedToggle name' pos
return ()
-doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
-doTerm term = str <$> translateTerm term
-
ifstrequal :: (PandocMonad m, Monoid a) => LP m a
ifstrequal = do
str1 <- tok
@@ -1097,13 +522,6 @@ coloredInline stylename = do
ttfamily :: PandocMonad m => LP m Inlines
ttfamily = code . stringify . toList <$> tok
-rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
-rawInlineOr name' fallback = do
- parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
- if parseRaw
- then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
- else fallback
-
processHBox :: Inlines -> Inlines
processHBox = walk convert
where
@@ -1154,79 +572,90 @@ treatAsInline = Set.fromList
, "pagebreak"
]
-label :: PandocMonad m => LP m ()
-label = do
- controlSeq "label"
- t <- braced
- updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
-
-dolabel :: PandocMonad m => LP m Inlines
-dolabel = do
- v <- braced
- let refstr = untokenize v
- updateState $ \st ->
- st{ sLastLabel = Just refstr }
- return $ spanWith (refstr,[],[("label", refstr)])
- $ inBrackets $ str $ untokenize v
-
-doref :: PandocMonad m => Text -> LP m Inlines
-doref cls = do
- v <- braced
- let refstr = untokenize v
- return $ linkWith ("",[],[ ("reference-type", cls)
- , ("reference", refstr)])
- ("#" <> refstr)
- ""
- (inBrackets $ str refstr)
-
lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
lookupListDefault d = (fromMaybe d .) . lookupList
where lookupList l m = msum $ map (`M.lookup` m) l
inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
- <|> (space <$ whitespace)
- <|> (softbreak <$ endline)
- <|> word
- <|> macroDef (rawInline "latex")
- <|> inlineCommand'
- <|> inlineEnvironment
- <|> inlineGroup
- <|> (symbol '-' *>
- option (str "-") (symbol '-' *>
- option (str "–") (str "—" <$ symbol '-')))
- <|> doubleQuote
- <|> singleQuote
- <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
- <|> (str "”" <$ symbol '”')
- <|> (str "’" <$ symbol '\'')
- <|> (str "’" <$ symbol '’')
- <|> (str "\160" <$ symbol '~')
- <|> dollarsMath
- <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
- <|> (str . T.singleton <$> primEscape)
- <|> regularSymbol
- <|> (do res <- symbolIn "#^'`\"[]&"
- pos <- getPosition
- let s = untoken res
- report $ ParsingUnescaped s pos
- return $ str s)
+inline = do
+ Tok pos toktype t <- lookAhead anyTok
+ let symbolAsString = str . untoken <$> anySymbol
+ let unescapedSymbolAsString =
+ do s <- untoken <$> anySymbol
+ report $ ParsingUnescaped s pos
+ return $ str s
+ case toktype of
+ Comment -> mempty <$ comment
+ Spaces -> space <$ whitespace
+ Newline -> softbreak <$ endline
+ Word -> word
+ Esc1 -> str . T.singleton <$> primEscape
+ Esc2 -> str . T.singleton <$> primEscape
+ Symbol ->
+ case t of
+ "-" -> symbol '-' *>
+ option (str "-") (symbol '-' *>
+ option (str "–") (str "—" <$ symbol '-'))
+ "'" -> symbol '\'' *>
+ option (str "’") (str "”" <$ symbol '\'')
+ "~" -> str "\160" <$ symbol '~'
+ "`" -> doubleQuote <|> singleQuote <|> symbolAsString
+ "\"" -> doubleQuote <|> singleQuote <|> symbolAsString
+ "“" -> doubleQuote <|> symbolAsString
+ "‘" -> singleQuote <|> symbolAsString
+ "$" -> dollarsMath <|> unescapedSymbolAsString
+ "|" -> (guardEnabled Ext_literate_haskell *>
+ symbol '|' *> doLHSverb) <|> symbolAsString
+ "{" -> inlineGroup
+ "#" -> unescapedSymbolAsString
+ "&" -> unescapedSymbolAsString
+ "_" -> unescapedSymbolAsString
+ "^" -> unescapedSymbolAsString
+ "\\" -> mzero
+ "}" -> mzero
+ _ -> symbolAsString
+ CtrlSeq _ -> macroDef (rawInline "latex")
+ <|> inlineCommand'
+ <|> inlineEnvironment
+ <|> inlineGroup
+ _ -> mzero
inlines :: PandocMonad m => LP m Inlines
inlines = mconcat <$> many inline
+opt :: PandocMonad m => LP m Inlines
+opt = do
+ toks <- try (sp *> bracketedToks <* sp)
+ -- now parse the toks as inlines
+ st <- getState
+ parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
+ case parsed of
+ Right result -> return result
+ Left e -> throwError $ PandocParsecError (toSources toks) e
+
-- block elements:
preamble :: PandocMonad m => LP m Blocks
preamble = mconcat <$> many preambleBlock
where preambleBlock = (mempty <$ spaces1)
<|> macroDef (rawBlock "latex")
+ <|> filecontents
<|> (mempty <$ blockCommand)
<|> (mempty <$ braced)
<|> (do notFollowedBy (begin_ "document")
anyTok
return mempty)
+rule :: PandocMonad m => LP m Blocks
+rule = do
+ skipopts
+ width <- T.takeWhile (\c -> isDigit c || c == '.') . stringify <$> tok
+ _thickness <- tok
+ -- 0-width rules are used to fix spacing issues:
+ case safeRead width of
+ Just (0 :: Double) -> return mempty
+ _ -> return horizontalRule
+
paragraph :: PandocMonad m => LP m Blocks
paragraph = do
x <- trimInlines . mconcat <$> many1 inline
@@ -1264,6 +693,16 @@ include name = do
mapM_ (insertIncluded defaultExt) fs
return mempty
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+ fileContentsMap <- sFileContents <$> getState
+ case M.lookup (T.pack fp) fileContentsMap of
+ Just t -> return (Just t)
+ Nothing -> do
+ dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+ <$> lookupEnv "TEXINPUTS"
+ readFileFromDirs dirs fp
+
insertIncluded :: PandocMonad m
=> FilePath
-> FilePath
@@ -1273,13 +712,12 @@ insertIncluded defaultExtension f' = do
".tex" -> f'
".sty" -> f'
_ -> addExtension f' defaultExtension
- dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
pos <- getPosition
containers <- getIncludeFiles <$> getState
when (T.pack f `elem` containers) $
throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
updateState $ addIncludeFile $ T.pack f
- mbcontents <- readFileFromDirs dirs f
+ mbcontents <- readFileFromTexinputs f
contents <- case mbcontents of
Just s -> return s
Nothing -> do
@@ -1288,10 +726,6 @@ insertIncluded defaultExtension f' = do
getInput >>= setInput . (tokenize f contents ++)
updateState dropLatestIncludeFile
-addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
-addMeta field val = updateState $ \st ->
- st{ sMeta = addMetaField field val $ sMeta st }
-
authors :: PandocMonad m => LP m ()
authors = try $ do
bgroup
@@ -1300,150 +734,6 @@ authors = try $ do
egroup
addMeta "author" (map trimInlines auths)
-macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
-macroDef constructor = do
- (_, s) <- withRaw (commandDef <|> environmentDef)
- (constructor (untokenize s) <$
- guardDisabled Ext_latex_macros)
- <|> return mempty
- where commandDef = do
- (name, macro') <- newcommand <|> letmacro <|> defmacro
- guardDisabled Ext_latex_macros <|>
- updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
- environmentDef = do
- mbenv <- newenvironment
- case mbenv of
- Nothing -> return ()
- Just (name, macro1, macro2) ->
- guardDisabled Ext_latex_macros <|>
- do updateState $ \s -> s{ sMacros =
- M.insert name macro1 (sMacros s) }
- updateState $ \s -> s{ sMacros =
- M.insert ("end" <> name) macro2 (sMacros s) }
- -- @\newenvironment{envname}[n-args][default]{begin}{end}@
- -- is equivalent to
- -- @\newcommand{\envname}[n-args][default]{begin}@
- -- @\newcommand{\endenvname}@
-
-letmacro :: PandocMonad m => LP m (Text, Macro)
-letmacro = do
- controlSeq "let"
- (name, contents) <- withVerbatimMode $ do
- Tok _ (CtrlSeq name) _ <- anyControlSeq
- optional $ symbol '='
- spaces
- -- we first parse in verbatim mode, and then expand macros,
- -- because we don't want \let\foo\bar to turn into
- -- \let\foo hello if we have previously \def\bar{hello}
- contents <- bracedOrToken
- return (name, contents)
- contents' <- doMacros' 0 contents
- return (name, Macro ExpandWhenDefined [] Nothing contents')
-
-defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $
- -- we use withVerbatimMode, because macros are to be expanded
- -- at point of use, not point of definition
- withVerbatimMode $ do
- controlSeq "def"
- Tok _ (CtrlSeq name) _ <- anyControlSeq
- argspecs <- many (argspecArg <|> argspecPattern)
- contents <- bracedOrToken
- return (name, Macro ExpandWhenUsed argspecs Nothing contents)
-
-argspecArg :: PandocMonad m => LP m ArgSpec
-argspecArg = do
- Tok _ (Arg i) _ <- satisfyTok isArgTok
- return $ ArgNum i
-
-argspecPattern :: PandocMonad m => LP m ArgSpec
-argspecPattern =
- Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
- (toktype' == Symbol || toktype' == Word) &&
- (txt /= "{" && txt /= "\\" && txt /= "}")))
-
-newcommand :: PandocMonad m => LP m (Text, Macro)
-newcommand = do
- pos <- getPosition
- Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
- controlSeq "renewcommand" <|>
- controlSeq "providecommand" <|>
- controlSeq "DeclareMathOperator" <|>
- controlSeq "DeclareRobustCommand"
- withVerbatimMode $ do
- Tok _ (CtrlSeq name) txt <- do
- optional (symbol '*')
- anyControlSeq <|>
- (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
- spaces
- numargs <- option 0 $ try bracketedNum
- let argspecs = map ArgNum [1..numargs]
- spaces
- optarg <- option Nothing $ Just <$> try bracketedToks
- spaces
- contents' <- bracedOrToken
- let contents =
- case mtype of
- "DeclareMathOperator" ->
- Tok pos (CtrlSeq "mathop") "\\mathop"
- : Tok pos Symbol "{"
- : Tok pos (CtrlSeq "mathrm") "\\mathrm"
- : Tok pos Symbol "{"
- : (contents' ++
- [ Tok pos Symbol "}", Tok pos Symbol "}" ])
- _ -> contents'
- macros <- sMacros <$> getState
- case M.lookup name macros of
- Just macro
- | mtype == "newcommand" -> do
- report $ MacroAlreadyDefined txt pos
- return (name, macro)
- | mtype == "providecommand" -> return (name, macro)
- _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
-
-newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
-newenvironment = do
- pos <- getPosition
- Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
- controlSeq "renewenvironment" <|>
- controlSeq "provideenvironment"
- withVerbatimMode $ do
- optional $ symbol '*'
- spaces
- name <- untokenize <$> braced
- spaces
- numargs <- option 0 $ try bracketedNum
- spaces
- optarg <- option Nothing $ Just <$> try bracketedToks
- let argspecs = map (\i -> ArgNum i) [1..numargs]
- startcontents <- spaces >> bracedOrToken
- endcontents <- spaces >> bracedOrToken
- macros <- sMacros <$> getState
- case M.lookup name macros of
- Just _
- | mtype == "newenvironment" -> do
- report $ MacroAlreadyDefined name pos
- return Nothing
- | mtype == "provideenvironment" ->
- return Nothing
- _ -> return $ Just (name,
- Macro ExpandWhenUsed argspecs optarg startcontents,
- Macro ExpandWhenUsed [] Nothing endcontents)
-
-bracketedNum :: PandocMonad m => LP m Int
-bracketedNum = do
- ds <- untokenize <$> bracketedToks
- case safeRead ds of
- Just i -> return i
- _ -> return 0
-
-setCaption :: PandocMonad m => LP m ()
-setCaption = try $ do
- skipopts
- ils <- tok
- optional $ try $ spaces *> label
- updateState $ \st -> st{ sCaption = Just ils }
-
looseItem :: PandocMonad m => LP m Blocks
looseItem = do
inListItem <- sInListItem <$> getState
@@ -1457,10 +747,6 @@ epigraph = do
p2 <- grouped block
return $ divWith ("", ["epigraph"], []) (p1 <> p2)
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption = Nothing
- , sLastLabel = Nothing }
-
section :: PandocMonad m => Attr -> Int -> LP m Blocks
section (ident, classes, kvs) lvl = do
skipopts
@@ -1554,7 +840,7 @@ blockCommands = M.fromList
, ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
, ("signature", mempty <$ (skipopts *> authors))
, ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
- , ("newtheorem", newtheorem)
+ , ("newtheorem", newtheorem inline)
, ("theoremstyle", theoremstyle)
-- KOMA-Script metadata commands
, ("extratitle", mempty <$ (skipopts *> tok >>= addMeta "extratitle"))
@@ -1598,11 +884,11 @@ blockCommands = M.fromList
--
, ("hrule", pure horizontalRule)
, ("strut", pure mempty)
- , ("rule", skipopts *> tok *> tok $> horizontalRule)
+ , ("rule", rule)
, ("item", looseItem)
, ("documentclass", skipopts *> braced *> preamble)
, ("centerline", para . trimInlines <$> (skipopts *> tok))
- , ("caption", mempty <$ setCaption)
+ , ("caption", mempty <$ setCaption inline)
, ("bibliography", mempty <$ (skipopts *> braced >>=
addMeta "bibliography" . splitBibs . untokenize))
, ("addbibresource", mempty <$ (skipopts *> braced >>=
@@ -1640,7 +926,8 @@ blockCommands = M.fromList
environments :: PandocMonad m => M.Map Text (LP m Blocks)
-environments = M.fromList
+environments = M.union (tableEnvironments blocks inline) $
+ M.fromList
[ ("document", env "document" blocks <* skipMany anyTok)
, ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
, ("sloppypar", env "sloppypar" blocks)
@@ -1654,13 +941,6 @@ environments = M.fromList
, ("flushright", divWith ("", ["flushright"], []) <$> env "flushright" blocks)
, ("flushleft", divWith ("", ["flushleft"], []) <$> env "flushleft" blocks)
, ("landscape", env "landscape" blocks)
- , ("longtable", env "longtable" $
- resetCaption *> simpTable "longtable" False >>= addTableCaption)
- , ("table", env "table" $
- skipopts *> resetCaption *> blocks >>= addTableCaption)
- , ("tabular*", env "tabular*" $ simpTable "tabular*" True)
- , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
- , ("tabular", env "tabular" $ simpTable "tabular" False)
, ("quote", blockQuote <$> env "quote" blocks)
, ("quotation", blockQuote <$> env "quotation" blocks)
, ("verse", blockQuote <$> env "verse" blocks)
@@ -1683,7 +963,7 @@ environments = M.fromList
, ("lilypond", rawVerbEnv "lilypond")
, ("ly", rawVerbEnv "ly")
-- amsthm
- , ("proof", proof)
+ , ("proof", proof blocks opt)
-- etoolbox
, ("ifstrequal", ifstrequal)
, ("newtoggle", braced >>= newToggle)
@@ -1692,130 +972,29 @@ environments = M.fromList
, ("iftoggle", try $ ifToggle >> block)
]
-theoremstyle :: PandocMonad m => LP m Blocks
-theoremstyle = do
- stylename <- untokenize <$> braced
- let mbstyle = case stylename of
- "plain" -> Just PlainStyle
- "definition" -> Just DefinitionStyle
- "remark" -> Just RemarkStyle
- _ -> Nothing
- case mbstyle of
- Nothing -> return ()
- Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
- return mempty
-
-newtheorem :: PandocMonad m => LP m Blocks
-newtheorem = do
- number <- option True (False <$ symbol '*' <* sp)
+filecontents :: PandocMonad m => LP m Blocks
+filecontents = try $ do
+ controlSeq "begin"
name <- untokenize <$> braced
- sp
- series <- option Nothing $ Just . untokenize <$> bracketedToks
- sp
- showName <- tok
- sp
- syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
- sty <- sLastTheoremStyle <$> getState
- let spec = TheoremSpec { theoremName = showName
- , theoremStyle = sty
- , theoremSeries = series
- , theoremSyncTo = syncTo
- , theoremNumber = number
- , theoremLastNum = DottedNum [0] }
- tmap <- sTheoremMap <$> getState
- updateState $ \s -> s{ sTheoremMap =
- M.insert name spec tmap }
+ guard $ name == "filecontents" || name == "filecontents*"
+ skipopts
+ fp <- untokenize <$> braced
+ txt <- verbEnv name
+ updateState $ \st ->
+ st{ sFileContents = M.insert fp txt (sFileContents st) }
return mempty
-proof :: PandocMonad m => LP m Blocks
-proof = do
- title <- option (B.text "Proof") opt
- bs <- env "proof" blocks
- return $
- B.divWith ("", ["proof"], []) $
- addQed $ addTitle (B.emph (title <> ".")) bs
-
-addTitle :: Inlines -> Blocks -> Blocks
-addTitle ils bs =
- case B.toList bs of
- (Para xs : rest)
- -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
- _ -> B.para ils <> bs
-
-addQed :: Blocks -> Blocks
-addQed bs =
- case Seq.viewr (B.unMany bs) of
- s Seq.:> Para ils
- -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
- _ -> bs <> B.para qedSign
- where
- qedSign = B.str "\xa0\x25FB"
-
environment :: PandocMonad m => LP m Blocks
environment = try $ do
controlSeq "begin"
name <- untokenize <$> braced
M.findWithDefault mzero name environments <|>
- theoremEnvironment name <|>
+ theoremEnvironment blocks opt name <|>
if M.member name (inlineEnvironments
:: M.Map Text (LP PandocPure Inlines))
then mzero
else try (rawEnv name) <|> rawVerbEnv name
-theoremEnvironment :: PandocMonad m => Text -> LP m Blocks
-theoremEnvironment name = do
- tmap <- sTheoremMap <$> getState
- case M.lookup name tmap of
- Nothing -> mzero
- Just tspec -> do
- optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
- mblabel <- option Nothing $ Just . untokenize <$>
- try (spaces >> controlSeq "label" >> spaces >> braced)
- bs <- env name blocks
- number <-
- if theoremNumber tspec
- then do
- let name' = fromMaybe name $ theoremSeries tspec
- num <- getNextNumber
- (maybe (DottedNum [0]) theoremLastNum .
- M.lookup name' . sTheoremMap)
- updateState $ \s ->
- s{ sTheoremMap =
- M.adjust
- (\spec -> spec{ theoremLastNum = num })
- name'
- (sTheoremMap s)
- }
-
- case mblabel of
- Just ident ->
- updateState $ \s ->
- s{ sLabels = M.insert ident
- (B.toList $
- theoremName tspec <> "\160" <>
- str (renderDottedNum num)) (sLabels s) }
- Nothing -> return ()
- return $ space <> B.text (renderDottedNum num)
- else return mempty
- let titleEmph = case theoremStyle tspec of
- PlainStyle -> B.strong
- DefinitionStyle -> B.strong
- RemarkStyle -> B.emph
- let title = titleEmph (theoremName tspec <> number)
- <> optTitle <> "." <> space
- return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
- $ case theoremStyle tspec of
- PlainStyle -> walk italicize bs
- _ -> bs
-
-italicize :: Block -> Block
-italicize (Para ils) = Para [Emph ils]
-italicize (Plain ils) = Plain [Emph ils]
-italicize x = x
-
-env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
-
rawEnv :: PandocMonad m => Text -> LP m Blocks
rawEnv name = do
exts <- getOption readerExtensions
@@ -1823,15 +1002,17 @@ rawEnv name = do
rawOptions <- mconcat <$> many rawopt
let beginCommand = "\\begin{" <> name <> "}" <> rawOptions
pos1 <- getPosition
- (bs, raw) <- withRaw $ env name blocks
if parseRaw
- then return $ rawBlock "latex"
+ then do
+ (_, raw) <- withRaw $ env name blocks
+ return $ rawBlock "latex"
$ beginCommand <> untokenize raw
else do
+ bs <- env name blocks
report $ SkippedContent beginCommand pos1
pos2 <- getPosition
report $ SkippedContent ("\\end{" <> name <> "}") pos2
- return bs
+ return $ divWith ("",[name],[]) bs
rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
rawVerbEnv name = do
@@ -1890,8 +1071,7 @@ inputMinted = do
pos <- getPosition
attr <- mintedAttr
f <- T.filter (/='"') . untokenize <$> braced
- dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
- mbCode <- readFileFromDirs dirs (T.unpack f)
+ mbCode <- readFileFromTexinputs (T.unpack f)
rawcode <- case mbCode of
Just s -> return s
Nothing -> do
@@ -1989,8 +1169,7 @@ inputListing = do
pos <- getPosition
options <- option [] keyvals
f <- T.filter (/='"') . untokenize <$> braced
- dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
- mbCode <- readFileFromDirs dirs (T.unpack f)
+ mbCode <- readFileFromTexinputs (T.unpack f)
codeLines <- case mbCode of
Just s -> return $ T.lines s
Nothing -> do
@@ -1999,7 +1178,8 @@ inputListing = do
let (ident,classes,kvs) = parseListingsOptions options
let classes' =
(case listingsLanguage options of
- Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>)
+ Nothing -> (take 1 (languagesByExtension defaultSyntaxMap
+ (T.pack $ takeExtension $ T.unpack f)) <>)
Just _ -> id) classes
let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
let lastline = fromMaybe (length codeLines) $
@@ -2065,358 +1245,23 @@ orderedList' = try $ do
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
--- tables
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
- spaces
- controlSeq "hline" <|>
- -- booktabs rules:
- controlSeq "toprule" <|>
- controlSeq "bottomrule" <|>
- controlSeq "midrule" <|>
- controlSeq "endhead" <|>
- controlSeq "endfirsthead"
- spaces
- optional opt
- return ()
-
-lbreak :: PandocMonad m => LP m Tok
-lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
- <* skipopts <* spaces
-
-amp :: PandocMonad m => LP m Tok
-amp = symbol '&'
-
--- Split a Word into individual Symbols (for parseAligns)
-splitWordTok :: PandocMonad m => LP m ()
-splitWordTok = do
- inp <- getInput
- case inp of
- (Tok spos Word t : rest) ->
- setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
- _ -> return ()
-
-parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
-parseAligns = try $ do
- let maybeBar = skipMany
- (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
- let cAlign = AlignCenter <$ symbol 'c'
- let lAlign = AlignLeft <$ symbol 'l'
- let rAlign = AlignRight <$ symbol 'r'
- let parAlign = AlignLeft <$ symbol 'p'
- -- aligns from tabularx
- let xAlign = AlignLeft <$ symbol 'X'
- let mAlign = AlignLeft <$ symbol 'm'
- let bAlign = AlignLeft <$ symbol 'b'
- let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
- <|> xAlign <|> mAlign <|> bAlign )
- let alignPrefix = symbol '>' >> braced
- let alignSuffix = symbol '<' >> braced
- let colWidth = try $ do
- symbol '{'
- ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
- spaces
- symbol '}'
- return $ safeRead ds
- let alignSpec = do
- pref <- option [] alignPrefix
- spaces
- al <- alignChar
- width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
- pos <- getPosition
- report $ SkippedContent s pos
- return Nothing)
- spaces
- suff <- option [] alignSuffix
- return (al, width, (pref, suff))
- let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
- symbol '*'
- spaces
- ds <- trim . untokenize <$> braced
- spaces
- spec <- braced
- case safeRead ds of
- Just n ->
- getInput >>= setInput . (mconcat (replicate n spec) ++)
- Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
- bgroup
- spaces
- maybeBar
- aligns' <- many $ try $ spaces >> optional starAlign >>
- (alignSpec <* maybeBar)
- spaces
- egroup
- spaces
- return $ map toSpec aligns'
- where
- toColWidth (Just w) | w > 0 = ColWidth w
- toColWidth _ = ColWidthDefault
- toSpec (x, y, z) = (x, toColWidth y, z)
-
--- N.B. this parser returns a Row that may have erroneous empty cells
--- in it. See the note above fixTableHead for details.
-parseTableRow :: PandocMonad m
- => Text -- ^ table environment name
- -> [([Tok], [Tok])] -- ^ pref/suffixes
- -> LP m Row
-parseTableRow envname prefsufs = do
- notFollowedBy (spaces *> end_ envname)
- -- add prefixes and suffixes in token stream:
- let celltoks (pref, suff) = do
- prefpos <- getPosition
- contents <- mconcat <$>
- many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711
- <|>
- snd <$> withRaw (inlineEnvironment <|> dollarsMath)
- <|>
- (do notFollowedBy
- (() <$ amp <|> () <$ lbreak <|> end_ envname)
- count 1 anyTok) )
-
- suffpos <- getPosition
- option [] (count 1 amp)
- return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
- rawcells <- mapM celltoks prefsufs
- oldInput <- getInput
- cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
- setInput oldInput
- spaces
- return $ Row nullAttr cells
-
-parseTableCell :: PandocMonad m => LP m Cell
-parseTableCell = do
- spaces
- updateState $ \st -> st{ sInTableCell = True }
- cell' <- multicolumnCell
- <|> multirowCell
- <|> parseSimpleCell
- <|> parseEmptyCell
- updateState $ \st -> st{ sInTableCell = False }
- spaces
- return cell'
- where
- -- The parsing of empty cells is important in LaTeX, especially when dealing
- -- with multirow/multicolumn. See #6603.
- parseEmptyCell = spaces $> emptyCell
-
-cellAlignment :: PandocMonad m => LP m Alignment
-cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
- where
- alignment = do
- c <- untoken <$> singleChar
- return $ case c of
- "l" -> AlignLeft
- "r" -> AlignRight
- "c" -> AlignCenter
- "*" -> AlignDefault
- _ -> AlignDefault
-
-plainify :: Blocks -> Blocks
-plainify bs = case toList bs of
- [Para ils] -> plain (fromList ils)
- _ -> bs
-
-multirowCell :: PandocMonad m => LP m Cell
-multirowCell = controlSeq "multirow" >> do
- -- Full prototype for \multirow macro is:
- -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
- -- However, everything except `nrows` and `text` make
- -- sense in the context of the Pandoc AST
- _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position
- nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
- _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related
- _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width
- _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning
- content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
- return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
-
-multicolumnCell :: PandocMonad m => LP m Cell
-multicolumnCell = controlSeq "multicolumn" >> do
- span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
- alignment <- symbol '{' *> cellAlignment <* symbol '}'
-
- let singleCell = do
- content <- plainify <$> blocks
- return $ cell alignment (RowSpan 1) (ColSpan span') content
-
- -- Two possible contents: either a \multirow cell, or content.
- -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
- -- Note that a \multirow cell can be nested in a \multicolumn,
- -- but not the other way around. See #6603
- let nestedCell = do
- (Cell _ _ (RowSpan rs) _ bs) <- multirowCell
- return $ cell
- alignment
- (RowSpan rs)
- (ColSpan span')
- (fromList bs)
-
- symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
-
--- Parse a simple cell, i.e. not multirow/multicol
-parseSimpleCell :: PandocMonad m => LP m Cell
-parseSimpleCell = simpleCell <$> (plainify <$> blocks)
-
--- LaTeX tables are stored with empty cells underneath multirow cells
--- denoting the grid spaces taken up by them. More specifically, if a
--- cell spans m rows, then it will overwrite all the cells in the
--- columns it spans for (m-1) rows underneath it, requiring padding
--- cells in these places. These padding cells need to be removed for
--- proper table reading. See #6603.
---
--- These fixTable functions do not otherwise fix up malformed
--- input tables: that is left to the table builder.
-fixTableHead :: TableHead -> TableHead
-fixTableHead (TableHead attr rows) = TableHead attr rows'
- where
- rows' = fixTableRows rows
-
-fixTableBody :: TableBody -> TableBody
-fixTableBody (TableBody attr rhc th tb)
- = TableBody attr rhc th' tb'
- where
- th' = fixTableRows th
- tb' = fixTableRows tb
-
-fixTableRows :: [Row] -> [Row]
-fixTableRows = fixTableRows' $ repeat Nothing
- where
- fixTableRows' oldHang (Row attr cells : rs)
- = let (newHang, cells') = fixTableRow oldHang cells
- rs' = fixTableRows' newHang rs
- in Row attr cells' : rs'
- fixTableRows' _ [] = []
-
--- The overhang is represented as Just (relative cell dimensions) or
--- Nothing for an empty grid space.
-fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
-fixTableRow oldHang cells
- -- If there's overhang, drop cells until their total width meets the
- -- width of the occupied grid spaces (or we run out)
- | (n, prefHang, restHang) <- splitHang oldHang
- , n > 0
- = let cells' = dropToWidth getCellW n cells
- (restHang', cells'') = fixTableRow restHang cells'
- in (prefHang restHang', cells'')
- -- Otherwise record the overhang of a pending cell and fix the rest
- -- of the row
- | c@(Cell _ _ h w _):cells' <- cells
- = let h' = max 1 h
- w' = max 1 w
- oldHang' = dropToWidth getHangW w' oldHang
- (newHang, cells'') = fixTableRow oldHang' cells'
- in (toHang w' h' <> newHang, c : cells'')
- | otherwise
- = (oldHang, [])
- where
- getCellW (Cell _ _ _ w _) = w
- getHangW = maybe 1 fst
- getCS (ColSpan n) = n
-
- toHang c r
- | r > 1 = [Just (c, r)]
- | otherwise = replicate (getCS c) Nothing
-
- -- Take the prefix of the overhang list representing filled grid
- -- spaces. Also return the remainder and the length of this prefix.
- splitHang = splitHang' 0 id
-
- splitHang' !n l (Just (c, r):xs)
- = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
- splitHang' n l xs = (n, l, xs)
-
- -- Drop list items until the total width of the dropped items
- -- exceeds the passed width.
- dropToWidth _ n l | n < 1 = l
- dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
- dropToWidth _ _ [] = []
-
-simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
-simpTable envname hasWidthParameter = try $ do
- when hasWidthParameter $ () <$ (spaces >> tok)
- skipopts
- colspecs <- parseAligns
- let (aligns, widths, prefsufs) = unzip3 colspecs
- optional $ controlSeq "caption" *> setCaption
- spaces
- optional label
- spaces
- optional lbreak
- spaces
- skipMany hline
- spaces
- header' <- option [] . try . fmap (:[]) $
- parseTableRow envname prefsufs <* lbreak <* many1 hline
- spaces
- rows <- sepEndBy (parseTableRow envname prefsufs)
- (lbreak <* optional (skipMany hline))
- spaces
- optional $ controlSeq "caption" *> setCaption
- spaces
- optional label
- spaces
- optional lbreak
- spaces
- lookAhead $ controlSeq "end" -- make sure we're at end
- let th = fixTableHead $ TableHead nullAttr header'
- let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
- let tf = TableFoot nullAttr []
- return $ table emptyCaption (zip aligns widths) th tbs tf
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
- where go (Table attr c spec th tb tf) = do
- st <- getState
- let mblabel = sLastLabel st
- capt <- case (sCaption st, mblabel) of
- (Just ils, Nothing) -> return $ caption Nothing (plain ils)
- (Just ils, Just lab) -> do
- num <- getNextNumber sLastTableNum
- setState
- st{ sLastTableNum = num
- , sLabels = M.insert lab
- [Str (renderDottedNum num)]
- (sLabels st) }
- return $ caption Nothing (plain ils) -- add number??
- (Nothing, _) -> return c
- let attr' = case (attr, mblabel) of
- ((_,classes,kvs), Just ident) ->
- (ident,classes,kvs)
- _ -> attr
- return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
- go x = return x
-
--- TODO: For now we add a Div to contain table attributes, since
--- most writers don't do anything yet with attributes on Table.
--- This can be removed when that changes.
-addAttrDiv :: Attr -> Block -> Block
-addAttrDiv ("",[],[]) b = b
-addAttrDiv attr b = Div attr [b]
-
block :: PandocMonad m => LP m Blocks
block = do
- res <- (mempty <$ spaces1)
- <|> environment
- <|> macroDef (rawBlock "latex")
- <|> blockCommand
- <|> paragraph
- <|> grouped block
+ Tok _ toktype _ <- lookAhead anyTok
+ res <- (case toktype of
+ Newline -> mempty <$ spaces1
+ Spaces -> mempty <$ spaces1
+ Comment -> mempty <$ spaces1
+ Word -> paragraph
+ CtrlSeq "begin" -> environment
+ CtrlSeq _ -> macroDef (rawBlock "latex")
+ <|> blockCommand
+ _ -> mzero)
+ <|> paragraph
+ <|> grouped block
trace (T.take 60 $ tshow $ B.toList res)
return res
blocks :: PandocMonad m => LP m Blocks
blocks = mconcat <$> many block
-setDefaultLanguage :: PandocMonad m => LP m Blocks
-setDefaultLanguage = do
- o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
- <$> rawopt
- polylang <- untokenize <$> braced
- case M.lookup polylang polyglossiaLangToBCP47 of
- Nothing -> return mempty -- TODO mzero? warning?
- Just langFunc -> do
- let l = langFunc o
- setTranslations l
- updateState $ setMeta "lang" $ str (renderLang l)
- return mempty
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
new file mode 100644
index 000000000..af97125c6
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -0,0 +1,210 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Citation
+ ( citationCommands
+ , cites
+ )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (mzero)
+import Control.Monad.Trans (lift)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParsecError))
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+
+citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+citationCommands inline =
+ let citation = citationWith inline
+ tok = spaces *> grouped inline
+ in M.fromList
+ [ ("cite", citation "cite" NormalCitation False)
+ , ("Cite", citation "Cite" NormalCitation False)
+ , ("citep", citation "citep" NormalCitation False)
+ , ("citep*", citation "citep*" NormalCitation False)
+ , ("citeal", citation "citeal" NormalCitation False)
+ , ("citealp", citation "citealp" NormalCitation False)
+ , ("citealp*", citation "citealp*" NormalCitation False)
+ , ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
+ , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+ , ("parencite", citation "parencite" NormalCitation False)
+ , ("supercite", citation "supercite" NormalCitation False)
+ , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+ , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+ , ("citeyear", citation "citeyear" SuppressAuthor False)
+ , ("autocite*", citation "autocite*" SuppressAuthor False)
+ , ("cite*", citation "cite*" SuppressAuthor False)
+ , ("parencite*", citation "parencite*" SuppressAuthor False)
+ , ("textcite", citation "textcite" AuthorInText False)
+ , ("citet", citation "citet" AuthorInText False)
+ , ("citet*", citation "citet*" AuthorInText False)
+ , ("citealt", citation "citealt" AuthorInText False)
+ , ("citealt*", citation "citealt*" AuthorInText False)
+ , ("textcites", citation "textcites" AuthorInText True)
+ , ("cites", citation "cites" NormalCitation True)
+ , ("autocites", citation "autocites" NormalCitation True)
+ , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+ , ("parencites", citation "parencites" NormalCitation True)
+ , ("supercites", citation "supercites" NormalCitation True)
+ , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+ , ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
+ , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
+ , ("Parencite", citation "Parencite" NormalCitation False)
+ , ("Supercite", citation "Supercite" NormalCitation False)
+ , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+ , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+ , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+ , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+ , ("Cite*", citation "Cite*" SuppressAuthor False)
+ , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+ , ("Textcite", citation "Textcite" AuthorInText False)
+ , ("Textcites", citation "Textcites" AuthorInText True)
+ , ("Cites", citation "Cites" NormalCitation True)
+ , ("Autocites", citation "Autocites" NormalCitation True)
+ , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
+ , ("Parencites", citation "Parencites" NormalCitation True)
+ , ("Supercites", citation "Supercites" NormalCitation True)
+ , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+ , ("citetext", complexNatbibCitation inline NormalCitation)
+ , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
+ complexNatbibCitation inline AuthorInText)
+ <|> citation "citeauthor" AuthorInText False)
+ , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+ addMeta "nocite"))
+ ]
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+ let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation]
+simpleCiteArgs inline = try $ do
+ first <- optionMaybe $ toList <$> opt
+ second <- optionMaybe $ toList <$> opt
+ keys <- try $ bgroup *> manyTill citationLabel egroup
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> (mempty, s )
+ (Just s , Just t ) -> (s , t )
+ _ -> (mempty, mempty)
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
+ where
+ opt :: PandocMonad m => LP m Inlines
+ opt = do
+ toks <- try (sp *> bracketedToks <* sp)
+ -- now parse the toks as inlines
+ st <- getState
+ parsed <- lift $
+ runParserT (mconcat <$> many inline) st "bracketed option" toks
+ case parsed of
+ Right result -> return result
+ Left e -> throwError $ PandocParsecError (toSources toks) e
+
+
+
+citationLabel :: PandocMonad m => LP m Text
+citationLabel = do
+ sp
+ untokenize <$>
+ (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+ <* sp
+ <* optional (symbol ',')
+ <* sp)
+ where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
+
+cites :: PandocMonad m
+ => LP m Inlines -> CitationMode -> Bool -> LP m [Citation]
+cites inline mode multi = try $ do
+ let paropt = parenWrapped inline
+ cits <- if multi
+ then do
+ multiprenote <- optionMaybe $ toList <$> paropt
+ multipostnote <- optionMaybe $ toList <$> paropt
+ let (pre, suf) = case (multiprenote, multipostnote) of
+ (Just s , Nothing) -> (mempty, s)
+ (Nothing , Just t) -> (mempty, t)
+ (Just s , Just t ) -> (s, t)
+ _ -> (mempty, mempty)
+ tempCits <- many1 $ simpleCiteArgs inline
+ case tempCits of
+ (k:ks) -> case ks of
+ (_:_) -> return $ (addMprenote pre k : init ks) ++
+ [addMpostnote suf (last ks)]
+ _ -> return [addMprenote pre (addMpostnote suf k)]
+ _ -> return [[]]
+ else count 1 $ simpleCiteArgs inline
+ let cs = concat cits
+ return $ case mode of
+ AuthorInText -> case cs of
+ (c:rest) -> c {citationMode = mode} : rest
+ [] -> []
+ _ -> map (\a -> a {citationMode = mode}) cs
+ where mprenote (k:ks) = (k:ks) ++ [Space]
+ mprenote _ = mempty
+ mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+ mpostnote _ = mempty
+ addMprenote mpn (k:ks) =
+ let mpnfinal = case citationPrefix k of
+ (_:_) -> mprenote mpn
+ _ -> mpn
+ in addPrefix mpnfinal (k:ks)
+ addMprenote _ _ = []
+ addMpostnote = addSuffix . mpostnote
+
+citationWith :: PandocMonad m
+ => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines
+citationWith inline name mode multi = do
+ (c,raw) <- withRaw $ cites inline mode multi
+ return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+ let isCite Cite{} = True
+ isCite _ = False
+ (pref, rest) = break isCite (toList ils)
+ in case rest of
+ (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+ _ -> []
+
+complexNatbibCitation :: PandocMonad m
+ => LP m Inlines -> CitationMode -> LP m Inlines
+complexNatbibCitation inline mode = try $ do
+ (cs, raw) <-
+ withRaw $ concat <$> do
+ bgroup
+ items <- mconcat <$>
+ many1 (notFollowedBy (symbol ';') >> inline)
+ `sepBy1` symbol ';'
+ egroup
+ return $ map handleCitationPart items
+ case cs of
+ [] -> mzero
+ (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+ (rawInline "latex" $ "\\citetext" <> untokenize raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+ note $ para $ ils <> str "."
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
new file mode 100644
index 000000000..7b8bca4af
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -0,0 +1,397 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Inline
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Inline
+ ( acronymCommands
+ , verbCommands
+ , charCommands
+ , accentCommands
+ , nameCommands
+ , biblatexInlineCommands
+ , refCommands
+ , rawInlineOr
+ , listingsLanguage
+ )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Shared (toRomanNumeral, safeRead)
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
+import Control.Applicative (optional, (<|>))
+import Control.Monad (guard, mzero, mplus, unless)
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
+import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
+ manyTill, getInput, setInput, incSourceColumn,
+ option, many1, try)
+import Data.Char (isDigit)
+import Text.Pandoc.Highlighting (fromListingsLanguage,)
+import Data.Maybe (maybeToList, fromMaybe)
+import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Data.Text.Normalize as Normalize
+import qualified Text.Pandoc.Translations as Translations
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+ parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+ if parseRaw
+ then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
+ else fallback
+
+dolabel :: PandocMonad m => LP m Inlines
+dolabel = do
+ v <- braced
+ let refstr = untokenize v
+ updateState $ \st ->
+ st{ sLastLabel = Just refstr }
+ return $ spanWith (refstr,[],[("label", refstr)])
+ $ inBrackets $ str $ untokenize v
+
+doref :: PandocMonad m => Text -> LP m Inlines
+doref cls = do
+ v <- braced
+ let refstr = untokenize v
+ return $ linkWith ("",[],[ ("reference-type", cls)
+ , ("reference", refstr)])
+ ("#" <> refstr)
+ ""
+ (inBrackets $ str refstr)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
+doTerm term = str <$> translateTerm term
+
+lit :: Text -> LP m Inlines
+lit = pure . str
+
+doverb :: PandocMonad m => LP m Inlines
+doverb = do
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ withVerbatimMode $
+ code . untokenize <$>
+ manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+ t@(Tok pos toktype txt) <- anyTok
+ case T.findIndex (== stopchar) txt of
+ Nothing -> return t
+ Just i -> do
+ let (t1, t2) = T.splitAt i txt
+ inp <- getInput
+ setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
+ : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
+ return $ Tok pos toktype t1
+
+listingsLanguage :: [(Text, Text)] -> Maybe Text
+listingsLanguage opts =
+ case lookup "language" opts of
+ Nothing -> Nothing
+ Just l -> fromListingsLanguage l `mplus` Just l
+
+dolstinline :: PandocMonad m => LP m Inlines
+dolstinline = do
+ options <- option [] keyvals
+ let classes = maybeToList $ listingsLanguage options
+ doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+ skipopts
+ cls <- untokenize <$> braced
+ doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
+doinlinecode classes = do
+ Tok _ Symbol t <- anySymbol
+ marker <- case T.uncons t of
+ Just (c, ts) | T.null ts -> return c
+ _ -> mzero
+ let stopchar = if marker == '{' then '}' else marker
+ withVerbatimMode $
+ codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
+ manyTill (verbTok stopchar) (symbol stopchar)
+
+nlToSpace :: Char -> Char
+nlToSpace '\n' = ' '
+nlToSpace x = x
+
+romanNumeralUpper :: (PandocMonad m) => LP m Inlines
+romanNumeralUpper =
+ str . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralLower :: (PandocMonad m) => LP m Inlines
+romanNumeralLower =
+ str . T.toLower . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralArg :: (PandocMonad m) => LP m Int
+romanNumeralArg = spaces *> (parser <|> inBraces)
+ where
+ inBraces = do
+ symbol '{'
+ spaces
+ res <- parser
+ spaces
+ symbol '}'
+ return res
+ parser = do
+ s <- untokenize <$> many1 (satisfyTok isWordTok)
+ let (digits, rest) = T.span isDigit s
+ unless (T.null rest) $
+ Prelude.fail "Non-digits in argument to \\Rn or \\RN"
+ safeRead digits
+
+accentWith :: PandocMonad m
+ => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+ ils <- tok
+ case toList ils of
+ (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+ -- try to normalize to the combined character:
+ Str (Normalize.normalize Normalize.NFC
+ (T.pack [x, combiningAccent]) <> xs) : ys
+ [Space] -> return $ str $ T.singleton
+ $ fromMaybe combiningAccent fallBack
+ [] -> return $ str $ T.singleton
+ $ fromMaybe combiningAccent fallBack
+ _ -> return ils
+
+
+verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+verbCommands = M.fromList
+ [ ("verb", doverb)
+ , ("lstinline", dolstinline)
+ , ("mintinline", domintinline)
+ , ("Verb", doverb)
+ ]
+
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+ let accent = accentWith tok
+ in M.fromList
+ [ ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("oe", lit "œ")
+ , ("OE", lit "Œ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("textasciicircum", lit "^")
+ , ("textasciitilde", lit "~")
+ , ("H", accent '\779' Nothing) -- hungarumlaut
+ , ("`", accent '\768' (Just '`')) -- grave
+ , ("'", accent '\769' (Just '\'')) -- acute
+ , ("^", accent '\770' (Just '^')) -- circ
+ , ("~", accent '\771' (Just '~')) -- tilde
+ , ("\"", accent '\776' Nothing) -- umlaut
+ , (".", accent '\775' Nothing) -- dot
+ , ("=", accent '\772' Nothing) -- macron
+ , ("|", accent '\781' Nothing) -- vertical line above
+ , ("b", accent '\817' Nothing) -- macron below
+ , ("c", accent '\807' Nothing) -- cedilla
+ , ("G", accent '\783' Nothing) -- doublegrave
+ , ("h", accent '\777' Nothing) -- hookabove
+ , ("d", accent '\803' Nothing) -- dotbelow
+ , ("f", accent '\785' Nothing) -- inverted breve
+ , ("r", accent '\778' Nothing) -- ringabove
+ , ("t", accent '\865' Nothing) -- double inverted breve
+ , ("U", accent '\782' Nothing) -- double vertical line above
+ , ("v", accent '\780' Nothing) -- hacek
+ , ("u", accent '\774' Nothing) -- breve
+ , ("k", accent '\808' Nothing) -- ogonek
+ , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+ , ("i", lit "ı") -- dotless i
+ , ("j", lit "ȷ") -- dotless j
+ , ("newtie", accent '\785' Nothing) -- inverted breve
+ , ("textcircled", accent '\8413' Nothing) -- combining circle
+ ]
+
+charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+charCommands = M.fromList
+ [ ("ldots", lit "…")
+ , ("vdots", lit "\8942")
+ , ("dots", lit "…")
+ , ("mdots", lit "…")
+ , ("sim", lit "~")
+ , ("sep", lit ",")
+ , ("P", lit "¶")
+ , ("S", lit "§")
+ , ("$", lit "$")
+ , ("%", lit "%")
+ , ("&", lit "&")
+ , ("#", lit "#")
+ , ("_", lit "_")
+ , ("{", lit "{")
+ , ("}", lit "}")
+ , ("qed", lit "\a0\x25FB")
+ , ("lq", return (str "‘"))
+ , ("rq", return (str "’"))
+ , ("textquoteleft", return (str "‘"))
+ , ("textquoteright", return (str "’"))
+ , ("textquotedblleft", return (str "“"))
+ , ("textquotedblright", return (str "”"))
+ , ("/", pure mempty) -- italic correction
+ , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+ guard $ not inTableCell
+ optional rawopt
+ spaces))
+ , (",", lit "\8198")
+ , ("@", pure mempty)
+ , (" ", lit "\160")
+ , ("ps", pure $ str "PS." <> space)
+ , ("TeX", lit "TeX")
+ , ("LaTeX", lit "LaTeX")
+ , ("bar", lit "|")
+ , ("textless", lit "<")
+ , ("textgreater", lit ">")
+ , ("textbackslash", lit "\\")
+ , ("backslash", lit "\\")
+ , ("slash", lit "/")
+ -- fontawesome
+ , ("faCheck", lit "\10003")
+ , ("faClose", lit "\10007")
+ -- hyphenat
+ , ("bshyp", lit "\\\173")
+ , ("fshyp", lit "/\173")
+ , ("dothyp", lit ".\173")
+ , ("colonhyp", lit ":\173")
+ , ("hyp", lit "-")
+ ]
+
+biblatexInlineCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+biblatexInlineCommands tok = M.fromList
+ -- biblatex misc
+ [ ("RN", romanNumeralUpper)
+ , ("Rn", romanNumeralLower)
+ , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+ , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+ , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+ , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+ , ("mkbibparens",
+ spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+ , ("mkbibbrackets",
+ spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+ , ("autocap", spanWith nullAttr <$> tok)
+ , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+ , ("bibstring",
+ (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+ <$> braced)
+ , ("adddot", pure (str "."))
+ , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+ , ("addabbrvspace", pure space)
+ , ("hyphen", pure (str "-"))
+ ]
+
+nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+nameCommands = M.fromList
+ [ ("figurename", doTerm Translations.Figure)
+ , ("prefacename", doTerm Translations.Preface)
+ , ("refname", doTerm Translations.References)
+ , ("bibname", doTerm Translations.Bibliography)
+ , ("chaptername", doTerm Translations.Chapter)
+ , ("partname", doTerm Translations.Part)
+ , ("contentsname", doTerm Translations.Contents)
+ , ("listfigurename", doTerm Translations.ListOfFigures)
+ , ("listtablename", doTerm Translations.ListOfTables)
+ , ("indexname", doTerm Translations.Index)
+ , ("abstractname", doTerm Translations.Abstract)
+ , ("tablename", doTerm Translations.Table)
+ , ("enclname", doTerm Translations.Encl)
+ , ("ccname", doTerm Translations.Cc)
+ , ("headtoname", doTerm Translations.To)
+ , ("pagename", doTerm Translations.Page)
+ , ("seename", doTerm Translations.See)
+ , ("seealsoname", doTerm Translations.SeeAlso)
+ , ("proofname", doTerm Translations.Proof)
+ , ("glossaryname", doTerm Translations.Glossary)
+ , ("lstlistingname", doTerm Translations.Listing)
+ ]
+
+refCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+refCommands = M.fromList
+ [ ("label", rawInlineOr "label" dolabel)
+ , ("ref", rawInlineOr "ref" $ doref "ref")
+ , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty
+ , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty
+ , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty
+ ]
+
+acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+acronymCommands = M.fromList
+ -- glossaries package
+ [ ("gls", doAcronym "short")
+ , ("Gls", doAcronym "short")
+ , ("glsdesc", doAcronym "long")
+ , ("Glsdesc", doAcronym "long")
+ , ("GLSdesc", doAcronym "long")
+ , ("acrlong", doAcronym "long")
+ , ("Acrlong", doAcronym "long")
+ , ("acrfull", doAcronym "full")
+ , ("Acrfull", doAcronym "full")
+ , ("acrshort", doAcronym "abbrv")
+ , ("Acrshort", doAcronym "abbrv")
+ , ("glspl", doAcronymPlural "short")
+ , ("Glspl", doAcronymPlural "short")
+ , ("glsdescplural", doAcronymPlural "long")
+ , ("Glsdescplural", doAcronymPlural "long")
+ , ("GLSdescplural", doAcronymPlural "long")
+ -- acronyms package
+ , ("ac", doAcronym "short")
+ , ("acf", doAcronym "full")
+ , ("acs", doAcronym "abbrv")
+ , ("acl", doAcronym "long")
+ , ("acp", doAcronymPlural "short")
+ , ("acfp", doAcronymPlural "full")
+ , ("acsp", doAcronymPlural "abbrv")
+ , ("aclp", doAcronymPlural "long")
+ , ("Ac", doAcronym "short")
+ , ("Acf", doAcronym "full")
+ , ("Acs", doAcronym "abbrv")
+ , ("Acl", doAcronym "long")
+ , ("Acp", doAcronymPlural "short")
+ , ("Acfp", doAcronymPlural "full")
+ , ("Acsp", doAcronymPlural "abbrv")
+ , ("Aclp", doAcronymPlural "long")
+ ]
+
+doAcronym :: PandocMonad m => Text -> LP m Inlines
+doAcronym form = do
+ acro <- braced
+ return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+ ("acronym-form", "singular+" <> form)])
+ $ str $ untokenize acro]
+
+doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
+doAcronymPlural form = do
+ acro <- braced
+ let plural = str "s"
+ return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+ ("acronym-form", "plural+" <> form)]) $
+ mconcat [str $ untokenize acro, plural]]
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 814b2fe79..6a8327904 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Lang
- Copyright : Copyright (C) 2018-2020 John MacFarlane
+ Copyright : Copyright (C) 2018-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -12,144 +12,223 @@ Functions for parsing polyglossia and babel language specifiers to
BCP47 'Lang'.
-}
module Text.Pandoc.Readers.LaTeX.Lang
- ( polyglossiaLangToBCP47
+ ( setDefaultLanguage
+ , polyglossiaLangToBCP47
, babelLangToBCP47
+ , enquoteCommands
+ , inlineLanguageCommands
)
where
import qualified Data.Map as M
+import Data.Text (Text)
import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang(..))
+import Text.Pandoc.Shared (extractSpaces)
+import Text.Collate.Lang (Lang(..), renderLang)
+import Text.Pandoc.Class (PandocMonad(..), setTranslations)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
+ withQuoteContext)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith,
+ singleQuoted, doubleQuoted)
+
+enquote :: PandocMonad m
+ => LP m Inlines
+ -> Bool -> Maybe Text -> LP m Inlines
+enquote tok starred mblang = do
+ skipopts
+ let lang = mblang >>= babelLangToBCP47
+ let langspan = case lang of
+ Nothing -> id
+ Just l -> spanWith ("",[],[("lang", renderLang l)])
+ quoteContext <- sQuoteContext <$> getState
+ if starred || quoteContext == InDoubleQuote
+ then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+ else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+enquoteCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+enquoteCommands tok = M.fromList
+ [ ("enquote*", enquote tok True Nothing)
+ , ("enquote", enquote tok False Nothing)
+ -- foreignquote is supposed to use native quote marks
+ , ("foreignquote*", braced >>= enquote tok True . Just . untokenize)
+ , ("foreignquote", braced >>= enquote tok False . Just . untokenize)
+ -- hypehnquote uses regular quotes
+ , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize)
+ , ("hyphenquote", braced >>= enquote tok False . Just . untokenize)
+ ]
+
+foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
+foreignlanguage tok = do
+ babelLang <- untokenize <$> braced
+ case babelLangToBCP47 babelLang of
+ Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok
+ _ -> tok
+
+inlineLanguageCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+inlineLanguageCommands tok =
+ M.fromList $
+ ("foreignlanguage", foreignlanguage tok) :
+ (mk <$> M.toList polyglossiaLangToBCP47)
+ where
+ mk (polyglossia, bcp47Func) =
+ ("text" <> polyglossia, inlineLanguage tok bcp47Func)
+
+inlineLanguage :: PandocMonad m
+ => LP m Inlines -> (Text -> Lang) -> LP m Inlines
+inlineLanguage tok bcp47Func = do
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+ <$> rawopt
+ let lang = renderLang $ bcp47Func o
+ extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
+
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+ <$> rawopt
+ polylang <- untokenize <$> braced
+ case M.lookup polylang polyglossiaLangToBCP47 of
+ Nothing -> return mempty -- TODO mzero? warning?
+ Just langFunc -> do
+ let l = langFunc o
+ setTranslations l
+ updateState $ setMeta "lang" $ str (renderLang l)
+ return mempty
polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
polyglossiaLangToBCP47 = M.fromList
[ ("arabic", \o -> case T.filter (/=' ') o of
- "locale=algeria" -> Lang "ar" "" "DZ" []
- "locale=mashriq" -> Lang "ar" "" "SY" []
- "locale=libya" -> Lang "ar" "" "LY" []
- "locale=morocco" -> Lang "ar" "" "MA" []
- "locale=mauritania" -> Lang "ar" "" "MR" []
- "locale=tunisia" -> Lang "ar" "" "TN" []
- _ -> Lang "ar" "" "" [])
+ "locale=algeria" -> Lang "ar" Nothing (Just "DZ") [] [] []
+ "locale=mashriq" -> Lang "ar" Nothing (Just "SY") [] [] []
+ "locale=libya" -> Lang "ar" Nothing (Just "LY") [] [] []
+ "locale=morocco" -> Lang "ar" Nothing (Just "MA") [] [] []
+ "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] []
+ "locale=tunisia" -> Lang "ar" Nothing (Just "TN") [] [] []
+ _ -> Lang "ar" Nothing (Just "") [] [] [])
, ("german", \o -> case T.filter (/=' ') o of
- "spelling=old" -> Lang "de" "" "DE" ["1901"]
+ "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []
"variant=austrian,spelling=old"
- -> Lang "de" "" "AT" ["1901"]
- "variant=austrian" -> Lang "de" "" "AT" []
+ -> Lang "de" Nothing (Just "AT") ["1901"] [] []
+ "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []
"variant=swiss,spelling=old"
- -> Lang "de" "" "CH" ["1901"]
- "variant=swiss" -> Lang "de" "" "CH" []
- _ -> Lang "de" "" "" [])
- , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+ -> Lang "de" Nothing (Just "CH") ["1901"] [] []
+ "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] []
+ _ -> Lang "de" Nothing Nothing [] [] [])
+ , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])
, ("greek", \o -> case T.filter (/=' ') o of
- "variant=poly" -> Lang "el" "" "polyton" []
- "variant=ancient" -> Lang "grc" "" "" []
- _ -> Lang "el" "" "" [])
+ "variant=poly" -> Lang "el" Nothing (Just "polyton") [] [] []
+ "variant=ancient" -> Lang "grc" Nothing Nothing [] [] []
+ _ -> Lang "el" Nothing Nothing [] [] [])
, ("english", \o -> case T.filter (/=' ') o of
- "variant=australian" -> Lang "en" "" "AU" []
- "variant=canadian" -> Lang "en" "" "CA" []
- "variant=british" -> Lang "en" "" "GB" []
- "variant=newzealand" -> Lang "en" "" "NZ" []
- "variant=american" -> Lang "en" "" "US" []
- _ -> Lang "en" "" "" [])
- , ("usorbian", \_ -> Lang "hsb" "" "" [])
+ "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] []
+ "variant=canadian" -> Lang "en" Nothing (Just "CA") [] [] []
+ "variant=british" -> Lang "en" Nothing (Just "GB") [] [] []
+ "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] []
+ "variant=american" -> Lang "en" Nothing (Just "US") [] [] []
+ _ -> Lang "en" Nothing (Just "") [] [] [])
+ , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])
, ("latin", \o -> case T.filter (/=' ') o of
- "variant=classic" -> Lang "la" "" "" ["x-classic"]
- _ -> Lang "la" "" "" [])
- , ("slovenian", \_ -> Lang "sl" "" "" [])
- , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
- , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
- , ("afrikaans", \_ -> Lang "af" "" "" [])
- , ("amharic", \_ -> Lang "am" "" "" [])
- , ("assamese", \_ -> Lang "as" "" "" [])
- , ("asturian", \_ -> Lang "ast" "" "" [])
- , ("bulgarian", \_ -> Lang "bg" "" "" [])
- , ("bengali", \_ -> Lang "bn" "" "" [])
- , ("tibetan", \_ -> Lang "bo" "" "" [])
- , ("breton", \_ -> Lang "br" "" "" [])
- , ("catalan", \_ -> Lang "ca" "" "" [])
- , ("welsh", \_ -> Lang "cy" "" "" [])
- , ("czech", \_ -> Lang "cs" "" "" [])
- , ("coptic", \_ -> Lang "cop" "" "" [])
- , ("danish", \_ -> Lang "da" "" "" [])
- , ("divehi", \_ -> Lang "dv" "" "" [])
- , ("esperanto", \_ -> Lang "eo" "" "" [])
- , ("spanish", \_ -> Lang "es" "" "" [])
- , ("estonian", \_ -> Lang "et" "" "" [])
- , ("basque", \_ -> Lang "eu" "" "" [])
- , ("farsi", \_ -> Lang "fa" "" "" [])
- , ("finnish", \_ -> Lang "fi" "" "" [])
- , ("french", \_ -> Lang "fr" "" "" [])
- , ("friulan", \_ -> Lang "fur" "" "" [])
- , ("irish", \_ -> Lang "ga" "" "" [])
- , ("scottish", \_ -> Lang "gd" "" "" [])
- , ("ethiopic", \_ -> Lang "gez" "" "" [])
- , ("galician", \_ -> Lang "gl" "" "" [])
- , ("hebrew", \_ -> Lang "he" "" "" [])
- , ("hindi", \_ -> Lang "hi" "" "" [])
- , ("croatian", \_ -> Lang "hr" "" "" [])
- , ("magyar", \_ -> Lang "hu" "" "" [])
- , ("armenian", \_ -> Lang "hy" "" "" [])
- , ("interlingua", \_ -> Lang "ia" "" "" [])
- , ("indonesian", \_ -> Lang "id" "" "" [])
- , ("icelandic", \_ -> Lang "is" "" "" [])
- , ("italian", \_ -> Lang "it" "" "" [])
- , ("japanese", \_ -> Lang "jp" "" "" [])
- , ("khmer", \_ -> Lang "km" "" "" [])
- , ("kurmanji", \_ -> Lang "kmr" "" "" [])
- , ("kannada", \_ -> Lang "kn" "" "" [])
- , ("korean", \_ -> Lang "ko" "" "" [])
- , ("lao", \_ -> Lang "lo" "" "" [])
- , ("lithuanian", \_ -> Lang "lt" "" "" [])
- , ("latvian", \_ -> Lang "lv" "" "" [])
- , ("malayalam", \_ -> Lang "ml" "" "" [])
- , ("mongolian", \_ -> Lang "mn" "" "" [])
- , ("marathi", \_ -> Lang "mr" "" "" [])
- , ("dutch", \_ -> Lang "nl" "" "" [])
- , ("nynorsk", \_ -> Lang "nn" "" "" [])
- , ("norsk", \_ -> Lang "no" "" "" [])
- , ("nko", \_ -> Lang "nqo" "" "" [])
- , ("occitan", \_ -> Lang "oc" "" "" [])
- , ("panjabi", \_ -> Lang "pa" "" "" [])
- , ("polish", \_ -> Lang "pl" "" "" [])
- , ("piedmontese", \_ -> Lang "pms" "" "" [])
- , ("portuguese", \_ -> Lang "pt" "" "" [])
- , ("romansh", \_ -> Lang "rm" "" "" [])
- , ("romanian", \_ -> Lang "ro" "" "" [])
- , ("russian", \_ -> Lang "ru" "" "" [])
- , ("sanskrit", \_ -> Lang "sa" "" "" [])
- , ("samin", \_ -> Lang "se" "" "" [])
- , ("slovak", \_ -> Lang "sk" "" "" [])
- , ("albanian", \_ -> Lang "sq" "" "" [])
- , ("serbian", \_ -> Lang "sr" "" "" [])
- , ("swedish", \_ -> Lang "sv" "" "" [])
- , ("syriac", \_ -> Lang "syr" "" "" [])
- , ("tamil", \_ -> Lang "ta" "" "" [])
- , ("telugu", \_ -> Lang "te" "" "" [])
- , ("thai", \_ -> Lang "th" "" "" [])
- , ("turkmen", \_ -> Lang "tk" "" "" [])
- , ("turkish", \_ -> Lang "tr" "" "" [])
- , ("ukrainian", \_ -> Lang "uk" "" "" [])
- , ("urdu", \_ -> Lang "ur" "" "" [])
- , ("vietnamese", \_ -> Lang "vi" "" "" [])
+ "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] []
+ _ -> Lang "la" Nothing Nothing [] [] [])
+ , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] [])
+ , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] [])
+ , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] [])
+ , ("afrikaans", \_ -> simpleLang "af")
+ , ("amharic", \_ -> simpleLang "am")
+ , ("assamese", \_ -> simpleLang "as")
+ , ("asturian", \_ -> simpleLang "ast")
+ , ("bulgarian", \_ -> simpleLang "bg")
+ , ("bengali", \_ -> simpleLang "bn")
+ , ("tibetan", \_ -> simpleLang "bo")
+ , ("breton", \_ -> simpleLang "br")
+ , ("catalan", \_ -> simpleLang "ca")
+ , ("welsh", \_ -> simpleLang "cy")
+ , ("czech", \_ -> simpleLang "cs")
+ , ("coptic", \_ -> simpleLang "cop")
+ , ("danish", \_ -> simpleLang "da")
+ , ("divehi", \_ -> simpleLang "dv")
+ , ("esperanto", \_ -> simpleLang "eo")
+ , ("spanish", \_ -> simpleLang "es")
+ , ("estonian", \_ -> simpleLang "et")
+ , ("basque", \_ -> simpleLang "eu")
+ , ("farsi", \_ -> simpleLang "fa")
+ , ("finnish", \_ -> simpleLang "fi")
+ , ("french", \_ -> simpleLang "fr")
+ , ("friulan", \_ -> simpleLang "fur")
+ , ("irish", \_ -> simpleLang "ga")
+ , ("scottish", \_ -> simpleLang "gd")
+ , ("ethiopic", \_ -> simpleLang "gez")
+ , ("galician", \_ -> simpleLang "gl")
+ , ("hebrew", \_ -> simpleLang "he")
+ , ("hindi", \_ -> simpleLang "hi")
+ , ("croatian", \_ -> simpleLang "hr")
+ , ("magyar", \_ -> simpleLang "hu")
+ , ("armenian", \_ -> simpleLang "hy")
+ , ("interlingua", \_ -> simpleLang "ia")
+ , ("indonesian", \_ -> simpleLang "id")
+ , ("icelandic", \_ -> simpleLang "is")
+ , ("italian", \_ -> simpleLang "it")
+ , ("japanese", \_ -> simpleLang "jp")
+ , ("khmer", \_ -> simpleLang "km")
+ , ("kurmanji", \_ -> simpleLang "kmr")
+ , ("kannada", \_ -> simpleLang "kn")
+ , ("korean", \_ -> simpleLang "ko")
+ , ("lao", \_ -> simpleLang "lo")
+ , ("lithuanian", \_ -> simpleLang "lt")
+ , ("latvian", \_ -> simpleLang "lv")
+ , ("malayalam", \_ -> simpleLang "ml")
+ , ("mongolian", \_ -> simpleLang "mn")
+ , ("marathi", \_ -> simpleLang "mr")
+ , ("dutch", \_ -> simpleLang "nl")
+ , ("nynorsk", \_ -> simpleLang "nn")
+ , ("norsk", \_ -> simpleLang "no")
+ , ("nko", \_ -> simpleLang "nqo")
+ , ("occitan", \_ -> simpleLang "oc")
+ , ("panjabi", \_ -> simpleLang "pa")
+ , ("polish", \_ -> simpleLang "pl")
+ , ("piedmontese", \_ -> simpleLang "pms")
+ , ("portuguese", \_ -> simpleLang "pt")
+ , ("romansh", \_ -> simpleLang "rm")
+ , ("romanian", \_ -> simpleLang "ro")
+ , ("russian", \_ -> simpleLang "ru")
+ , ("sanskrit", \_ -> simpleLang "sa")
+ , ("samin", \_ -> simpleLang "se")
+ , ("slovak", \_ -> simpleLang "sk")
+ , ("albanian", \_ -> simpleLang "sq")
+ , ("serbian", \_ -> simpleLang "sr")
+ , ("swedish", \_ -> simpleLang "sv")
+ , ("syriac", \_ -> simpleLang "syr")
+ , ("tamil", \_ -> simpleLang "ta")
+ , ("telugu", \_ -> simpleLang "te")
+ , ("thai", \_ -> simpleLang "th")
+ , ("turkmen", \_ -> simpleLang "tk")
+ , ("turkish", \_ -> simpleLang "tr")
+ , ("ukrainian", \_ -> simpleLang "uk")
+ , ("urdu", \_ -> simpleLang "ur")
+ , ("vietnamese", \_ -> simpleLang "vi")
]
+simpleLang :: Text -> Lang
+simpleLang l = Lang l Nothing Nothing [] [] []
+
babelLangToBCP47 :: T.Text -> Maybe Lang
babelLangToBCP47 s =
case s of
- "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
- "naustrian" -> Just $ Lang "de" "" "AT" []
- "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
- "nswissgerman" -> Just $ Lang "de" "" "CH" []
- "german" -> Just $ Lang "de" "" "DE" ["1901"]
- "ngerman" -> Just $ Lang "de" "" "DE" []
- "lowersorbian" -> Just $ Lang "dsb" "" "" []
- "uppersorbian" -> Just $ Lang "hsb" "" "" []
- "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
- "slovene" -> Just $ Lang "sl" "" "" []
- "australian" -> Just $ Lang "en" "" "AU" []
- "canadian" -> Just $ Lang "en" "" "CA" []
- "british" -> Just $ Lang "en" "" "GB" []
- "newzealand" -> Just $ Lang "en" "" "NZ" []
- "american" -> Just $ Lang "en" "" "US" []
- "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+ "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] []
+ "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] []
+ "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] []
+ "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] []
+ "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] []
+ "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] []
+ "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] []
+ "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] []
+ "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] []
+ "slovene" -> Just $ simpleLang "sl"
+ "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] []
+ "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] []
+ "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] []
+ "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] []
+ "american" -> Just $ Lang "en" Nothing (Just "US") [] [] []
+ "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []
_ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
new file mode 100644
index 000000000..5495a8e74
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -0,0 +1,184 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Macro
+ ( macroDef
+ )
+where
+import Text.Pandoc.Extensions (Extension(..))
+import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+
+macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
+macroDef constructor = do
+ (_, s) <- withRaw (commandDef <|> environmentDef)
+ (constructor (untokenize s) <$
+ guardDisabled Ext_latex_macros)
+ <|> return mempty
+ where commandDef = do
+ nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
+ guardDisabled Ext_latex_macros <|>
+ mapM_ (\(name, macro') ->
+ updateState (\s -> s{ sMacros = M.insert name macro'
+ (sMacros s) })) nameMacroPairs
+ environmentDef = do
+ mbenv <- newenvironment
+ case mbenv of
+ Nothing -> return ()
+ Just (name, macro1, macro2) ->
+ guardDisabled Ext_latex_macros <|>
+ do updateState $ \s -> s{ sMacros =
+ M.insert name macro1 (sMacros s) }
+ updateState $ \s -> s{ sMacros =
+ M.insert ("end" <> name) macro2 (sMacros s) }
+ -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+ -- is equivalent to
+ -- @\newcommand{\envname}[n-args][default]{begin}@
+ -- @\newcommand{\endenvname}@
+
+letmacro :: PandocMonad m => LP m [(Text, Macro)]
+letmacro = do
+ controlSeq "let"
+ (name, contents) <- withVerbatimMode $ do
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ optional $ symbol '='
+ spaces
+ -- we first parse in verbatim mode, and then expand macros,
+ -- because we don't want \let\foo\bar to turn into
+ -- \let\foo hello if we have previously \def\bar{hello}
+ contents <- bracedOrToken
+ return (name, contents)
+ contents' <- doMacros' 0 contents
+ return [(name, Macro ExpandWhenDefined [] Nothing contents')]
+
+defmacro :: PandocMonad m => LP m [(Text, Macro)]
+defmacro = do
+ -- we use withVerbatimMode, because macros are to be expanded
+ -- at point of use, not point of definition
+ controlSeq "def"
+ withVerbatimMode $ do
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ argspecs <- many (argspecArg <|> argspecPattern)
+ contents <- bracedOrToken
+ return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+
+-- \newif\iffoo' defines:
+-- \iffoo to be \iffalse
+-- \footrue to be a command that defines \iffoo to be \iftrue
+-- \foofalse to be a command that defines \iffoo to be \iffalse
+newif :: PandocMonad m => LP m [(Text, Macro)]
+newif = do
+ controlSeq "newif"
+ withVerbatimMode $ do
+ Tok pos (CtrlSeq name) _ <- anyControlSeq
+ -- \def\iffoo\iffalse
+ -- \def\footrue{\def\iffoo\iftrue}
+ -- \def\foofalse{\def\iffoo\iffalse}
+ let base = T.drop 2 name
+ return [ (name, Macro ExpandWhenUsed [] Nothing
+ [Tok pos (CtrlSeq "iffalse") "\\iffalse"])
+ , (base <> "true",
+ Macro ExpandWhenUsed [] Nothing
+ [ Tok pos (CtrlSeq "def") "\\def"
+ , Tok pos (CtrlSeq name) ("\\" <> name)
+ , Tok pos (CtrlSeq "iftrue") "\\iftrue"
+ ])
+ , (base <> "false",
+ Macro ExpandWhenUsed [] Nothing
+ [ Tok pos (CtrlSeq "def") "\\def"
+ , Tok pos (CtrlSeq name) ("\\" <> name)
+ , Tok pos (CtrlSeq "iffalse") "\\iffalse"
+ ])
+ ]
+
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do
+ Tok _ (Arg i) _ <- satisfyTok isArgTok
+ return $ ArgNum i
+
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =
+ Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+ (toktype' == Symbol || toktype' == Word) &&
+ (txt /= "{" && txt /= "\\" && txt /= "}")))
+
+newcommand :: PandocMonad m => LP m [(Text, Macro)]
+newcommand = do
+ Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+ controlSeq "renewcommand" <|>
+ controlSeq "providecommand" <|>
+ controlSeq "DeclareMathOperator" <|>
+ controlSeq "DeclareRobustCommand"
+ withVerbatimMode $ do
+ Tok _ (CtrlSeq name) txt <- do
+ optional (symbol '*')
+ anyControlSeq <|>
+ (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ let argspecs = map ArgNum [1..numargs]
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ spaces
+ contents' <- bracedOrToken
+ let contents =
+ case mtype of
+ "DeclareMathOperator" ->
+ Tok pos (CtrlSeq "mathop") "\\mathop"
+ : Tok pos Symbol "{"
+ : Tok pos (CtrlSeq "mathrm") "\\mathrm"
+ : Tok pos Symbol "{"
+ : (contents' ++
+ [ Tok pos Symbol "}", Tok pos Symbol "}" ])
+ _ -> contents'
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just macro
+ | mtype == "newcommand" -> do
+ report $ MacroAlreadyDefined txt pos
+ return [(name, macro)]
+ | mtype == "providecommand" -> return [(name, macro)]
+ _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
+
+newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
+newenvironment = do
+ pos <- getPosition
+ Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+ controlSeq "renewenvironment" <|>
+ controlSeq "provideenvironment"
+ withVerbatimMode $ do
+ optional $ symbol '*'
+ spaces
+ name <- untokenize <$> braced
+ spaces
+ numargs <- option 0 $ try bracketedNum
+ spaces
+ optarg <- option Nothing $ Just <$> try bracketedToks
+ let argspecs = map (\i -> ArgNum i) [1..numargs]
+ startcontents <- spaces >> bracedOrToken
+ endcontents <- spaces >> bracedOrToken
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Just _
+ | mtype == "newenvironment" -> do
+ report $ MacroAlreadyDefined name pos
+ return Nothing
+ | mtype == "provideenvironment" ->
+ return Nothing
+ _ -> return $ Just (name,
+ Macro ExpandWhenUsed argspecs optarg startcontents,
+ Macro ExpandWhenUsed [] Nothing endcontents)
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+ ds <- untokenize <$> bracketedToks
+ case safeRead ds of
+ Just i -> return i
+ _ -> return 0
diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs
new file mode 100644
index 000000000..5b49a0376
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs
@@ -0,0 +1,221 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Math
+ ( dollarsMath
+ , inlineEnvironments
+ , inlineEnvironment
+ , mathInline
+ , mathDisplay
+ , theoremstyle
+ , theoremEnvironment
+ , newtheorem
+ , proof
+ )
+where
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Builder as B
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (trimMath, stripTrailingNewlines)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import Control.Monad (guard, mzero)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+ symbol '$'
+ display <- option False (True <$ symbol '$')
+ (do contents <- try $ untokenize <$> pDollarsMath 0
+ if display
+ then mathDisplay contents <$ symbol '$'
+ else return $ mathInline contents)
+ <|> (guard display >> return (mathInline ""))
+
+-- Int is number of embedded groupings
+pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
+pDollarsMath n = do
+ tk@(Tok _ toktype t) <- anyTok
+ case toktype of
+ Symbol | t == "$"
+ , n == 0 -> return []
+ | t == "\\" -> do
+ tk' <- anyTok
+ (tk :) . (tk' :) <$> pDollarsMath n
+ | t == "{" -> (tk :) <$> pDollarsMath (n+1)
+ | t == "}" ->
+ if n > 0
+ then (tk :) <$> pDollarsMath (n-1)
+ else mzero
+ _ -> (tk :) <$> pDollarsMath n
+
+mathDisplay :: Text -> Inlines
+mathDisplay = displayMath . trimMath
+
+mathInline :: Text -> Inlines
+mathInline = math . trimMath
+
+mathEnvWith :: PandocMonad m
+ => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+ where inner x = case innerEnv of
+ Nothing -> x
+ Just y -> "\\begin{" <> y <> "}\n" <> x <>
+ "\\end{" <> y <> "}"
+
+mathEnv :: PandocMonad m => Text -> LP m Text
+mathEnv name = do
+ skipopts
+ optional blankline
+ res <- manyTill anyTok (end_ name)
+ return $ stripTrailingNewlines $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+ controlSeq "begin"
+ name <- untokenize <$> braced
+ M.findWithDefault mzero name inlineEnvironments
+
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+ ("displaymath", mathEnvWith id Nothing "displaymath")
+ , ("math", math <$> mathEnv "math")
+ , ("equation", mathEnvWith id Nothing "equation")
+ , ("equation*", mathEnvWith id Nothing "equation*")
+ , ("gather", mathEnvWith id (Just "gathered") "gather")
+ , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+ , ("multline", mathEnvWith id (Just "gathered") "multline")
+ , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+ , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+ , ("align", mathEnvWith id (Just "aligned") "align")
+ , ("align*", mathEnvWith id (Just "aligned") "align*")
+ , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+ , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+ , ("dmath", mathEnvWith id Nothing "dmath")
+ , ("dmath*", mathEnvWith id Nothing "dmath*")
+ , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+ , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+ , ("darray", mathEnvWith id (Just "aligned") "darray")
+ , ("darray*", mathEnvWith id (Just "aligned") "darray*")
+ ]
+
+theoremstyle :: PandocMonad m => LP m Blocks
+theoremstyle = do
+ stylename <- untokenize <$> braced
+ let mbstyle = case stylename of
+ "plain" -> Just PlainStyle
+ "definition" -> Just DefinitionStyle
+ "remark" -> Just RemarkStyle
+ _ -> Nothing
+ case mbstyle of
+ Nothing -> return ()
+ Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
+ return mempty
+
+newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks
+newtheorem inline = do
+ number <- option True (False <$ symbol '*' <* sp)
+ name <- untokenize <$> braced
+ sp
+ series <- option Nothing $ Just . untokenize <$> bracketedToks
+ sp
+ showName <- tokWith inline
+ sp
+ syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
+ sty <- sLastTheoremStyle <$> getState
+ let spec = TheoremSpec { theoremName = showName
+ , theoremStyle = sty
+ , theoremSeries = series
+ , theoremSyncTo = syncTo
+ , theoremNumber = number
+ , theoremLastNum = DottedNum [0] }
+ tmap <- sTheoremMap <$> getState
+ updateState $ \s -> s{ sTheoremMap =
+ M.insert name spec tmap }
+ return mempty
+
+theoremEnvironment :: PandocMonad m
+ => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks
+theoremEnvironment blocks opt name = do
+ tmap <- sTheoremMap <$> getState
+ case M.lookup name tmap of
+ Nothing -> mzero
+ Just tspec -> do
+ optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
+ mblabel <- option Nothing $ Just . untokenize <$>
+ try (spaces >> controlSeq "label" >> spaces >> braced)
+ bs <- env name blocks
+ number <-
+ if theoremNumber tspec
+ then do
+ let name' = fromMaybe name $ theoremSeries tspec
+ num <- getNextNumber
+ (maybe (DottedNum [0]) theoremLastNum .
+ M.lookup name' . sTheoremMap)
+ updateState $ \s ->
+ s{ sTheoremMap =
+ M.adjust
+ (\spec -> spec{ theoremLastNum = num })
+ name'
+ (sTheoremMap s)
+ }
+
+ case mblabel of
+ Just ident ->
+ updateState $ \s ->
+ s{ sLabels = M.insert ident
+ (B.toList $
+ theoremName tspec <> "\160" <>
+ str (renderDottedNum num)) (sLabels s) }
+ Nothing -> return ()
+ return $ space <> B.text (renderDottedNum num)
+ else return mempty
+ let titleEmph = case theoremStyle tspec of
+ PlainStyle -> B.strong
+ DefinitionStyle -> B.strong
+ RemarkStyle -> B.emph
+ let title = titleEmph (theoremName tspec <> number)
+ <> optTitle <> "." <> space
+ return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
+ $ case theoremStyle tspec of
+ PlainStyle -> walk italicize bs
+ _ -> bs
+
+
+
+proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks
+proof blocks opt = do
+ title <- option (B.text "Proof") opt
+ bs <- env "proof" blocks
+ return $
+ B.divWith ("", ["proof"], []) $
+ addQed $ addTitle (B.emph (title <> ".")) bs
+
+addTitle :: Inlines -> Blocks -> Blocks
+addTitle ils bs =
+ case B.toList bs of
+ (Para xs : rest)
+ -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
+ _ -> B.para ils <> bs
+
+addQed :: Blocks -> Blocks
+addQed bs =
+ case Seq.viewr (B.unMany bs) of
+ s Seq.:> Para ils
+ -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
+ _ -> bs <> B.para qedSign
+ where
+ qedSign = B.str "\xa0\x25FB"
+
+italicize :: Block -> Block
+italicize x@(Para [Image{}]) = x -- see #6925
+italicize (Para ils) = Para [Emph ils]
+italicize (Plain ils) = Plain [Emph ils]
+italicize x = x
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 563d32883..9dac4d6ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -1,11 +1,12 @@
{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Parsing
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -27,11 +28,15 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, rawLaTeXParser
, applyMacros
, tokenize
+ , tokenizeSources
+ , getInputTokens
, untokenize
, untoken
, totoks
, toksToString
, satisfyTok
+ , parseFromToks
+ , disablingWithRaw
, doMacros
, doMacros'
, setpos
@@ -52,6 +57,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, comment
, anyTok
, singleChar
+ , tokWith
, specialChars
, endline
, blankline
@@ -78,6 +84,11 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, rawopt
, overlaySpecification
, getNextNumber
+ , label
+ , setCaption
+ , resetCaption
+ , env
+ , addMeta
) where
import Control.Applicative (many, (<|>))
@@ -87,13 +98,15 @@ import Control.Monad.Trans (lift)
import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
import Data.Default
import Data.List (intercalate)
+import qualified Data.IntMap as IntMap
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Error
+ (PandocError (PandocMacroLoop,PandocShouldNeverHappenError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
@@ -102,7 +115,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
ArgSpec (..), Tok (..), TokType (..))
import Text.Pandoc.Shared
import Text.Parsec.Pos
--- import Debug.Trace
newtype DottedNum = DottedNum [Int]
deriving (Show, Eq)
@@ -151,7 +163,9 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sLabels :: M.Map Text [Inline]
, sHasChapters :: Bool
, sToggles :: M.Map Text Bool
- , sExpanded :: Bool
+ , sFileContents :: M.Map Text Text
+ , sEnableWithRaw :: Bool
+ , sRawTokens :: IntMap.IntMap [Tok]
}
deriving Show
@@ -176,7 +190,9 @@ defaultLaTeXState = LaTeXState{ sOptions = def
, sLabels = M.empty
, sHasChapters = False
, sToggles = M.empty
- , sExpanded = False
+ , sFileContents = M.empty
+ , sEnableWithRaw = True
+ , sRawTokens = IntMap.empty
}
instance PandocMonad m => HasQuoteContext LaTeXState m where
@@ -232,21 +248,25 @@ withVerbatimMode parser = do
updateState $ \st -> st{ sVerbatimMode = False }
return result
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
=> [Tok] -> Bool -> LP m a -> LP m a
- -> ParserT Text s m (a, Text)
+ -> ParserT Sources s m (a, Text)
rawLaTeXParser toks retokenize parser valParser = do
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
let lstate' = lstate { sMacros = extractMacros pstate }
+ let setStartPos = case toks of
+ Tok pos _ _ : _ -> setPosition pos
+ _ -> return ()
+ let preparser = setStartPos >> parser
let rawparser = (,) <$> withRaw valParser <*> getState
- res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+ res' <- lift $ runParserT (withRaw (preparser >> getPosition))
+ lstate "chunk" toks
case res' of
Left _ -> mzero
- Right toks' -> do
+ Right (endpos, toks') -> do
res <- lift $ runParserT (do when retokenize $ do
-- retokenize, applying macros
- doMacros
ts <- many (satisfyTok (const True))
setInput ts
rawparser)
@@ -255,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do
Left _ -> mzero
Right ((val, raw), st) -> do
updateState (updateMacros (sMacros st <>))
- _ <- takeP (T.length (untokenize toks'))
+ let skipTilPos stopPos = do
+ anyChar
+ pos <- getPosition
+ if pos >= stopPos
+ then return ()
+ else skipTilPos stopPos
+ skipTilPos endpos
let result = untokenize raw
-- ensure we end with space if input did, see #4442
let result' =
@@ -268,7 +294,7 @@ rawLaTeXParser toks retokenize parser valParser = do
return (val, result')
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => Text -> ParserT Text s m Text
+ => Text -> ParserT Sources s m Text
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
do let retokenize = untokenize <$> many (satisfyTok (const True))
pstate <- getState
@@ -279,6 +305,31 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
Left e -> Prelude.fail (show e)
Right s' -> return s'
+{-
+When tokenize or untokenize change, test with this
+QuickCheck property:
+
+> tokUntokRoundtrip :: String -> Bool
+> tokUntokRoundtrip s =
+> let t = T.pack s in untokenize (tokenize "random" t) == t
+-}
+
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+ tokenizeSource (pos, t) = totoks pos t
+
+-- Return tokens from input sources. Ensure that starting position is
+-- correct.
+getInputTokens :: PandocMonad m => ParserT Sources s m [Tok]
+getInputTokens = do
+ pos <- getPosition
+ ss <- getInput
+ return $
+ case ss of
+ Sources [] -> []
+ Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest)
+
tokenize :: SourceName -> Text -> [Tok]
tokenize sourcename = totoks (initialPos sourcename)
@@ -402,41 +453,62 @@ untoken t = untokenAccum t mempty
toksToString :: [Tok] -> String
toksToString = T.unpack . untokenize
+parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
+parseFromToks parser toks = do
+ oldInput <- getInput
+ setInput toks
+ oldpos <- getPosition
+ case toks of
+ Tok pos _ _ : _ -> setPosition pos
+ _ -> return ()
+ result <- disablingWithRaw parser
+ setInput oldInput
+ setPosition oldpos
+ return result
+
+disablingWithRaw :: PandocMonad m => LP m a -> LP m a
+disablingWithRaw parser = do
+ oldEnableWithRaw <- sEnableWithRaw <$> getState
+ updateState $ \st -> st{ sEnableWithRaw = False }
+ result <- parser
+ updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw }
+ return result
+
satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
satisfyTok f = do
doMacros -- apply macros on remaining input stream
res <- tokenPrim (T.unpack . untoken) updatePos matcher
- updateState $ \st -> st{ sExpanded = False }
- return res
+ updateState $ \st ->
+ if sEnableWithRaw st
+ then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st }
+ else st
+ return $! res
where matcher t | f t = Just t
| otherwise = Nothing
updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
updatePos _spos _ (Tok pos _ _ : _) = pos
- updatePos spos _ [] = incSourceColumn spos 1
+ updatePos spos (Tok _ _ t) [] = incSourceColumn spos (T.length t)
doMacros :: PandocMonad m => LP m ()
doMacros = do
- expanded <- sExpanded <$> getState
- verbatimMode <- sVerbatimMode <$> getState
- unless (expanded || verbatimMode) $ do
- getInput >>= doMacros' 1 >>= setInput
- updateState $ \st -> st{ sExpanded = True }
+ st <- getState
+ unless (sVerbatimMode st) $
+ getInput >>= doMacros' 1 >>= setInput
doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
doMacros' n inp =
case inp of
Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros n spos name ts
+ -> handleMacros n spos name ts <|> return inp
Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros n spos ("end" <> name) ts
+ -> handleMacros n spos ("end" <> name) ts <|> return inp
Tok _ (CtrlSeq "expandafter") _ : t : ts
-> combineTok t <$> doMacros' n ts
Tok spos (CtrlSeq name) _ : ts
- -> handleMacros n spos name ts
+ -> handleMacros n spos name ts <|> return inp
_ -> return inp
- <|> return inp
where
combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -482,7 +554,7 @@ doMacros' n inp =
$ throwError $ PandocMacroLoop name
macros <- sMacros <$> getState
case M.lookup name macros of
- Nothing -> mzero
+ Nothing -> trySpecialMacro name ts
Just (Macro expansionPoint argspecs optarg newtoks) -> do
let getargs' = do
args <-
@@ -510,6 +582,41 @@ doMacros' n inp =
ExpandWhenUsed -> doMacros' (n' + 1) result
ExpandWhenDefined -> return result
+-- | Certain macros do low-level tex manipulations that can't
+-- be represented in our Macro type, so we handle them here.
+trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok]
+trySpecialMacro "xspace" ts = do
+ ts' <- doMacros' 1 ts
+ case ts' of
+ Tok pos Word t : _
+ | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
+ _ -> return ts'
+trySpecialMacro "iftrue" ts = handleIf True ts
+trySpecialMacro "iffalse" ts = handleIf False ts
+trySpecialMacro _ _ = mzero
+
+handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok]
+handleIf b ts = do
+ res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts
+ case res' of
+ Left _ -> Prelude.fail "Could not parse conditional"
+ Right ts' -> return ts'
+
+ifParser :: PandocMonad m => Bool -> LP m [Tok]
+ifParser b = do
+ ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi")
+ *> anyTok)
+ elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi"))
+ <|> ([] <$ controlSeq "fi")
+ rest <- getInput
+ return $ (if b then ifToks else elseToks) ++ rest
+
+startsWithAlphaNum :: Text -> Bool
+startsWithAlphaNum t =
+ case T.uncons t of
+ Just (c, _) | isAlphaNum c -> True
+ _ -> False
+
setpos :: SourcePos -> Tok -> Tok
setpos spos (Tok _ tt txt) = Tok spos tt txt
@@ -592,18 +699,22 @@ isCommentTok _ = False
anyTok :: PandocMonad m => LP m Tok
anyTok = satisfyTok (const True)
+singleCharTok :: PandocMonad m => LP m Tok
+singleCharTok =
+ satisfyTok $ \case
+ Tok _ Word t -> T.length t == 1
+ Tok _ Symbol t -> not (T.any (`Set.member` specialChars) t)
+ _ -> False
+
singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
- Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
- guard $ not $ toktype == Symbol &&
- T.any (`Set.member` specialChars) t
- if T.length t > 1
- then do
- let (t1, t2) = (T.take 1 t, T.drop 1 t)
- inp <- getInput
- setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
- return $ Tok pos toktype t1
- else return $ Tok pos toktype t
+singleChar = singleCharTok <|> singleCharFromWord
+ where
+ singleCharFromWord = do
+ Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok
+ let (t1, t2) = (T.take 1 t, T.drop 1 t)
+ inp <- getInput
+ setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp
+ anyTok
specialChars :: Set.Set Char
specialChars = Set.fromList "#$%&~_^\\{}"
@@ -646,28 +757,25 @@ grouped parser = try $ do
-- {{a,b}} should be parsed the same as {a,b}
try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
-braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
-braced' getTok n =
- handleEgroup <|> handleBgroup <|> handleOther
- where handleEgroup = do
- t <- symbol '}'
- if n == 1
- then return []
- else (t:) <$> braced' getTok (n - 1)
- handleBgroup = do
- t <- symbol '{'
- (t:) <$> braced' getTok (n + 1)
- handleOther = do
- t <- getTok
- (t:) <$> braced' getTok n
+braced' :: PandocMonad m => LP m Tok -> LP m [Tok]
+braced' getTok = symbol '{' *> go (1 :: Int)
+ where
+ go n = do
+ t <- getTok
+ case t of
+ Tok _ Symbol "}"
+ | n > 1 -> (t:) <$> go (n - 1)
+ | otherwise -> return []
+ Tok _ Symbol "{" -> (t:) <$> go (n + 1)
+ _ -> (t:) <$> go n
braced :: PandocMonad m => LP m [Tok]
-braced = symbol '{' *> braced' anyTok 1
+braced = braced' anyTok
-- URLs require special handling, because they can contain %
-- characters. So we retonenize comments as we go...
bracedUrl :: PandocMonad m => LP m [Tok]
-bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+bracedUrl = braced' (retokenizeComment >> anyTok)
-- For handling URLs, which allow literal % characters...
retokenizeComment :: PandocMonad m => LP m ()
@@ -723,16 +831,29 @@ ignore raw = do
withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
withRaw parser = do
- inp <- getInput
+ rawTokensMap <- sRawTokens <$> getState
+ let key = case IntMap.lookupMax rawTokensMap of
+ Nothing -> 0
+ Just (n,_) -> n + 1
+ -- insert empty list at key
+ updateState $ \st -> st{ sRawTokens =
+ IntMap.insert key [] $ sRawTokens st }
result <- parser
- nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok)
- let raw = takeWhile (\(Tok pos _ _) -> maybe True
- (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp
+ mbRevToks <- IntMap.lookup key . sRawTokens <$> getState
+ raw <- case mbRevToks of
+ Just revtoks -> do
+ updateState $ \st -> st{ sRawTokens =
+ IntMap.delete key $ sRawTokens st}
+ return $ reverse revtoks
+ Nothing ->
+ throwError $ PandocShouldNeverHappenError $
+ "sRawTokens has nothing at key " <> T.pack (show key)
return (result, raw)
keyval :: PandocMonad m => LP m (Text, Text)
keyval = try $ do
- Tok _ Word key <- satisfyTok isWordTok
+ key <- untokenize <$> many1 (notFollowedBy (symbol '=') >>
+ (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))
sp
val <- option mempty $ do
symbol '='
@@ -792,7 +913,7 @@ getRawCommand name txt = do
(_, rawargs) <- withRaw $
case name of
"write" -> do
- void $ satisfyTok isWordTok -- digits
+ void $ many $ satisfyTok isDigitTok -- digits
void braced
"titleformat" -> do
void braced
@@ -807,6 +928,10 @@ getRawCommand name txt = do
void $ many braced
return $ txt <> untokenize rawargs
+isDigitTok :: Tok -> Bool
+isDigitTok (Tok _ Word t) = T.all isDigit t
+isDigitTok _ = False
+
skipopts :: PandocMonad m => LP m ()
skipopts = skipMany (void overlaySpecification <|> void rawopt)
@@ -874,3 +999,35 @@ getNextNumber getCurrentNum = do
Just n -> [n, 1]
Nothing -> [1]
+label :: PandocMonad m => LP m ()
+label = do
+ controlSeq "label"
+ t <- braced
+ updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
+
+setCaption :: PandocMonad m => LP m Inlines -> LP m ()
+setCaption inline = try $ do
+ skipopts
+ ils <- tokWith inline
+ optional $ try $ spaces *> label
+ updateState $ \st -> st{ sCaption = Just ils }
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption = Nothing
+ , sLastLabel = Nothing }
+
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
+
+tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
+tokWith inlineParser = try $ spaces >>
+ grouped inlineParser
+ <|> (lookAhead anyControlSeq >> inlineParser)
+ <|> singleChar'
+ where singleChar' = do
+ Tok _ _ t <- singleChar
+ return $ str t
+
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+ st{ sMeta = addMetaField field val $ sMeta st }
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index db9c276e7..b8bf0ce7f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,12 +1,7 @@
+{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Readers.LaTeX.SIunitx
- ( dosi
- , doSI
- , doSIrange
- , doSInum
- , doSInumlist
- , doSIang
- )
+ ( siunitxCommands )
where
import Text.Pandoc.Builder
import Text.Pandoc.Readers.LaTeX.Parsing
@@ -15,14 +10,32 @@ import Text.Pandoc.Class
import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
optional, space, spaces, withRaw, (<|>))
import Control.Applicative ((<|>))
+import Control.Monad (void)
import qualified Data.Map as M
import Data.Char (isDigit)
import Data.Text (Text)
import qualified Data.Text as T
import Data.List (intersperse)
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Walk (walk)
+
+siunitxCommands :: PandocMonad m
+ => LP m Inlines -> M.Map Text (LP m Inlines)
+siunitxCommands tok = M.fromList
+ [ ("si", dosi tok)
+ , ("SI", doSI tok)
+ , ("SIrange", doSIrange True tok)
+ , ("numrange", doSIrange False tok)
+ , ("numlist", doSInumlist)
+ , ("SIlist", doSIlist tok)
+ , ("num", doSInum)
+ , ("ang", doSIang)
+ ]
dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
-dosi tok = grouped (siUnit tok) <|> siUnit tok
+dosi tok = do
+ options <- option [] keyvals
+ grouped (siUnit options tok) <|> siUnit options tok
-- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
doSI :: PandocMonad m => LP m Inlines -> LP m Inlines
@@ -57,23 +70,50 @@ doSInumlist = do
mconcat (intersperse (str "," <> space) (init xs)) <>
text ", & " <> last xs
+doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIlist tok = do
+ options <- option [] keyvals
+ nums <- map tonum . T.splitOn ";" . untokenize <$> braced
+ unit <- grouped (siUnit options tok) <|> siUnit options tok
+ let xs = map (<> (str "\xa0" <> unit)) nums
+ case xs of
+ [] -> return mempty
+ [x] -> return x
+ _ -> return $
+ mconcat (intersperse (str "," <> space) (init xs)) <>
+ text ", & " <> last xs
+
parseNum :: Parser Text () Inlines
parseNum = (mconcat <$> many parseNumPart) <* eof
+minus :: Text
+minus = "\x2212"
+
+hyphenToMinus :: Inline -> Inline
+hyphenToMinus (Str t) = Str (T.replace "-" minus t)
+hyphenToMinus x = x
+
parseNumPart :: Parser Text () Inlines
parseNumPart =
parseDecimalNum <|>
parseComma <|>
parsePlusMinus <|>
+ parsePM <|>
parseI <|>
parseExp <|>
parseX <|>
parseSpace
where
- parseDecimalNum = do
- pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
- basenum <- (pref <>) . T.pack
- <$> many1 (satisfy (\c -> isDigit c || c == '.'))
+ parseDecimalNum, parsePlusMinus, parsePM,
+ parseComma, parseI, parseX,
+ parseExp, parseSpace :: Parser Text () Inlines
+ parseDecimalNum = try $ do
+ pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-')
+ basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
+ let basenum = pref <> T.pack
+ (case basenum' of
+ '.':_ -> '0':basenum'
+ _ -> basenum')
uncertainty <- option mempty $ T.pack <$> parseParens
if T.null uncertainty
then return $ str basenum
@@ -91,6 +131,7 @@ parseNumPart =
| otherwise -> "." <> t
parseComma = str "." <$ char ','
parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-")
+ parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm")
parseParens =
char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')'
parseI = str "i" <$ char 'i'
@@ -103,11 +144,14 @@ doSIang :: PandocMonad m => LP m Inlines
doSIang = do
skipopts
ps <- T.splitOn ";" . untokenize <$> braced
+ let dropPlus t = case T.uncons t of
+ Just ('+',t') -> t'
+ _ -> t
case ps ++ repeat "" of
(d:m:s:_) -> return $
- (if T.null d then mempty else str d <> str "\xb0") <>
- (if T.null m then mempty else str m <> str "\x2032") <>
- (if T.null s then mempty else str s <> str "\x2033")
+ (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <>
+ (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <>
+ (if T.null s then mempty else str (dropPlus s) <> str "\x2033")
_ -> return mempty
-- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms"
@@ -136,40 +180,99 @@ doSIrange includeUnits tok = do
emptyOr160 :: Inlines -> Inlines
emptyOr160 x = if x == mempty then x else str "\160"
-siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = try (do
- Tok _ (CtrlSeq name) _ <- anyControlSeq
- case name of
- "square" -> do
- unit <- siUnit tok
- return $ unit <> superscript "2"
- "cubic" -> do
- unit <- siUnit tok
- return $ unit <> superscript "3"
- "raisetothe" -> do
- n <- tok
- unit <- siUnit tok
- return $ unit <> superscript n
- _ ->
- case M.lookup name siUnitMap of
- Just il ->
- option il $
- choice
- [ (il <> superscript "2") <$ controlSeq "squared"
- , (il <> superscript "3") <$ controlSeq "cubed"
- , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok)
- ]
- Nothing -> fail "not an siunit unit command")
- <|> (lookAhead anyControlSeq >> tok)
- <|> (do Tok _ Word t <- satisfyTok isWordTok
- return $ str t)
- <|> (symbol '^' *> (superscript <$> tok))
- <|> (symbol '_' *> (subscript <$> tok))
- <|> ("\xa0" <$ symbol '.')
- <|> ("\xa0" <$ symbol '~')
- <|> tok
- <|> (do Tok _ _ t <- anyTok
- return (str t))
+siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines
+siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart
+ where
+ siUnitPart :: LP m Inlines
+ siUnitPart = try $ do
+ skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1)
+ x <- ((siPrefix <*> siBase)
+ <|> (do u <- siBase <|> tok
+ option u $ siSuffix <*> pure u))
+ option x (siInfix x)
+ siInfix :: Inlines -> LP m Inlines
+ siInfix u1 = try $
+ (do _ <- controlSeq "per"
+ u2 <- siUnitPart
+ let useSlash = lookup "per-mode" options == Just "symbol"
+ if useSlash
+ then return (u1 <> str "/" <> u2)
+ else return (u1 <> str "\xa0" <> negateExponent u2))
+ <|> (do _ <- symbol '/'
+ u2 <- siUnitPart
+ return (u1 <> str "/" <> u2))
+ siPrefix :: LP m (Inlines -> Inlines)
+ siPrefix =
+ (do _ <- controlSeq "square"
+ skipopts
+ return (<> superscript "2"))
+ <|> (do _ <- controlSeq "cubic"
+ skipopts
+ return (<> superscript "3"))
+ <|> (do _ <- controlSeq "raisetothe"
+ skipopts
+ n <- walk hyphenToMinus <$> tok
+ return (<> superscript n))
+ siSuffix :: LP m (Inlines -> Inlines)
+ siSuffix =
+ (do _ <- controlSeq "squared"
+ skipopts
+ return (<> superscript "2"))
+ <|> (do _ <- controlSeq "cubed"
+ skipopts
+ return (<> superscript "3"))
+ <|> (do _ <- controlSeq "tothe"
+ skipopts
+ n <- walk hyphenToMinus <$> tok
+ return (<> superscript n))
+ <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok
+ return (<> superscript n)))
+ <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok
+ return (<> subscript n)))
+ negateExponent :: Inlines -> Inlines
+ negateExponent ils =
+ case Seq.viewr (unMany ils) of
+ xs Seq.:> Superscript ss -> (Many xs) <>
+ superscript (str minus <> fromList ss)
+ _ -> ils <> superscript (str (minus <> "1"))
+ siBase :: LP m Inlines
+ siBase =
+ ((try
+ (do Tok _ (CtrlSeq name) _ <- anyControlSeq
+ case M.lookup name siUnitModifierMap of
+ Just il -> (il <>) <$> siBase
+ Nothing ->
+ case M.lookup name siUnitMap of
+ Just il -> pure il
+ Nothing -> fail "not a unit command"))
+ <|> (do Tok _ Word t <- satisfyTok isWordTok
+ return $ str t)
+ )
+
+siUnitModifierMap :: M.Map Text Inlines
+siUnitModifierMap = M.fromList
+ [ ("atto", str "a")
+ , ("centi", str "c")
+ , ("deca", str "d")
+ , ("deci", str "d")
+ , ("deka", str "d")
+ , ("exa", str "E")
+ , ("femto", str "f")
+ , ("giga", str "G")
+ , ("hecto", str "h")
+ , ("kilo", str "k")
+ , ("mega", str "M")
+ , ("micro", str "μ")
+ , ("milli", str "m")
+ , ("nano", str "n")
+ , ("peta", str "P")
+ , ("pico", str "p")
+ , ("tera", str "T")
+ , ("yocto", str "y")
+ , ("yotta", str "Y")
+ , ("zepto", str "z")
+ , ("zetta", str "Z")
+ ]
siUnitMap :: M.Map Text Inlines
siUnitMap = M.fromList
@@ -269,7 +372,6 @@ siUnitMap = M.fromList
, ("arcsecond", str "″")
, ("astronomicalunit", str "ua")
, ("atomicmassunit", str "u")
- , ("atto", str "a")
, ("bar", str "bar")
, ("barn", str "b")
, ("becquerel", str "Bq")
@@ -277,51 +379,38 @@ siUnitMap = M.fromList
, ("bohr", emph (str "a") <> subscript (str "0"))
, ("candela", str "cd")
, ("celsius", str "°C")
- , ("centi", str "c")
, ("clight", emph (str "c") <> subscript (str "0"))
, ("coulomb", str "C")
, ("dalton", str "Da")
, ("day", str "d")
- , ("deca", str "d")
- , ("deci", str "d")
, ("decibel", str "db")
, ("degreeCelsius",str "°C")
, ("degree", str "°")
- , ("deka", str "d")
, ("electronmass", emph (str "m") <> subscript (str "e"))
, ("electronvolt", str "eV")
, ("elementarycharge", emph (str "e"))
- , ("exa", str "E")
, ("farad", str "F")
- , ("femto", str "f")
- , ("giga", str "G")
, ("gram", str "g")
, ("gray", str "Gy")
, ("hartree", emph (str "E") <> subscript (str "h"))
, ("hectare", str "ha")
- , ("hecto", str "h")
, ("henry", str "H")
, ("hertz", str "Hz")
, ("hour", str "h")
, ("joule", str "J")
, ("katal", str "kat")
, ("kelvin", str "K")
- , ("kilo", str "k")
, ("kilogram", str "kg")
, ("knot", str "kn")
, ("liter", str "L")
, ("litre", str "l")
, ("lumen", str "lm")
, ("lux", str "lx")
- , ("mega", str "M")
, ("meter", str "m")
, ("metre", str "m")
- , ("micro", str "μ")
- , ("milli", str "m")
, ("minute", str "min")
, ("mmHg", str "mmHg")
, ("mole", str "mol")
- , ("nano", str "n")
, ("nauticalmile", str "M")
, ("neper", str "Np")
, ("newton", str "N")
@@ -329,25 +418,17 @@ siUnitMap = M.fromList
, ("Pa", str "Pa")
, ("pascal", str "Pa")
, ("percent", str "%")
- , ("per", str "/")
- , ("peta", str "P")
- , ("pico", str "p")
, ("planckbar", emph (str "\x210f"))
, ("radian", str "rad")
, ("second", str "s")
, ("siemens", str "S")
, ("sievert", str "Sv")
, ("steradian", str "sr")
- , ("tera", str "T")
, ("tesla", str "T")
, ("tonne", str "t")
, ("volt", str "V")
, ("watt", str "W")
, ("weber", str "Wb")
- , ("yocto", str "y")
- , ("yotta", str "Y")
- , ("zepto", str "z")
- , ("zetta", str "Z")
]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
new file mode 100644
index 000000000..f56728fe1
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -0,0 +1,379 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Table
+ ( tableEnvironments )
+where
+
+import Data.Functor (($>))
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import qualified Data.Text as T
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (when, void)
+import Text.Pandoc.Shared (safeRead, trim)
+import Text.Pandoc.Logging (LogMessage(SkippedContent))
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+
+tableEnvironments :: PandocMonad m
+ => LP m Blocks
+ -> LP m Inlines
+ -> M.Map Text (LP m Blocks)
+tableEnvironments blocks inline =
+ M.fromList
+ [ ("longtable", env "longtable" $
+ resetCaption *>
+ simpTable blocks inline "longtable" False >>= addTableCaption)
+ , ("table", env "table" $
+ skipopts *> resetCaption *> blocks >>= addTableCaption)
+ , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
+ , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
+ , ("tabular", env "tabular" $ simpTable blocks inline "tabular" False)
+ ]
+
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+ spaces
+ controlSeq "hline" <|>
+ (controlSeq "cline" <* braced) <|>
+ -- booktabs rules:
+ controlSeq "toprule" <|>
+ controlSeq "bottomrule" <|>
+ controlSeq "midrule" <|>
+ controlSeq "endhead" <|>
+ controlSeq "endfirsthead"
+ spaces
+ optional rawopt
+ return ()
+
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
+ <* skipopts <* spaces
+
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
+
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+ inp <- getInput
+ case inp of
+ (Tok spos Word t : rest) ->
+ setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
+ _ -> return ()
+
+parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
+parseAligns = try $ do
+ let maybeBar = skipMany
+ (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
+ let cAlign = AlignCenter <$ symbol 'c'
+ let lAlign = AlignLeft <$ symbol 'l'
+ let rAlign = AlignRight <$ symbol 'r'
+ let parAlign = AlignLeft <$ symbol 'p'
+ -- aligns from tabularx
+ let xAlign = AlignLeft <$ symbol 'X'
+ let mAlign = AlignLeft <$ symbol 'm'
+ let bAlign = AlignLeft <$ symbol 'b'
+ let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
+ <|> xAlign <|> mAlign <|> bAlign )
+ let alignPrefix = symbol '>' >> braced
+ let alignSuffix = symbol '<' >> braced
+ let colWidth = try $ do
+ symbol '{'
+ ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
+ spaces
+ symbol '}'
+ return $ safeRead ds
+ let alignSpec = do
+ pref <- option [] alignPrefix
+ spaces
+ al <- alignChar
+ width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
+ pos <- getPosition
+ report $ SkippedContent s pos
+ return Nothing)
+ spaces
+ suff <- option [] alignSuffix
+ return (al, width, (pref, suff))
+ let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
+ symbol '*'
+ spaces
+ ds <- trim . untokenize <$> braced
+ spaces
+ spec <- braced
+ case safeRead ds of
+ Just n ->
+ getInput >>= setInput . (mconcat (replicate n spec) ++)
+ Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
+ bgroup
+ spaces
+ maybeBar
+ aligns' <- many $ try $ spaces >> optional starAlign >>
+ (alignSpec <* maybeBar)
+ spaces
+ egroup
+ spaces
+ return $ map toSpec aligns'
+ where
+ toColWidth (Just w) | w > 0 = ColWidth w
+ toColWidth _ = ColWidthDefault
+ toSpec (x, y, z) = (x, toColWidth y, z)
+
+-- N.B. this parser returns a Row that may have erroneous empty cells
+-- in it. See the note above fixTableHead for details.
+parseTableRow :: PandocMonad m
+ => LP m Blocks -- ^ block parser
+ -> LP m Inlines -- ^ inline parser
+ -> Text -- ^ table environment name
+ -> [([Tok], [Tok])] -- ^ pref/suffixes
+ -> LP m Row
+parseTableRow blocks inline envname prefsufs = do
+ notFollowedBy (spaces *> end_ envname)
+ -- contexts that can contain & that is not colsep:
+ let canContainAmp (Tok _ (CtrlSeq "begin") _) = True
+ canContainAmp (Tok _ (CtrlSeq "verb") _) = True
+ canContainAmp (Tok _ (CtrlSeq "Verb") _) = True
+ canContainAmp _ = False
+ -- add prefixes and suffixes in token stream:
+ let celltoks (pref, suff) = do
+ prefpos <- getPosition
+ contents <- mconcat <$>
+ many ( snd <$> withRaw
+ ((lookAhead (controlSeq "parbox") >>
+ void blocks) -- #5711
+ <|>
+ (lookAhead (satisfyTok canContainAmp) >> void inline)
+ <|>
+ (lookAhead (symbol '$') >> void inline))
+ <|>
+ (do notFollowedBy
+ (() <$ amp <|> () <$ lbreak <|> end_ envname)
+ count 1 anyTok) )
+
+ suffpos <- getPosition
+ option [] (count 1 amp)
+ return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
+ rawcells <- mapM celltoks prefsufs
+ cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
+ spaces
+ return $ Row nullAttr cells
+
+parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
+parseTableCell blocks = do
+ spaces
+ updateState $ \st -> st{ sInTableCell = True }
+ cell' <- multicolumnCell blocks
+ <|> multirowCell blocks
+ <|> parseSimpleCell
+ <|> parseEmptyCell
+ updateState $ \st -> st{ sInTableCell = False }
+ spaces
+ return cell'
+ where
+ -- The parsing of empty cells is important in LaTeX, especially when dealing
+ -- with multirow/multicolumn. See #6603.
+ parseEmptyCell = spaces $> emptyCell
+ parseSimpleCell = simpleCell <$> (plainify <$> blocks)
+
+
+cellAlignment :: PandocMonad m => LP m Alignment
+cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
+ where
+ alignment = do
+ c <- untoken <$> singleChar
+ return $ case c of
+ "l" -> AlignLeft
+ "r" -> AlignRight
+ "c" -> AlignCenter
+ "*" -> AlignDefault
+ _ -> AlignDefault
+
+plainify :: Blocks -> Blocks
+plainify bs = case toList bs of
+ [Para ils] -> plain (fromList ils)
+ _ -> bs
+
+multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multirowCell blocks = controlSeq "multirow" >> do
+ -- Full prototype for \multirow macro is:
+ -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
+ -- However, everything except `nrows` and `text` make
+ -- sense in the context of the Pandoc AST
+ _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position
+ nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+ _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related
+ _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width
+ _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning
+ content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
+ return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
+
+multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multicolumnCell blocks = controlSeq "multicolumn" >> do
+ span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+ alignment <- symbol '{' *> cellAlignment <* symbol '}'
+
+ let singleCell = do
+ content <- plainify <$> blocks
+ return $ cell alignment (RowSpan 1) (ColSpan span') content
+
+ -- Two possible contents: either a \multirow cell, or content.
+ -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
+ -- Note that a \multirow cell can be nested in a \multicolumn,
+ -- but not the other way around. See #6603
+ let nestedCell = do
+ (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
+ return $ cell
+ alignment
+ (RowSpan rs)
+ (ColSpan span')
+ (fromList bs)
+
+ symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
+
+-- LaTeX tables are stored with empty cells underneath multirow cells
+-- denoting the grid spaces taken up by them. More specifically, if a
+-- cell spans m rows, then it will overwrite all the cells in the
+-- columns it spans for (m-1) rows underneath it, requiring padding
+-- cells in these places. These padding cells need to be removed for
+-- proper table reading. See #6603.
+--
+-- These fixTable functions do not otherwise fix up malformed
+-- input tables: that is left to the table builder.
+fixTableHead :: TableHead -> TableHead
+fixTableHead (TableHead attr rows) = TableHead attr rows'
+ where
+ rows' = fixTableRows rows
+
+fixTableBody :: TableBody -> TableBody
+fixTableBody (TableBody attr rhc th tb)
+ = TableBody attr rhc th' tb'
+ where
+ th' = fixTableRows th
+ tb' = fixTableRows tb
+
+fixTableRows :: [Row] -> [Row]
+fixTableRows = fixTableRows' $ repeat Nothing
+ where
+ fixTableRows' oldHang (Row attr cells : rs)
+ = let (newHang, cells') = fixTableRow oldHang cells
+ rs' = fixTableRows' newHang rs
+ in Row attr cells' : rs'
+ fixTableRows' _ [] = []
+
+-- The overhang is represented as Just (relative cell dimensions) or
+-- Nothing for an empty grid space.
+fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
+fixTableRow oldHang cells
+ -- If there's overhang, drop cells until their total width meets the
+ -- width of the occupied grid spaces (or we run out)
+ | (n, prefHang, restHang) <- splitHang oldHang
+ , n > 0
+ = let cells' = dropToWidth getCellW n cells
+ (restHang', cells'') = fixTableRow restHang cells'
+ in (prefHang restHang', cells'')
+ -- Otherwise record the overhang of a pending cell and fix the rest
+ -- of the row
+ | c@(Cell _ _ h w _):cells' <- cells
+ = let h' = max 1 h
+ w' = max 1 w
+ oldHang' = dropToWidth getHangW w' oldHang
+ (newHang, cells'') = fixTableRow oldHang' cells'
+ in (toHang w' h' <> newHang, c : cells'')
+ | otherwise
+ = (oldHang, [])
+ where
+ getCellW (Cell _ _ _ w _) = w
+ getHangW = maybe 1 fst
+ getCS (ColSpan n) = n
+
+ toHang c r
+ | r > 1 = [Just (c, r)]
+ | otherwise = replicate (getCS c) Nothing
+
+ -- Take the prefix of the overhang list representing filled grid
+ -- spaces. Also return the remainder and the length of this prefix.
+ splitHang = splitHang' 0 id
+
+ splitHang' !n l (Just (c, r):xs)
+ = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
+ splitHang' n l xs = (n, l, xs)
+
+ -- Drop list items until the total width of the dropped items
+ -- exceeds the passed width.
+ dropToWidth _ n l | n < 1 = l
+ dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
+ dropToWidth _ _ [] = []
+
+simpTable :: PandocMonad m
+ => LP m Blocks
+ -> LP m Inlines
+ -> Text
+ -> Bool
+ -> LP m Blocks
+simpTable blocks inline envname hasWidthParameter = try $ do
+ when hasWidthParameter $ () <$ tokWith inline
+ skipopts
+ colspecs <- parseAligns
+ let (aligns, widths, prefsufs) = unzip3 colspecs
+ optional $ controlSeq "caption" *> setCaption inline
+ spaces
+ optional label
+ spaces
+ optional lbreak
+ spaces
+ skipMany hline
+ spaces
+ header' <- option [] . try . fmap (:[]) $
+ parseTableRow blocks inline envname prefsufs <*
+ lbreak <* many1 hline
+ spaces
+ rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
+ (lbreak <* optional (skipMany hline))
+ spaces
+ optional $ controlSeq "caption" *> setCaption inline
+ spaces
+ optional label
+ spaces
+ optional lbreak
+ spaces
+ lookAhead $ controlSeq "end" -- make sure we're at end
+ let th = fixTableHead $ TableHead nullAttr header'
+ let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
+ let tf = TableFoot nullAttr []
+ return $ table emptyCaption (zip aligns widths) th tbs tf
+
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+ where go (Table attr c spec th tb tf) = do
+ st <- getState
+ let mblabel = sLastLabel st
+ capt <- case (sCaption st, mblabel) of
+ (Just ils, Nothing) -> return $ caption Nothing (plain ils)
+ (Just ils, Just lab) -> do
+ num <- getNextNumber sLastTableNum
+ setState
+ st{ sLastTableNum = num
+ , sLabels = M.insert lab
+ [Str (renderDottedNum num)]
+ (sLabels st) }
+ return $ caption Nothing (plain ils) -- add number??
+ (Nothing, _) -> return c
+ let attr' = case (attr, mblabel) of
+ ((_,classes,kvs), Just ident) ->
+ (ident,classes,kvs)
+ _ -> attr
+ return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+ go x = return x
+
+-- TODO: For now we add a Div to contain table attributes, since
+-- most writers don't do anything yet with attributes on Table.
+-- This can be removed when that changes.
+addAttrDiv :: Attr -> Block -> Block
+addAttrDiv ("",[],[]) b = b
+addAttrDiv attr b = Div attr [b]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index a017a2afb..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,6 +1,7 @@
+{-# LANGUAGE FlexibleInstances #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Types
- Copyright : Copyright (C) 2017-2020 John MacFarlane
+ Copyright : Copyright (C) 2017-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
)
where
import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
Esc1 | Esc2 | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
data Tok = Tok SourcePos TokType Text
deriving (Eq, Ord, Show)
+instance ToSources [Tok] where
+ toSources = Sources
+ . map (\ts -> case ts of
+ Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+ _ -> error "toSources [Tok] encountered empty group")
+ . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
deriving (Eq, Ord, Show)
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21b8feaab..1141af66f 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
import Control.Monad.Trans (lift)
import Control.Monad.Except (throwError)
import Data.Maybe (catMaybes, isJust)
-import Data.List (intersperse, intercalate)
+import Data.List (intersperse)
import qualified Data.Text as T
import Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Walk (query)
-import Text.Pandoc.Shared (crFilter, mapLeft)
+import Text.Pandoc.Shared (mapLeft)
import Text.Pandoc.Readers.Roff -- TODO explicit imports
-import Text.Parsec hiding (tokenPrim)
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosString)
import qualified Data.Foldable as Foldable
@@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
-- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
-readMan opts txt = do
- tokenz <- lexRoff (initialPos "input") (crFilter txt)
+readMan :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
+readMan opts s = do
+ let Sources inps = toSources s
+ tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
let state = def {readerOptions = opts} :: ManState
+ let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
+ fixError e = e
eitherdoc <- readWithMTokens parseMan state
(Foldable.toList . unRoffTokens $ tokenz)
- either throwError return eitherdoc
+ either (throwError . fixError) return eitherdoc
+
readWithMTokens :: PandocMonad m
=> ParserT [RoffToken] ManState m a -- ^ parser
@@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
-> [RoffToken] -- ^ input
-> m (Either PandocError a)
readWithMTokens parser state input =
- let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
+ let leftF = PandocParsecError mempty
in mapLeft leftF `liftM` runParserT parser state "source" input
+
parseMan :: PandocMonad m => ManParser m Pandoc
parseMan = do
bs <- many parseBlock <* eof
@@ -89,7 +96,7 @@ parseBlock = choice [ parseList
parseTable :: PandocMonad m => ManParser m Blocks
parseTable = do
- modifyState $ \st -> st { tableCellsPlain = True }
+ updateState $ \st -> st { tableCellsPlain = True }
let isTbl Tbl{} = True
isTbl _ = False
Tbl _opts rows pos <- msatisfy isTbl
@@ -135,7 +142,7 @@ parseTable = do
case res' of
Left _ -> Prelude.fail "Could not parse table cell"
Right x -> do
- modifyState $ \s -> s{ tableCellsPlain = False }
+ updateState $ \s -> s{ tableCellsPlain = False }
return x
Right x -> return x
@@ -222,7 +229,7 @@ parseTitle = do
setMeta "section" (linePartsToInlines y)
[x] -> setMeta "title" (linePartsToInlines x)
[] -> id
- modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
+ updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
return mempty
linePartsToInlines :: [LinePart] -> Inlines
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5888bf095..2dc7ddf52 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -4,7 +4,7 @@
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Markdown
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -21,15 +21,17 @@ module Text.Pandoc.Readers.Markdown (
import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isPunctuation, isSpace)
-import Data.List (transpose, elemIndex, sortOn)
+import Data.List (transpose, elemIndex, sortOn, foldl')
+import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.Map as M
import Data.Maybe
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
import qualified Data.ByteString.Lazy as BL
-import System.FilePath (addExtension, takeExtension)
+import System.FilePath (addExtension, takeExtension, takeDirectory)
+import qualified System.FilePath.Windows as Windows
+import qualified System.FilePath.Posix as Posix
import Text.HTML.TagSoup hiding (Row)
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
@@ -45,20 +47,22 @@ import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
isCommentTag, isInlineTag, isTextTag)
import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
-import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs)
+import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
+-- import Debug.Trace (traceShowId)
-type MarkdownParser m = ParserT Text ParserState m
+type MarkdownParser m = ParserT Sources ParserState m
+
+type F = Future ParserState
-- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
+readMarkdown :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a -- ^ Input
-> m Pandoc
readMarkdown opts s = do
parsed <- readWithM parseMarkdown def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ (ensureFinalNewlines 3 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
@@ -79,7 +83,7 @@ yamlToMeta opts mbfp bstr = do
meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
setPosition oldPos
return $ runF meta defaultParserState
- parsed <- readWithM parser def{ stateOptions = opts } ""
+ parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
case parsed of
Right result -> return result
Left e -> throwError e
@@ -95,14 +99,12 @@ yamlToRefs :: PandocMonad m
-> m [MetaValue]
yamlToRefs idpred opts mbfp bstr = do
let parser = do
- oldPos <- getPosition
case mbfp of
Nothing -> return ()
Just fp -> setPosition $ initialPos fp
refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
- setPosition oldPos
return $ runF refs defaultParserState
- parsed <- readWithM parser def{ stateOptions = opts } ""
+ parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
case parsed of
Right result -> return result
Left e -> throwError e
@@ -145,14 +147,14 @@ inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
-spnl :: PandocMonad m => ParserT Text st m ()
+spnl :: PandocMonad m => ParserT Sources st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
-spnl' :: PandocMonad m => ParserT Text st m Text
+spnl' :: PandocMonad m => ParserT Sources st m Text
spnl' = try $ do
xs <- many spaceChar
ys <- option "" $ try $ (:) <$> newline
@@ -247,51 +249,48 @@ titleBlock :: PandocMonad m => MarkdownParser m ()
titleBlock = pandocTitleBlock <|> mmdTitleBlock
pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
-pandocTitleBlock = try $ do
+pandocTitleBlock = do
guardEnabled Ext_pandoc_title_block
lookAhead (char '%')
- title <- option mempty titleLine
- author <- option (return []) authorsLine
- date <- option mempty dateLine
- optional blanklines
- let meta' = do title' <- title
- author' <- author
- date' <- date
- return $
- (if null title' then id else B.setMeta "title" title')
- . (if null author' then id else B.setMeta "author" author')
- . (if null date' then id else B.setMeta "date" date')
- $ nullMeta
- updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-
-yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-yamlMetaBlock = try $ do
+ try $ do
+ title <- option mempty titleLine
+ author <- option (return []) authorsLine
+ date <- option mempty dateLine
+ optional blanklines
+ let meta' = do title' <- title
+ author' <- author
+ date' <- date
+ return $
+ (if null title'
+ then id
+ else B.setMeta "title" title')
+ . (if null author'
+ then id
+ else B.setMeta "author" author')
+ . (if null date'
+ then id
+ else B.setMeta "date" date')
+ $ nullMeta
+ updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+
+yamlMetaBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
+yamlMetaBlock' = do
guardEnabled Ext_yaml_metadata_block
- string "---"
- blankline
- notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
- rawYamlLines <- manyTill anyLine stopLine
- -- by including --- and ..., we allow yaml blocks with just comments:
- let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
- optional blanklines
- newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks)
- $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+ newMetaF <- yamlMetaBlock (fmap B.toMetaValue <$> parseBlocks)
-- Since `<>` is left-biased, existing values are not touched:
updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
return mempty
-stopLine :: PandocMonad m => MarkdownParser m ()
-stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-
mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
-mmdTitleBlock = try $ do
+mmdTitleBlock = do
guardEnabled Ext_mmd_title_block
- firstPair <- kvPair False
- restPairs <- many (kvPair True)
- let kvPairs = firstPair : restPairs
- blanklines
- updateState $ \st -> st{ stateMeta' = stateMeta' st <>
- return (Meta $ M.fromList kvPairs) }
+ try $ do
+ firstPair <- kvPair False
+ restPairs <- many (kvPair True)
+ let kvPairs = firstPair : restPairs
+ blanklines
+ updateState $ \st -> st{ stateMeta' = stateMeta' st <>
+ return (Meta $ M.fromList kvPairs) }
kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue)
kvPair allowEmpty = try $ do
@@ -300,7 +299,7 @@ kvPair allowEmpty = try $ do
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
guard $ allowEmpty || not (T.null val)
let key' = T.concat $ T.words $ T.toLower key
- let val' = MetaBlocks $ B.toList $ B.plain $ B.text val
+ let val' = MetaInlines $ B.toList $ B.text val
return (key',val')
parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
@@ -334,10 +333,14 @@ referenceKey = try $ do
skipMany spaceChar
notFollowedBy' referenceTitle
notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
+ notFollowedBy' $ guardEnabled Ext_mmd_link_attributes >>
+ try (spnl <* keyValAttr)
notFollowedBy' (() <$ reference)
many1Char $ notFollowedBy space >> litChar
let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
- src <- try betweenAngles <|> sourceURL
+ rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+ src <- (if rebase then rebasePath pos else id) <$>
+ (try betweenAngles <|> sourceURL)
tit <- option "" referenceTitle
attr <- option nullAttr $ try $
do guardEnabled Ext_link_attributes
@@ -346,7 +349,7 @@ referenceKey = try $ do
addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
>> many (try $ spnl >> keyValAttr)
blanklines
- let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs
+ let attr' = extractIdClass $ foldl' (\x f -> f x) attr addKvs
target = (escapeURI $ trimr src, tit)
st <- getState
let oldkeys = stateKeys st
@@ -442,7 +445,7 @@ block :: PandocMonad m => MarkdownParser m (F Blocks)
block = do
res <- choice [ mempty <$ blanklines
, codeBlockFenced
- , yamlMetaBlock
+ , yamlMetaBlock'
-- note: bulletList needs to be before header because of
-- the possibility of empty list items: -
, bulletList
@@ -568,7 +571,7 @@ registerImplicitHeader raw attr@(ident, _, _)
-- hrule block
--
-hrule :: PandocMonad m => ParserT Text st m (F Blocks)
+hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -588,7 +591,7 @@ indentedLine = indentSpaces >> anyLineNewline
blockDelimiter :: PandocMonad m
=> (Char -> Bool)
-> Maybe Int
- -> ParserT Text ParserState m Int
+ -> ParserT Sources ParserState m Int
blockDelimiter f len = try $ do
skipNonindentSpaces
c <- lookAhead (satisfy f)
@@ -602,7 +605,7 @@ attributes = try $ do
spnl
attrs <- many (attribute <* spnl)
char '}'
- return $ foldl (\x f -> f x) nullAttr attrs
+ return $ foldl' (\x f -> f x) nullAttr attrs
attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
@@ -659,15 +662,15 @@ codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockFenced = try $ do
indentchars <- nonindentSpaces
let indentLevel = T.length indentchars
- c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
+ c <- (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
size <- blockDelimiter (== c) Nothing
skipMany spaceChar
rawattr <-
- (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
<|>
(Right <$> option ("",[],[])
- (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+ ((guardEnabled Ext_fenced_code_attributes >> try attributes)
<|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar)))
blankline
contents <- T.intercalate "\n" <$>
@@ -732,7 +735,7 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ T.intercalate "\n" lns'
-birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
+birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@@ -1025,7 +1028,7 @@ para = try $ do
option (B.plain <$> result)
$ try $ do
newline
- (blanklines >> return mempty)
+ (mempty <$ blanklines)
<|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@@ -1118,6 +1121,7 @@ rawTeXBlock = do
rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
+ let selfClosing = "/>" `T.isSuffixOf` raw
-- we don't want '<td> text' to be a code block:
skipMany spaceChar
indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
@@ -1131,7 +1135,9 @@ rawHtmlBlocks = do
gobbleAtMostSpaces indentlevel
notFollowedBy' closer
block
- contents <- mconcat <$> many block'
+ contents <- if selfClosing
+ then return mempty
+ else mconcat <$> many block'
result <-
try
(do gobbleAtMostSpaces indentlevel
@@ -1155,11 +1161,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
--
lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-lineBlock = try $ do
+lineBlock = do
guardEnabled Ext_line_blocks
- lines' <- lineBlockLines >>=
- mapM (parseFromString' (trimInlinesF <$> inlines))
- return $ B.lineBlock <$> sequence lines'
+ try $ do
+ lines' <- lineBlockLines >>=
+ mapM (parseFromString' (trimInlinesF <$> inlines))
+ return $ B.lineBlock <$> sequence lines'
--
-- Tables
@@ -1169,7 +1176,7 @@ lineBlock = try $ do
-- and the length including trailing space.
dashedLine :: PandocMonad m
=> Char
- -> ParserT Text st m (Int, Int)
+ -> ParserT Sources st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
@@ -1238,7 +1245,7 @@ rawTableLine :: PandocMonad m
-> MarkdownParser m [Text]
rawTableLine indices = do
notFollowedBy' (blanklines' <|> tableFooter)
- line <- take1WhileP (/='\n') <* newline
+ line <- anyLine
return $ map trim $ tail $
splitTextByIndices (init indices) line
@@ -1261,11 +1268,12 @@ multilineRow indices = do
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
-tableCaption = try $ do
+tableCaption = do
guardEnabled Ext_table_captions
- skipNonindentSpaces
- (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
- trimInlinesF <$> inlines1 <* blanklines
+ try $ do
+ skipNonindentSpaces
+ (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
+ trimInlinesF <$> inlines1 <* blanklines
-- Parse a simple table with '---' header and one line per row.
simpleTable :: PandocMonad m
@@ -1351,7 +1359,7 @@ pipeTable = try $ do
lines' <- many pipeTableRow
let lines'' = map (take (length aligns) <$>) lines'
let maxlength = maximum $
- map (\x -> T.length . stringify $ runF x def) (heads' : lines'')
+ fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'')
numColumns <- getOption readerColumns
let widths = if maxlength > numColumns
then map (\len ->
@@ -1388,7 +1396,7 @@ pipeTableCell =
return $ B.plain <$> result)
<|> return mempty
-pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@@ -1404,10 +1412,14 @@ pipeTableHeaderPart = try $ do
(Just _,Just _) -> AlignCenter, len)
-- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT Text st m ()
+scanForPipe :: PandocMonad m => ParserT Sources st m ()
scanForPipe = do
- inp <- getInput
- case T.break (\c -> c == '\n' || c == '|') inp of
+ Sources inps <- getInput
+ let ln = case inps of
+ [] -> ""
+ ((_,t):(_,t'):_) | T.null t -> t'
+ ((_,t):_) -> t
+ case T.break (\c -> c == '\n' || c == '|') ln of
(_, T.uncons -> Just ('|', _)) -> return ()
_ -> mzero
@@ -1434,15 +1446,14 @@ table :: PandocMonad m => MarkdownParser m (F Blocks)
table = try $ do
frontCaption <- option Nothing (Just <$> tableCaption)
(aligns, widths, heads, lns) <-
- try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
- try (guardEnabled Ext_multiline_tables >>
- multilineTable False) <|>
- try (guardEnabled Ext_simple_tables >>
- (simpleTable True <|> simpleTable False)) <|>
- try (guardEnabled Ext_multiline_tables >>
- multilineTable True) <|>
- try (guardEnabled Ext_grid_tables >>
- (gridTable False <|> gridTable True)) <?> "table"
+ (guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|>
+ (guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|>
+ (guardEnabled Ext_simple_tables >>
+ try (simpleTable True <|> simpleTable False)) <|>
+ (guardEnabled Ext_multiline_tables >>
+ try (multilineTable True)) <|>
+ (guardEnabled Ext_grid_tables >>
+ try (gridTable False <|> gridTable True)) <?> "table"
optional blanklines
caption <- case frontCaption of
Nothing -> option (return mempty) tableCaption
@@ -1476,35 +1487,37 @@ inlines1 :: PandocMonad m => MarkdownParser m (F Inlines)
inlines1 = mconcat <$> many1 inline
inline :: PandocMonad m => MarkdownParser m (F Inlines)
-inline = choice [ whitespace
- , bareURL
- , str
- , endline
- , code
- , strongOrEmph
- , note
- , cite
- , bracketedSpan
- , link
- , image
- , math
- , strikeout
- , subscript
- , superscript
- , inlineNote -- after superscript because of ^[link](/foo)^
- , autoLink
- , spanHtml
- , rawHtmlInline
- , escapedNewline
- , escapedChar
- , rawLaTeXInline'
- , exampleRef
- , smart
- , return . B.singleton <$> charRef
- , emoji
- , symbol
- , ltSign
- ] <?> "inline"
+inline = do
+ c <- lookAhead anyChar
+ ((case c of
+ ' ' -> whitespace
+ '\t' -> whitespace
+ '\n' -> endline
+ '`' -> code
+ '_' -> strongOrEmph
+ '*' -> strongOrEmph
+ '^' -> superscript <|> inlineNote -- in this order bc ^[link](/foo)^
+ '[' -> note <|> cite <|> bracketedSpan <|> link
+ '!' -> image
+ '$' -> math
+ '~' -> strikeout <|> subscript
+ '<' -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign
+ '\\' -> math <|> escapedNewline <|> escapedChar <|> rawLaTeXInline'
+ '@' -> cite <|> exampleRef
+ '"' -> smart
+ '\'' -> smart
+ '\8216' -> smart
+ '\145' -> smart
+ '\8220' -> smart
+ '\147' -> smart
+ '-' -> smart
+ '.' -> smart
+ '&' -> return . B.singleton <$> charRef
+ ':' -> emoji
+ _ -> mzero)
+ <|> bareURL
+ <|> str
+ <|> symbol) <?> "inline"
escapedChar' :: PandocMonad m => MarkdownParser m Char
escapedChar' = try $ do
@@ -1515,11 +1528,12 @@ escapedChar' = try $ do
<|> oneOf "\\`*_{}[]()>#+-.!~\""
escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines)
-escapedNewline = try $ do
+escapedNewline = do
guardEnabled Ext_escaped_line_breaks
- char '\\'
- lookAhead (char '\n') -- don't consume the newline (see #3730)
- return $ return B.linebreak
+ try $ do
+ char '\\'
+ lookAhead (char '\n') -- don't consume the newline (see #3730)
+ return $ return B.linebreak
escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
escapedChar = do
@@ -1541,19 +1555,20 @@ ltSign = do
-- whole document has been parsed. But we need this parser
-- here in case citations is disabled.
exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
-exampleRef = try $ do
+exampleRef = do
guardEnabled Ext_example_lists
- char '@'
- lab <- mconcat . map T.pack <$>
- many (many1 alphaNum <|>
- try (do c <- char '_' <|> char '-'
- cs <- many1 alphaNum
- return (c:cs)))
- return $ do
- st <- askF
- return $ case M.lookup lab (stateExamples st) of
- Just n -> B.str $ tshow n
- Nothing -> B.str $ "@" <> lab
+ try $ do
+ char '@'
+ lab <- mconcat . map T.pack <$>
+ many (many1 alphaNum <|>
+ try (do c <- char '_' <|> char '-'
+ cs <- many1 alphaNum
+ return (c:cs)))
+ return $ do
+ st <- askF
+ return $ case M.lookup lab (stateExamples st) of
+ Just n -> B.str $ tshow n
+ Nothing -> B.str $ "@" <> lab
symbol :: PandocMonad m => MarkdownParser m (F Inlines)
symbol = do
@@ -1580,10 +1595,10 @@ code = try $ do
>> count (length starts) (char '`')
>> notFollowedBy (char '`'))
rawattr <-
- (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+ (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
<|>
(Right <$> option ("",[],[])
- (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+ (guardEnabled Ext_inline_code_attributes >> try attributes))
return $ return $
case rawattr of
Left syn -> B.rawInline syn result
@@ -1676,38 +1691,40 @@ strikeout = fmap B.strikeout <$>
strikeEnd = try $ string "~~"
superscript :: PandocMonad m => MarkdownParser m (F Inlines)
-superscript = fmap B.superscript <$> try (do
+superscript = do
guardEnabled Ext_superscript
- char '^'
- mconcat <$> many1Till (do notFollowedBy spaceChar
- notFollowedBy newline
- inline) (char '^'))
+ fmap B.superscript <$> try (do
+ char '^'
+ mconcat <$> many1Till (do notFollowedBy spaceChar
+ notFollowedBy newline
+ inline) (char '^'))
subscript :: PandocMonad m => MarkdownParser m (F Inlines)
-subscript = fmap B.subscript <$> try (do
+subscript = do
guardEnabled Ext_subscript
- char '~'
- mconcat <$> many1Till (do notFollowedBy spaceChar
- notFollowedBy newline
- inline) (char '~'))
+ fmap B.subscript <$> try (do
+ char '~'
+ mconcat <$> many1Till (do notFollowedBy spaceChar
+ notFollowedBy newline
+ inline) (char '~'))
whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
-nonEndline :: PandocMonad m => ParserT Text st m Char
+nonEndline :: PandocMonad m => ParserT Sources st m Char
nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
result <- mconcat <$> many1
- ( take1WhileP isAlphaNum
+ ( T.pack <$> (many1 alphaNum)
<|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
updateLastStrPos
(do guardEnabled Ext_smart
abbrevs <- getOption readerAbbreviations
- if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
+ if result `Set.member` abbrevs
then try (do ils <- whitespace
notFollowedBy (() <$ cite <|> () <$ note)
-- ?? lookAhead alphaNum
@@ -1790,15 +1807,16 @@ link = try $ do
regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
-bracketedSpan = try $ do
+bracketedSpan = do
guardEnabled Ext_bracketed_spans
- (lab,_) <- reference
- attr <- attributes
- return $ if isSmallCaps attr
- then B.smallcaps <$> lab
- else if isUnderline attr
- then B.underline <$> lab
- else B.spanWith attr <$> lab
+ try $ do
+ (lab,_) <- reference
+ attr <- attributes
+ return $ if isSmallCaps attr
+ then B.smallcaps <$> lab
+ else if isUnderline attr
+ then B.underline <$> lab
+ else B.spanWith attr <$> lab
-- | We treat a span as SmallCaps if class is "smallcaps" (and
-- no other attributes are set or if style is "font-variant:small-caps"
@@ -1825,9 +1843,12 @@ regLink :: PandocMonad m
-> MarkdownParser m (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
+ rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+ pos <- getPosition
+ let src' = if rebase then rebasePath pos src else src
attr <- option nullAttr $
guardEnabled Ext_link_attributes >> attributes
- return $ constructor attr src tit <$> lab
+ return $ constructor attr src' tit <$> lab
-- a link like [this][ref] or [this][] or [this]
referenceLink :: PandocMonad m
@@ -1867,7 +1888,8 @@ referenceLink constructor (lab, raw) = do
Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
Nothing -> makeFallback
else makeFallback
- Just ((src,tit), attr) -> constructor attr src tit <$> lab
+ Just ((src,tit), attr) ->
+ constructor attr src tit <$> lab
dropBrackets :: Text -> Text
dropBrackets = dropRB . dropLB
@@ -1877,12 +1899,13 @@ dropBrackets = dropRB . dropLB
dropLB xs = xs
bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
-bareURL = try $ do
+bareURL = do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
- (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
- notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
- return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
+ try $ do
+ (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
+ notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
+ return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
autoLink = try $ do
@@ -1899,15 +1922,33 @@ autoLink = try $ do
return $ return $ B.linkWith attr (src <> escapeURI extra) ""
(B.str $ orig <> extra)
+-- | Rebase a relative path, by adding the (relative) directory
+-- of the containing source position. Absolute links and URLs
+-- are untouched.
+rebasePath :: SourcePos -> Text -> Text
+rebasePath pos path = do
+ let fp = sourceName pos
+ isFragment = T.take 1 path == "#"
+ path' = T.unpack path
+ isAbsolutePath = Posix.isAbsolute path' || Windows.isAbsolute path'
+ in if T.null path || isFragment || isAbsolutePath || isURI path
+ then path
+ else
+ case takeDirectory fp of
+ "" -> path
+ "." -> path
+ d -> T.pack d <> "/" <> path
+
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
- let constructor attr' src = case takeExtension (T.unpack src) of
- "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
- $ T.unpack defaultExt)
- _ -> B.imageWith attr' src
+ let constructor attr' src =
+ case takeExtension (T.unpack src) of
+ "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
+ $ T.unpack defaultExt)
+ _ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
note :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1935,23 +1976,25 @@ note = try $ do
return $ B.note $ walk adjustCite contents'
inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
-inlineNote = try $ do
+inlineNote = do
guardEnabled Ext_inline_notes
- char '^'
- updateState $ \st -> st{ stateInNote = True
- , stateNoteNumber = stateNoteNumber st + 1 }
- contents <- inlinesInBalancedBrackets
- updateState $ \st -> st{ stateInNote = False }
- return $ B.note . B.para <$> contents
+ try $ do
+ char '^'
+ updateState $ \st -> st{ stateInNote = True
+ , stateNoteNumber = stateNoteNumber st + 1 }
+ contents <- inlinesInBalancedBrackets
+ updateState $ \st -> st{ stateInNote = False }
+ return $ B.note . B.para <$> contents
rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
-rawLaTeXInline' = try $ do
+rawLaTeXInline' = do
guardEnabled Ext_raw_tex
notFollowedBy' rawConTeXtEnvironment
- s <- rawLaTeXInline
- return $ return $ B.rawInline "tex" s -- "tex" because it might be context
+ try $ do
+ s <- rawLaTeXInline
+ return $ return $ B.rawInline "tex" s -- "tex" because it might be context
-rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
+rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1960,7 +2003,7 @@ rawConTeXtEnvironment = try $ do
(try $ string "\\stop" >> textStr completion)
return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
-inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
+inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
inBrackets parser = do
char '['
contents <- manyChar parser
@@ -1968,55 +2011,60 @@ inBrackets parser = do
return $ "[" <> contents <> "]"
spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
-spanHtml = try $ do
+spanHtml = do
guardEnabled Ext_native_spans
- (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
- contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
- let ident = fromMaybe "" $ lookup "id" attrs
- let classes = maybe [] T.words $ lookup "class" attrs
- let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ if isSmallCaps (ident, classes, keyvals)
- then B.smallcaps <$> contents
- else if isUnderline (ident, classes, keyvals)
- then B.underline <$> contents
- else B.spanWith (ident, classes, keyvals) <$> contents
+ try $ do
+ (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
+ contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
+ let ident = fromMaybe "" $ lookup "id" attrs
+ let classes = maybe [] T.words $ lookup "class" attrs
+ let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+ return $ if isSmallCaps (ident, classes, keyvals)
+ then B.smallcaps <$> contents
+ else if isUnderline (ident, classes, keyvals)
+ then B.underline <$> contents
+ else B.spanWith (ident, classes, keyvals) <$> contents
divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
-divHtml = try $ do
+divHtml = do
guardEnabled Ext_native_divs
- (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
- -- we set stateInHtmlBlock so that closing tags that can be either block or
- -- inline will not be parsed as inline tags
- oldInHtmlBlock <- stateInHtmlBlock <$> getState
- updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
- bls <- option "" (blankline >> option "" blanklines)
- contents <- mconcat <$>
- many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
- closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
- if closed
- then do
- updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
- let ident = fromMaybe "" $ lookup "id" attrs
- let classes = maybe [] T.words $ lookup "class" attrs
- let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ B.divWith (ident, classes, keyvals) <$> contents
- else -- avoid backtracing
- return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+ try $ do
+ (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
+ -- we set stateInHtmlBlock so that closing tags that can be either block
+ -- or inline will not be parsed as inline tags
+ oldInHtmlBlock <- stateInHtmlBlock <$> getState
+ updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
+ bls <- option "" (blankline >> option "" blanklines)
+ contents <- mconcat <$>
+ many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
+ closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
+ if closed
+ then do
+ updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+ let ident = fromMaybe "" $ lookup "id" attrs
+ let classes = maybe [] T.words $ lookup "class" attrs
+ let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+ return $ B.divWith (ident, classes, keyvals) <$> contents
+ else -- avoid backtracing
+ return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
-divFenced = try $ do
+divFenced = do
guardEnabled Ext_fenced_divs
- string ":::"
- skipMany (char ':')
- skipMany spaceChar
- attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
- skipMany spaceChar
- skipMany (char ':')
- blankline
- updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
- bs <- mconcat <$> manyTill block divFenceEnd
- updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
- return $ B.divWith attribs <$> bs
+ try $ do
+ string ":::"
+ skipMany (char ':')
+ skipMany spaceChar
+ attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
+ skipMany spaceChar
+ skipMany (char ':')
+ blankline
+ updateState $ \st ->
+ st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
+ bs <- mconcat <$> manyTill block divFenceEnd
+ updateState $ \st ->
+ st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
+ return $ B.divWith attribs <$> bs
divFenceEnd :: PandocMonad m => MarkdownParser m ()
divFenceEnd = try $ do
@@ -2048,14 +2096,15 @@ emojiChars :: [Char]
emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
emoji :: PandocMonad m => MarkdownParser m (F Inlines)
-emoji = try $ do
+emoji = do
guardEnabled Ext_emoji
- char ':'
- emojikey <- many1Char (oneOf emojiChars)
- char ':'
- case emojiToInline emojikey of
- Just i -> return (return $ B.singleton i)
- Nothing -> mzero
+ try $ do
+ char ':'
+ emojikey <- many1Char (oneOf emojiChars)
+ char ':'
+ case emojiToInline emojikey of
+ Just i -> return (return $ B.singleton i)
+ Nothing -> mzero
-- Citations
@@ -2074,7 +2123,7 @@ cite = do
textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
textualCite = try $ do
- (suppressAuthor, key) <- citeKey
+ (suppressAuthor, key) <- citeKey True
-- If this is a reference to an earlier example list item,
-- then don't parse it as a citation. If the example list
-- item comes later, we'll parse it here and figure out in
@@ -2154,7 +2203,7 @@ prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']'
<|> lookAhead
(try $ do optional (try (char ';' >> spnl))
- citeKey
+ citeKey True
return ']'))
citeList :: PandocMonad m => MarkdownParser m (F [Citation])
@@ -2163,7 +2212,7 @@ citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
citation :: PandocMonad m => MarkdownParser m (F Citation)
citation = try $ do
pref <- prefix
- (suppress_author, key) <- citeKey
+ (suppress_author, key) <- citeKey True
suff <- suffix
noteNum <- stateNoteNumber <$> getState
return $ do
@@ -2182,28 +2231,30 @@ citation = try $ do
smart :: PandocMonad m => MarkdownParser m (F Inlines)
smart = do
guardEnabled Ext_smart
- doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [apostrophe, dash, ellipses])
+ doubleQuoted <|> singleQuoted <|> (return <$> doubleCloseQuote) <|>
+ (return <$> apostrophe) <|> (return <$> dash) <|> (return <$> ellipses)
singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-singleQuoted = try $ do
+singleQuoted = do
singleQuoteStart
- withQuoteContext InSingleQuote $
+ (try (withQuoteContext InSingleQuote $
fmap B.singleQuoted . trimInlinesF . mconcat <$>
- many1Till inline singleQuoteEnd
+ many1Till inline singleQuoteEnd))
+ <|> (return (return (B.str "\8217")))
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-doubleQuoted = try $ do
+doubleQuoted = do
doubleQuoteStart
- withQuoteContext InDoubleQuote $
+ (try (withQuoteContext InDoubleQuote $
fmap B.doubleQuoted . trimInlinesF . mconcat <$>
- many1Till inline doubleQuoteEnd
+ many1Till inline doubleQuoteEnd))
+ <|> (return (return (B.str "\8220")))
toRow :: [Blocks] -> Row
toRow = Row nullAttr . map B.simpleCell
toHeaderRow :: [Blocks] -> [Row]
-toHeaderRow l = [toRow l | not (null l)]
+toHeaderRow l = [toRow l | not (null l) && not (all null l)]
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index e8985ab2c..825e4a2eb 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.MediaWiki
- Copyright : Copyright (C) 2012-2020 John MacFarlane
+ Copyright : Copyright (C) 2012-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -36,17 +36,18 @@ import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
trim, splitTextBy, tshow)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML (fromEntities)
-- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
- => ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+readMediaWiki :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
-> m Pandoc
readMediaWiki opts s = do
+ let sources = toSources s
parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
@@ -55,7 +56,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
- (crFilter s <> "\n")
+ sources
case parsed of
Right result -> return result
Left e -> throwError e
@@ -69,7 +70,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwInTT :: Bool
}
-type MWParser m = ParserT Text MWState m
+type MWParser m = ParserT Sources MWState m
instance HasReaderOptions MWState where
extractReaderOptions = mwOptions
@@ -112,12 +113,14 @@ newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
isBlockTag' :: Tag Text -> Bool
isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
t `notElem` eitherBlockOrInline
+isBlockTag' (TagClose "ref") = True -- needed so 'special' doesn't parse it
isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
t `notElem` eitherBlockOrInline
isBlockTag' tag = isBlockTag tag
isInlineTag' :: Tag Text -> Bool
isInlineTag' (TagComment _) = True
+isInlineTag' (TagClose "ref") = False -- see below inlineTag
isInlineTag' t = not (isBlockTag' t)
eitherBlockOrInline :: [Text]
@@ -554,11 +557,17 @@ variable = try $ do
contents <- manyTillChar anyChar (try $ string "}}}")
return $ "{{{" <> contents <> "}}}"
+singleParaToPlain :: Blocks -> Blocks
+singleParaToPlain bs =
+ case B.toList bs of
+ [Para ils] -> B.fromList [Plain ils]
+ _ -> bs
+
inlineTag :: PandocMonad m => MWParser m Inlines
inlineTag = do
(tag, _) <- lookAhead $ htmlTag isInlineTag'
case tag of
- TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
+ TagOpen "ref" _ -> B.note . singleParaToPlain <$> blocksInTags "ref"
TagOpen "nowiki" _ -> try $ do
(_,raw) <- htmlTag (~== tag)
if T.any (== '/') raw
@@ -678,19 +687,17 @@ url = do
-- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
inlinesBetween start end =
- trimInlines . mconcat <$> try (start >> many1Till inner end)
- where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
- innerSpace = try $ whitespace <* notFollowedBy' end
+ trimInlines . mconcat <$> try (start >> many1Till inline end)
emph :: PandocMonad m => MWParser m Inlines
emph = B.emph <$> nested (inlinesBetween start end)
- where start = sym "''" >> lookAhead nonspaceChar
+ where start = sym "''"
end = try $ notFollowedBy' (() <$ strong) >> sym "''"
strong :: PandocMonad m => MWParser m Inlines
strong = B.strong <$> nested (inlinesBetween start end)
- where start = sym "'''" >> lookAhead nonspaceChar
- end = try $ sym "'''"
+ where start = sym "'''"
+ end = sym "'''"
doubleQuotes :: PandocMonad m => MWParser m Inlines
doubleQuotes = do
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index a64b130e5..cbc523b25 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -2,7 +2,7 @@
{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Readers.Metadata
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -14,6 +14,7 @@ Parse YAML/JSON metadata to 'Pandoc' 'Meta'.
module Text.Pandoc.Readers.Metadata (
yamlBsToMeta,
yamlBsToRefs,
+ yamlMetaBlock,
yamlMap ) where
import Control.Monad
@@ -30,11 +31,13 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Shared
+import qualified Data.Text.Lazy as TL
+import qualified Text.Pandoc.UTF8 as UTF8
-yamlBsToMeta :: PandocMonad m
- => ParserT Text ParserState m (F MetaValue)
+yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
+ => ParserT Sources st m (Future st MetaValue)
-> BL.ByteString
- -> ParserT Text ParserState m (F Meta)
+ -> ParserT Sources st m (Future st Meta)
yamlBsToMeta pMetaValue bstr = do
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -42,6 +45,9 @@ yamlBsToMeta pMetaValue bstr = do
Right [] -> return . return $ mempty
Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
-> return . return $ mempty
+ -- the following is what we get from a comment:
+ Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))]
+ -> return . return $ mempty
Right _ -> Prelude.fail "expected YAML object"
Left (yamlpos, err')
-> do pos <- getPosition
@@ -63,11 +69,11 @@ lookupYAML t (YAML.Mapping _ _ m) =
lookupYAML _ _ = Nothing
-- Returns filtered list of references.
-yamlBsToRefs :: PandocMonad m
- => ParserT Text ParserState m (F MetaValue)
+yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
+ => ParserT Sources st m (Future st MetaValue)
-> (Text -> Bool) -- ^ Filter for id
-> BL.ByteString
- -> ParserT Text ParserState m (F [MetaValue])
+ -> ParserT Sources st m (Future st [MetaValue])
yamlBsToRefs pMetaValue idpred bstr =
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -95,8 +101,12 @@ yamlBsToRefs pMetaValue idpred bstr =
Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
-> return . return $ mempty
Right _ -> Prelude.fail "expecting YAML object"
- Left (_pos, err')
- -> Prelude.fail err'
+ Left (yamlpos, err')
+ -> do pos <- getPosition
+ setPosition $ incSourceLine
+ (setSourceColumn pos (YE.posColumn yamlpos))
+ (YE.posLine yamlpos - 1)
+ Prelude.fail err'
nodeToKey :: YAML.Node YE.Pos -> Maybe Text
@@ -104,10 +114,10 @@ nodeToKey (YAML.Scalar _ (YAML.SStr t)) = Just t
nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
nodeToKey _ = Nothing
-normalizeMetaValue :: PandocMonad m
- => ParserT Text ParserState m (F MetaValue)
+normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
+ => ParserT Sources st m (Future st MetaValue)
-> Text
- -> ParserT Text ParserState m (F MetaValue)
+ -> ParserT Sources st m (Future st MetaValue)
normalizeMetaValue pMetaValue x =
-- Note: a standard quoted or unquoted YAML value will
-- not end in a newline, but a "block" set off with
@@ -129,10 +139,10 @@ checkBoolean t
| t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False
| otherwise = Nothing
-yamlToMetaValue :: PandocMonad m
- => ParserT Text ParserState m (F MetaValue)
+yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
+ => ParserT Sources st m (Future st MetaValue)
-> YAML.Node YE.Pos
- -> ParserT Text ParserState m (F MetaValue)
+ -> ParserT Sources st m (Future st MetaValue)
yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
case x of
YAML.SStr t -> normalizeMetaValue pMetaValue t
@@ -152,10 +162,10 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
fmap MetaMap <$> yamlMap pMetaValue o
yamlToMetaValue _ _ = return $ return $ MetaString ""
-yamlMap :: PandocMonad m
- => ParserT Text ParserState m (F MetaValue)
+yamlMap :: (PandocMonad m, HasLastStrPosition st)
+ => ParserT Sources st m (Future st MetaValue)
-> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
- -> ParserT Text ParserState m (F (M.Map Text MetaValue))
+ -> ParserT Sources st m (Future st (M.Map Text MetaValue))
yamlMap pMetaValue o = do
kvs <- forM (M.toList o) $ \(key, v) -> do
k <- maybe (throwError $ PandocParseError
@@ -171,3 +181,20 @@ yamlMap pMetaValue o = do
return $ do
v' <- fv
return (k, v')
+
+-- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
+yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
+ => ParserT Sources st m (Future st MetaValue)
+ -> ParserT Sources st m (Future st Meta)
+yamlMetaBlock parser = try $ do
+ string "---"
+ blankline
+ notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
+ rawYamlLines <- manyTill anyLine stopLine
+ -- by including --- and ..., we allow yaml blocks with just comments:
+ let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
+ optional blanklines
+ yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+
+stopLine :: Monad m => ParserT Sources st m ()
+stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index b4eea9d3a..a0d4534f1 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -36,19 +36,20 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (F)
-import Text.Pandoc.Shared (crFilter, trimr, tshow)
+import Text.Pandoc.Parsing
+import Text.Pandoc.Shared (trimr, tshow)
-- | Read Muse from an input string and return a Pandoc document.
-readMuse :: PandocMonad m
+readMuse :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readMuse opts s = do
- let input = crFilter s
- res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
+ let sources = toSources s
+ res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
+ (initialSourceName sources) sources
case res of
- Left e -> throwError $ PandocParsecError input e
+ Left e -> throwError $ PandocParsecError sources e
Right d -> return d
type F = Future MuseState
@@ -82,7 +83,7 @@ instance Default MuseEnv where
, museInPara = False
}
-type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
+type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
instance HasReaderOptions MuseState where
extractReaderOptions = museOptions
@@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
-- * Parsers
-- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
eol = void newline <|> eof
getIndent :: PandocMonad m
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 42843381a..58f235e81 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Native
- Copyright : Copyright (C) 2011-2020 John MacFarlane
+ Copyright : Copyright (C) 2011-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
import Data.Text (Text)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
@@ -32,14 +33,15 @@ import Text.Pandoc.Error
--
-- > Pandoc nullMeta [Plain [Str "hi"]]
--
-readNative :: PandocMonad m
+readNative :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readNative _ s =
- case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
- Right doc -> return doc
- Left _ -> throwError $ PandocParseError "couldn't read native"
+ let t = sourcesToText . toSources $ s
+ in case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
+ Right doc -> return doc
+ Left _ -> throwError $ PandocParseError "couldn't read native"
readBlocks :: Text -> Either PandocError [Block]
readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 903cdf4a1..668c9ca11 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.OPML
- Copyright : Copyright (C) 2013-2020 John MacFarlane
+ Copyright : Copyright (C) 2013-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -13,20 +13,21 @@ Conversion of OPML to 'Pandoc' document.
module Text.Pandoc.Readers.OPML ( readOPML ) where
import Control.Monad.State.Strict
-import Data.Char (toUpper)
import Data.Default
-import Data.Generics
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import qualified Data.Text.Lazy as TL
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options
+import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.Pandoc.Shared (crFilter, blocksToInlines')
-import Text.XML.Light
+import Text.Pandoc.Shared (blocksToInlines')
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.XML.Light
+import Control.Monad.Except (throwError)
type OPML m = StateT OPMLState m
@@ -46,42 +47,27 @@ instance Default OPMLState where
, opmlOptions = def
}
-readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readOPML opts inp = do
- (bs, st') <- runStateT
- (mapM parseBlock $ normalizeTree $
- parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts }
+ let sources = toSources inp
+ (bs, st') <-
+ runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
+ Left msg -> throwError $ PandocXMLError "" msg
+ Right ns -> mapM parseBlock ns)
+ def{ opmlOptions = opts }
return $
setTitle (opmlDocTitle st') $
setAuthors (opmlDocAuthors st') $
setDate (opmlDocDate st') $
doc $ mconcat bs
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
- where go :: [Content] -> [Content]
- go (Text (CData CDataRaw _ _):xs) = xs
- go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
- Text (CData CDataText (s1 ++ s2) z):xs
- go (Text (CData CDataText s1 z):CRef r:xs) =
- Text (CData CDataText (s1 ++ convertEntity r) z):xs
- go (CRef r:Text (CData CDataText s1 z):xs) =
- Text (CData CDataText (convertEntity r ++ s1) z):xs
- go (CRef r1:CRef r2:xs) =
- Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
- go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
attrValue attr elt =
- maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-
-textContent :: Element -> Text
-textContent = T.pack . strContent
+ fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
-- exceptT = either throwError return
@@ -105,11 +91,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks
parseBlock (Elem e) =
case qName (elName e) of
"ownerName" -> mempty <$ modify (\st ->
- st{opmlDocAuthors = [text $ textContent e]})
+ st{opmlDocAuthors = [text $ strContent e]})
"dateModified" -> mempty <$ modify (\st ->
- st{opmlDocDate = text $ textContent e})
+ st{opmlDocDate = text $ strContent e})
"title" -> mempty <$ modify (\st ->
- st{opmlDocTitle = text $ textContent e})
+ st{opmlDocTitle = text $ strContent e})
"outline" -> gets opmlSectionLevel >>= sect . (+1)
"?xml" -> return mempty
_ -> getBlocks e
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 7b8bfd4b5..c274b6fd4 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Reader.Odt
@@ -15,7 +14,7 @@ Entry point to the odt reader.
module Text.Pandoc.Readers.Odt ( readOdt ) where
import Codec.Archive.Zip
-import qualified Text.XML.Light as XML
+import Text.Pandoc.XML.Light
import qualified Data.ByteString.Lazy as B
@@ -23,6 +22,8 @@ import System.FilePath
import Control.Monad.Except (throwError)
+import qualified Data.Text as T
+
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import qualified Text.Pandoc.Class.PandocMonad as P
import Text.Pandoc.Definition
@@ -60,35 +61,37 @@ readOdt' _ bytes = bytesToOdt bytes-- of
bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
bytesToOdt bytes = case toArchiveOrFail bytes of
Right archive -> archiveToOdt archive
- Left _ -> Left $ PandocParseError "Couldn't parse odt file."
+ Left err -> Left $ PandocParseError
+ $ "Could not unzip ODT: " <> T.pack err
--
archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive
- | Just contentEntry <- findEntryByPath "content.xml" archive
- , Just stylesEntry <- findEntryByPath "styles.xml" archive
- , Just contentElem <- entryToXmlElem contentEntry
- , Just stylesElem <- entryToXmlElem stylesEntry
- , Right styles <- chooseMax (readStylesAt stylesElem )
- (readStylesAt contentElem)
- , media <- filteredFilesFromArchive archive filePathIsOdtMedia
- , startState <- readerState styles media
- , Right pandocWithMedia <- runConverter' read_body
- startState
- contentElem
-
- = Right pandocWithMedia
-
- | otherwise
- -- Not very detailed, but I don't think more information would be helpful
- = Left $ PandocParseError "Couldn't parse odt file."
- where
- filePathIsOdtMedia :: FilePath -> Bool
+archiveToOdt archive = do
+ let onFailure msg Nothing = Left $ PandocParseError msg
+ onFailure _ (Just x) = Right x
+ contentEntry <- onFailure "Could not find content.xml"
+ (findEntryByPath "content.xml" archive)
+ stylesEntry <- onFailure "Could not find styles.xml"
+ (findEntryByPath "styles.xml" archive)
+ contentElem <- entryToXmlElem contentEntry
+ stylesElem <- entryToXmlElem stylesEntry
+ styles <- either
+ (\_ -> Left $ PandocParseError "Could not read styles")
+ Right
+ (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem))
+ let filePathIsOdtMedia :: FilePath -> Bool
filePathIsOdtMedia fp =
let (dir, name) = splitFileName fp
in (dir == "Pictures/") || (dir /= "./" && name == "content.xml")
+ let media = filteredFilesFromArchive archive filePathIsOdtMedia
+ let startState = readerState styles media
+ either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right
+ (runConverter' read_body startState contentElem)
--
-entryToXmlElem :: Entry -> Maybe XML.Element
-entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
+entryToXmlElem :: Entry -> Either PandocError Element
+entryToXmlElem entry =
+ case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+ Right x -> Right x
+ Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
index 93c6b5e79..96515bf56 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where
import Control.Arrow
import qualified Control.Category as Cat
import Control.Monad
-
+import Data.List (foldl')
import Text.Pandoc.Readers.Odt.Arrows.Utils
import Text.Pandoc.Readers.Odt.Generic.Fallible
@@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
iterateSL :: (Foldable f, MonadPlus m)
=> ArrowState s x y
-> ArrowState s (f x) (m y)
-iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
+iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f
where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x)
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 43c44e7e9..5520d039f 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE Arrows #-}
+{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PatternGuards #-}
@@ -29,14 +30,14 @@ import Control.Monad ((<=<))
import qualified Data.ByteString.Lazy as B
import Data.Foldable (fold)
-import Data.List (find, stripPrefix)
+import Data.List (find)
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Maybe
-import Data.Semigroup (First(..), Option(..))
+import Data.Monoid (Alt (..))
import Text.TeXMath (readMathML, writeTeX)
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
import Text.Pandoc.Builder hiding (underline)
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
@@ -505,13 +506,11 @@ type InlineMatcher = ElementMatcher Inlines
type BlockMatcher = ElementMatcher Blocks
-
-newtype FirstMatch a = FirstMatch (Option (First a))
- deriving (Foldable, Monoid, Semigroup)
+newtype FirstMatch a = FirstMatch (Alt Maybe a)
+ deriving (Foldable, Monoid, Semigroup)
firstMatch :: a -> FirstMatch a
-firstMatch = FirstMatch . Option . Just . First
-
+firstMatch = FirstMatch . Alt . Just
--
matchingElement :: (Monoid e)
@@ -557,7 +556,7 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover
>>?% mappend
--
extractText :: XML.Content -> Fallible T.Text
- extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
+ extractText (XML.Text cData) = succeedWith (XML.cdData cData)
extractText _ = failEmpty
read_text_seq :: InlineMatcher
@@ -577,7 +576,10 @@ read_spaces = matchingElement NsText "s" (
read_line_break :: InlineMatcher
read_line_break = matchingElement NsText "line-break"
$ returnV linebreak
-
+--
+read_tab :: InlineMatcher
+read_tab = matchingElement NsText "tab"
+ $ returnV space
--
read_span :: InlineMatcher
read_span = matchingElement NsText "span"
@@ -585,6 +587,7 @@ read_span = matchingElement NsText "span"
$ matchChildContent [ read_span
, read_spaces
, read_line_break
+ , read_tab
, read_link
, read_note
, read_citation
@@ -604,6 +607,7 @@ read_paragraph = matchingElement NsText "p"
$ matchChildContent [ read_span
, read_spaces
, read_line_break
+ , read_tab
, read_link
, read_note
, read_citation
@@ -630,6 +634,7 @@ read_header = matchingElement NsText "h"
children <- ( matchChildContent [ read_span
, read_spaces
, read_line_break
+ , read_tab
, read_link
, read_note
, read_citation
@@ -777,14 +782,14 @@ read_frame_img =
"" -> returnV mempty -< ()
src' -> do
let exts = extensionsFromList [Ext_auto_identifiers]
- resource <- lookupResource -< src'
+ resource <- lookupResource -< T.unpack src'
_ <- updateMediaWithResource -< resource
w <- findAttrText' NsSVG "width" -< ()
h <- findAttrText' NsSVG "height" -< ()
titleNodes <- matchChildContent' [ read_frame_title ] -< ()
alt <- matchChildContent [] read_plain_text -< ()
arr (firstMatch . uncurry4 imageWith) -<
- (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
+ (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
read_frame_title :: InlineMatcher
read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
@@ -804,7 +809,8 @@ read_frame_mathml =
case fold src of
"" -> returnV mempty -< ()
src' -> do
- let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
+ let path = T.unpack $
+ fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml"
(_, mathml) <- lookupResource -< path
case readMathML (UTF8.toText $ B.toStrict mathml) of
Left _ -> returnV mempty -< ()
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
module Text.Pandoc.Readers.Odt.Generic.Namespaces where
import qualified Data.Map as M
+import Data.Text (Text)
--
-type NameSpaceIRI = String
+type NameSpaceIRI = Text
--
type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
, reverseComposition
, tryToRead
, Lookupable(..)
-, readLookupables
, readLookupable
, readPercent
, findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
import Control.Category (Category, (<<<), (>>>))
import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
import qualified Data.Foldable as F (Foldable, foldr)
import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
-- | Equivalent to
-- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
-- (nobody wants that) while the latter returns "to much" for simple purposes.
-- This function instead applies 'reads' and returns the first match (if any)
-- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst
-- | A version of 'reads' that requires a '%' sign after the number
readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i , r ) <- reads s
-- | Data that can be looked up.
-- This is mostly a utility to read data with kind *.
class Lookupable a where
- lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- > readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
- a <- maybeToList (lookup word lookupTable)
- ]
+ lookupTable :: [(Text, a)]
-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
- $ map ((`lookup` lookupTable).fst)
- $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s =
+ lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable
uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z
uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..341903046 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
@@ -60,15 +61,15 @@ import Control.Arrow
import Data.Bool ( bool )
import Data.Either ( rights )
import qualified Data.Map as M
-import qualified Data.Text as T
+import Data.Text (Text)
import Data.Default
import Data.Maybe
+import Data.List (foldl')
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
import Text.Pandoc.Readers.Odt.Arrows.State
import Text.Pandoc.Readers.Odt.Arrows.Utils
-
import Text.Pandoc.Readers.Odt.Generic.Namespaces
import Text.Pandoc.Readers.Odt.Generic.Utils
import Text.Pandoc.Readers.Odt.Generic.Fallible
@@ -78,13 +79,13 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible
--------------------------------------------------------------------------------
--
-type ElementName = String
-type AttributeName = String
-type AttributeValue = String
-type TextAttributeValue = T.Text
+type ElementName = Text
+type AttributeName = Text
+type AttributeValue = Text
+type TextAttributeValue = Text
--
-type NameSpacePrefix = String
+type NameSpacePrefix = Text
--
type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -292,7 +293,7 @@ readNSattributes = fromState $ \state -> maybe (state, failEmpty )
=> XMLConverterState nsID extraState
-> Maybe (XMLConverterState nsID extraState)
extractNSAttrs startState
- = foldl (\state d -> state >>= addNS d)
+ = foldl' (\state d -> state >>= addNS d)
(Just startState)
nsAttribs
where nsAttribs = mapMaybe readNSattr (XML.elAttribs element)
@@ -461,7 +462,7 @@ lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a)
lookupDefaultingAttr nsID attrName
= lookupAttrWithDefault nsID attrName def
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
findAttr' :: (NameSpaceID nsID)
=> nsID -> AttributeName
-> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
= qualifyName nsID attrName
&&& getCurrentElement
>>% XML.findAttr
- >>^ fmap T.pack
-- | Return value as string or fail
findAttr :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText :: (NameSpaceID nsID)
-> FallibleXMLConverter nsID extraState x TextAttributeValue
findAttrText nsID attrName
= findAttr' nsID attrName
- >>^ fmap T.pack
>>> maybeToChoice
-- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
-> XMLConverter nsID extraState x TextAttributeValue
findAttrTextWithDefault nsID attrName deflt
= findAttr' nsID attrName
- >>^ maybe deflt T.pack
+ >>^ fromMaybe deflt
-- | Read and return value or fail
readAttr :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
-- Internals
--------------------------------------------------------------------------------
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
stringToBool' val | val `elem` trueValues = Just True
| val `elem` falseValues = Just False
| otherwise = Nothing
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index 3a24a1162..70741c28d 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Reader.Odt.Namespaces
Copyright : Copyright (C) 2015 Martin Linnemann
@@ -13,10 +14,10 @@ Namespaces used in odt files.
module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
) where
-import Data.List (isPrefixOf)
import qualified Data.Map as M (empty, insert)
import Data.Maybe (fromMaybe, listToMaybe)
-
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Readers.Odt.Generic.Namespaces
@@ -30,7 +31,7 @@ instance NameSpaceID Namespace where
findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri]
+findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri]
nsIDmap :: NameSpaceIRIs Namespace
nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
@@ -54,12 +55,12 @@ data Namespace = -- Open Document core
-- Core XML (basically only for the 'id'-attribute)
| NsXML
-- Fallback
- | NsOther String
+ | NsOther Text
deriving ( Eq, Ord, Show )
-- | Not the actual iri's, but large prefixes of them - this way there are
-- less versioning problems and the like.
-nsIDs :: [(String,Namespace)]
+nsIDs :: [(Text, Namespace)]
nsIDs = [
("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ),
("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ),
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 46a777df1..ca791ad1e 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE Arrows #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Odt.StyleReader
Copyright : Copyright (C) 2015 Martin Linnemann
@@ -43,14 +44,16 @@ import Control.Arrow
import Data.Default
import qualified Data.Foldable as F
-import Data.List (unfoldr)
+import Data.List (unfoldr, foldl')
import qualified Data.Map as M
import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
import qualified Data.Set as S
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, tshow)
import Text.Pandoc.Readers.Odt.Arrows.Utils
@@ -90,7 +93,7 @@ instance Default FontPitch where
--
-- Thus, we want
-type FontFaceName = String
+type FontFaceName = Text
type FontPitches = M.Map FontFaceName FontPitch
@@ -117,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" (
&&&
lookupDefaultingAttr NsStyle "font-pitch"
))
- >>?^ ( M.fromList . foldl accumLegalPitches [] )
+ >>?^ ( M.fromList . foldl' accumLegalPitches [] )
) `ifFailedDo` returnV (Right M.empty)
where accumLegalPitches ls (Nothing,_) = ls
accumLegalPitches ls (Just n,p) = (n,p):ls
@@ -151,7 +154,7 @@ findPitch = ( lookupAttr NsStyle "font-pitch"
-- Definitions of main data
--------------------------------------------------------------------------------
-type StyleName = String
+type StyleName = Text
-- | There are two types of styles: named styles with a style family and an
-- optional style parent, and default styles for each style family,
@@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} =
-- \^ simpler, but in general less efficient
data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType
- , listItemPrefix :: Maybe String
- , listItemSuffix :: Maybe String
+ , listItemPrefix :: Maybe Text
+ , listItemSuffix :: Maybe Text
, listItemFormat :: ListItemNumberFormat
, listItemStart :: Int
}
@@ -366,9 +369,9 @@ instance Show ListLevelStyle where
show ListLevelStyle{..} = "<LLS|"
++ show listLevelType
++ "|"
- ++ maybeToString listItemPrefix
+ ++ maybeToString (T.unpack <$> listItemPrefix)
++ show listItemFormat
- ++ maybeToString listItemSuffix
+ ++ maybeToString (T.unpack <$> listItemSuffix)
++ ">"
where maybeToString = fromMaybe ""
@@ -471,7 +474,7 @@ readTextProperties =
)
where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
isFontBold = ("normal",False):("bold",True)
- :map ((,True).show) ([100,200..900]::[Int])
+ :map ((,True) . tshow) ([100,200..900]::[Int])
readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode)
readUnderlineMode = readLineMode "text-underline-mode"
@@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
readStrikeThroughMode = readLineMode "text-line-through-mode"
"text-line-through-style"
-readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode)
readLineMode modeAttr styleAttr = proc x -> do
isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x
mode <- lookupAttr' NsStyle modeAttr -< x
@@ -560,12 +563,13 @@ readListLevelStyle levelType = readAttr NsText "level"
--
chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
-chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
- | otherwise = Just ( F.foldr1 select ls )
+chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls
where
- select ( ListLevelStyle t1 p1 s1 f1 b1 )
- ( ListLevelStyle t2 p2 s2 f2 _ )
- = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1
+ select l Nothing = Just l
+ select ( ListLevelStyle t1 p1 s1 f1 b1 )
+ ( Just ( ListLevelStyle t2 p2 s2 f2 _ ))
+ = Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2)
+ (selectLinf f1 f2) b1
select' LltNumbered _ = LltNumbered
select' _ LltNumbered = LltNumbered
select' _ _ = LltBullet
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 851aec103..8823befdd 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org
- Copyright : Copyright (C) 2014-2020 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing (reportLogMessages)
-import Text.Pandoc.Shared (crFilter)
-
+import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
-import Data.Text (Text)
-
-- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
+readOrg :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts)
- (crFilter s <> "\n\n")
+ (ensureFinalNewlines 2 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 8f7cac6ea..14233569c 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.BlockStarts
- Copyright : Copyright (C) 2014-2020 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index f2e8b1ab6..f18d2f9a7 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,9 +1,10 @@
{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards #-}
{- |
Module : Text.Pandoc.Readers.Org.Blocks
- Copyright : Copyright (C) 2014-2020 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -38,10 +39,12 @@ import Data.Functor (($>))
import Data.List (foldl', intersperse)
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Text (Text)
-
+import Data.List.NonEmpty (nonEmpty)
+import System.FilePath
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.Walk as Walk
+import Text.Pandoc.Sources (ToSources(..))
--
-- parsing blocks
@@ -294,24 +297,22 @@ verseBlock blockType = try $ do
codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
codeBlock blockAttrs blockType = do
skipSpaces
- (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
- content <- rawBlockContent blockType
- resultsContent <- option mempty babelResultsBlock
- let id' = fromMaybe mempty $ blockAttrName blockAttrs
- let codeBlck = B.codeBlockWith ( id', classes, kv ) content
- let labelledBlck = maybe (pure codeBlck)
- (labelDiv codeBlck)
- (blockAttrCaption blockAttrs)
+ (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+ content <- rawBlockContent blockType
+ resultsContent <- option mempty babelResultsBlock
+ let identifier = fromMaybe mempty $ blockAttrName blockAttrs
+ let codeBlk = B.codeBlockWith (identifier, classes, kv) content
+ let wrap = maybe pure addCaption (blockAttrCaption blockAttrs)
return $
- (if exportsCode kv then labelledBlck else mempty) <>
+ (if exportsCode kv then wrap codeBlk else mempty) <>
(if exportsResults kv then resultsContent else mempty)
where
- labelDiv :: Blocks -> F Inlines -> F Blocks
- labelDiv blk value =
- B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
+ addCaption :: F Inlines -> Blocks -> F Blocks
+ addCaption caption blk = B.divWith ("", ["captioned-content"], [])
+ <$> (mkCaptionBlock caption <> pure blk)
- labelledBlock :: F Inlines -> F Blocks
- labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+ mkCaptionBlock :: F Inlines -> F Blocks
+ mkCaptionBlock = fmap (B.divWith ("", ["caption"], []) . B.plain)
exportsResults :: [(Text, Text)] -> Bool
exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
@@ -527,7 +528,9 @@ include = try $ do
_ -> nullAttr
return $ pure . B.codeBlockWith attr <$> parseRaw
_ -> return $ return . B.fromList . blockFilter params <$> blockList
- insertIncludedFileF blocksParser ["."] filename
+ currentDir <- takeDirectory . sourceName <$> getPosition
+ insertIncludedFile blocksParser toSources
+ [currentDir] filename Nothing Nothing
where
includeTarget :: PandocMonad m => OrgParser m FilePath
includeTarget = do
@@ -543,8 +546,7 @@ include = try $ do
in case (minlvl >>= safeRead :: Maybe Int) of
Nothing -> blks
Just lvl -> let levels = Walk.query headerLevel blks
- -- CAVE: partial function in else
- curMin = if null levels then 0 else minimum levels
+ curMin = maybe 0 minimum $ nonEmpty levels
in Walk.walk (shiftHeader (curMin - lvl)) blks
headerLevel :: Block -> [Int]
@@ -852,16 +854,52 @@ definitionListItem parseIndentedMarker = try $ do
definitionMarker =
spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
+-- | Checkbox for tasks.
+data Checkbox
+ = UncheckedBox
+ | CheckedBox
+ | SemicheckedBox
+
+-- | Parses a checkbox in a plain list.
+checkbox :: PandocMonad m
+ => OrgParser m Checkbox
+checkbox = do
+ guardEnabled Ext_task_lists
+ try (char '[' *> status <* char ']') <?> "checkbox"
+ where
+ status = choice
+ [ UncheckedBox <$ char ' '
+ , CheckedBox <$ char 'X'
+ , SemicheckedBox <$ char '-'
+ ]
+
+checkboxToInlines :: Checkbox -> Inline
+checkboxToInlines = B.Str . \case
+ UncheckedBox -> "☐"
+ SemicheckedBox -> "☐"
+ CheckedBox -> "☒"
+
-- | parse raw text for one list item
listItem :: PandocMonad m
=> OrgParser m Int
-> OrgParser m (F Blocks)
listItem parseIndentedMarker = try . withContext ListItemState $ do
markerLength <- try parseIndentedMarker
+ box <- optionMaybe checkbox
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
rest <- T.concat <$> many (listContinuation markerLength)
- parseFromString blocks $ firstLine <> blank <> rest
+ contents <- parseFromString blocks $ firstLine <> blank <> rest
+ return (maybe id (prependInlines . checkboxToInlines) box <$> contents)
+
+-- | Prepend inlines to blocks, adding them to the first paragraph or
+-- creating a new Plain element if necessary.
+prependInlines :: Inline -> Blocks -> Blocks
+prependInlines inlns = B.fromList . prepend . B.toList
+ where
+ prepend (Plain is : bs) = Plain (inlns : Space : is) : bs
+ prepend (Para is : bs) = Para (inlns : Space : is) : bs
+ prepend bs = Plain [inlns, Space] : bs
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index 3b363270c..2dcbecb1d 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -3,7 +3,7 @@
{-# LANGUAGE TupleSections #-}
{- |
Module : Text.Pandoc.Readers.Org.DocumentTree
- Copyright : Copyright (C) 2014-2020 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index 9399ebd54..401e1bd8f 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ExportSettings
- Copyright : © 2016–2020 Albert Krewinkel
+ Copyright : © 2016-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index b234bee58..6862dd71e 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -2,7 +2,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Inlines
- Copyright : Copyright (C) 2014-2020 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -29,6 +29,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
+import Text.Pandoc.Sources (ToSources(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Control.Monad (guard, mplus, mzero, unless, void, when)
@@ -262,7 +263,7 @@ berkeleyCitationList = try $ do
where
citationListPart :: PandocMonad m => OrgParser m (F Inlines)
citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
- notFollowedBy' citeKey
+ notFollowedBy' $ citeKey False
notFollowedBy (oneOf ";]")
inline
@@ -277,7 +278,7 @@ berkeleyBareTag' = try $ void (string "cite")
berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
berkeleyTextualCite = try $ do
- (suppressAuthor, key) <- citeKey
+ (suppressAuthor, key) <- citeKey False
returnF . return $ Citation
{ citationId = key
, citationPrefix = mempty
@@ -322,7 +323,7 @@ linkLikeOrgRefCite = try $ do
-- from the `org-ref-cite-re` variable in `org-ref.el`.
orgRefCiteKey :: PandocMonad m => OrgParser m Text
orgRefCiteKey =
- let citeKeySpecialChars = "-_:\\./," :: String
+ let citeKeySpecialChars = "-_:\\./" :: String
isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c
endOfCitation = try $ do
@@ -350,7 +351,7 @@ citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
citation :: PandocMonad m => OrgParser m (F Citation)
citation = try $ do
pref <- prefix
- (suppress_author, key) <- citeKey
+ (suppress_author, key) <- citeKey False
suff <- suffix
return $ do
x <- pref
@@ -367,7 +368,7 @@ citation = try $ do
}
where
prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+ manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
skipSpaces
@@ -477,17 +478,17 @@ linkToInlinesF linkStr =
internalLink :: Text -> Inlines -> F Inlines
internalLink link title = do
- anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
- if anchorB
+ ids <- asksF orgStateAnchorIds
+ if link `elem` ids
then return $ B.link ("#" <> link) "" title
- else return $ B.emph title
+ else let attr' = ("", ["spurious-link"] , [("target", link)])
+ in return $ B.spanWith attr' (B.emph title)
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
-- @org-target-regexp@, which is fairly liberal. Since no link is created if
-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
-- an anchor.
-
anchor :: PandocMonad m => OrgParser m (F Inlines)
anchor = try $ do
anchorId <- parseAnchor
@@ -501,7 +502,6 @@ anchor = try $ do
-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors
-- the org function @org-export-solidify-link-text@.
-
solidify :: Text -> Text
solidify = T.map replaceSpecialChar
where replaceSpecialChar c
@@ -573,7 +573,7 @@ underline :: PandocMonad m => OrgParser m (F Inlines)
underline = fmap B.underline <$> emphasisBetween '_'
verbatim :: PandocMonad m => OrgParser m (F Inlines)
-verbatim = return . B.code <$> verbatimBetween '='
+verbatim = return . B.codeWith ("", ["verbatim"], []) <$> verbatimBetween '='
code :: PandocMonad m => OrgParser m (F Inlines)
code = return . B.code <$> verbatimBetween '~'
@@ -803,7 +803,7 @@ inlineLaTeX = try $ do
parseAsInlineLaTeX :: PandocMonad m
=> Text -> TeXExport -> OrgParser m (Maybe Inlines)
parseAsInlineLaTeX cs = \case
- TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
+ TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
TeXIgnore -> return (Just mempty)
TeXVerbatim -> return (Just $ B.str cs)
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 4864d9478..a1b21046a 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -3,7 +3,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Meta
- Copyright : Copyright (C) 2014-2020 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -239,7 +239,7 @@ lineOfInlines = do
todoSequence :: Monad m => OrgParser m TodoSequence
todoSequence = try $ do
todoKws <- todoKeywords
- doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
+ doneKws <- optionMaybe $ todoDoneSep *> doneKeywords
newline
-- There must be at least one DONE keyword. The last TODO keyword is
-- taken if necessary.
@@ -250,11 +250,17 @@ todoSequence = try $ do
(x:xs) -> return $ keywordsToSequence (reverse xs) [x]
where
+ todoKeyword :: Monad m => OrgParser m Text
+ todoKeyword = many1Char nonspaceChar <* skipSpaces
+
todoKeywords :: Monad m => OrgParser m [Text]
todoKeywords = try $
- let keyword = many1Char nonspaceChar <* skipSpaces
- endOfKeywords = todoDoneSep <|> void newline
- in manyTill keyword (lookAhead endOfKeywords)
+ let endOfKeywords = todoDoneSep <|> void newline
+ in manyTill todoKeyword (lookAhead endOfKeywords)
+
+ doneKeywords :: Monad m => OrgParser m [Text]
+ doneKeywords = try $
+ manyTill (todoKeyword <* optional todoDoneSep) (lookAhead newline)
todoDoneSep :: Monad m => OrgParser m ()
todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 1e4799e7b..abe8a9ebf 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -3,7 +3,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ParserState
- Copyright : Copyright (C) 2014-2020 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index bce71c24d..f0949e205 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -1,6 +1,6 @@
{- |
Module : Text.Pandoc.Readers.Org.Parsing
- Copyright : Copyright (C) 2014-2020 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -63,8 +63,7 @@ module Text.Pandoc.Readers.Org.Parsing
, ellipses
, citeKey
, gridTableWith
- , insertIncludedFileF
- -- * Re-exports from Text.Pandoc.Parsec
+ , insertIncludedFile
, runParser
, runParserT
, getInput
@@ -100,21 +99,22 @@ module Text.Pandoc.Readers.Org.Parsing
, getState
, updateState
, SourcePos
+ , sourceName
, getPosition
) where
import Data.Text (Text)
import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
- parseFromString)
+import Text.Pandoc.Parsing hiding (anyLine, blanklines, newline,
+ parseFromString)
import qualified Text.Pandoc.Parsing as P
import Control.Monad (guard)
import Control.Monad.Reader (ReaderT)
-- | The parser used to read org files.
-type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 7f72077a4..ad7c65060 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Shared
- Copyright : Copyright (C) 2014-2020 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index eeb3d1389..3990f0cb5 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -4,7 +4,7 @@
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.RST
- Copyright : Copyright (C) 2006-2020 John MacFarlane
+ Copyright : Copyright (C) 2006-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -27,8 +27,7 @@ import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem,
- readFileFromDirs, getCurrentTime)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem, getTimestamp)
import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV)
import Text.Pandoc.Definition
import Text.Pandoc.Error
@@ -38,25 +37,25 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Printf (printf)
import Data.Time.Format
+import System.FilePath (takeDirectory)
-- TODO:
-- [ ] .. parsed-literal
-- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
+readRST :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ Text to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readRST opts s = do
parsed <- readWithM parseRST def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ (ensureFinalNewlines 2 (toSources s))
case parsed of
Right result -> return result
Left e -> throwError e
-type RSTParser m = ParserT Text ParserState m
+type RSTParser m = ParserT Sources ParserState m
--
-- Constants and data structure definitions
@@ -151,11 +150,19 @@ parseRST = do
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- T.concat <$>
- manyTill (referenceKey <|> anchorDef <|>
- noteBlock <|> citationBlock <|>
- (snd <$> withRaw comment) <|>
- headerBlock <|> lineClump) eof
+ let chunk = referenceKey
+ <|> anchorDef
+ <|> noteBlock
+ <|> citationBlock
+ <|> (snd <$> withRaw comment)
+ <|> headerBlock
+ <|> lineClump
+ docMinusKeys <- Sources <$>
+ manyTill (do pos <- getPosition
+ t <- chunk
+ return (pos, t)) eof
+ -- UGLY: we collapse source position information.
+ -- TODO: fix the parser to use the F monad instead of two passes
setInput docMinusKeys
setPosition startPos
st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
-- hrule block
--
-hrule :: Monad m => ParserT Text st m Blocks
+hrule :: Monad m => ParserT Sources st m Blocks
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do
-- read a line indented by a given string
indentedLine :: (HasReaderOptions st, Monad m)
- => Int -> ParserT Text st m Text
+ => Int -> ParserT Sources st m Text
indentedLine indents = try $ do
lookAhead spaceChar
gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: (HasReaderOptions st, Monad m)
- => ParserT Text st m Text
+ => ParserT Sources st m Text
indentedBlock = try $ do
indents <- length <$> lookAhead (many1 spaceChar)
lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
optional blanklines
return $ T.unlines lns
-quotedBlock :: Monad m => ParserT Text st m Text
+quotedBlock :: Monad m => ParserT Sources st m Text
quotedBlock = try $ do
quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
lns <- many1 $ lookAhead (char quote) >> anyLine
optional blanklines
return $ T.unlines lns
-codeBlockStart :: Monad m => ParserT Text st m Char
+codeBlockStart :: Monad m => ParserT Sources st m Char
codeBlockStart = string "::" >> blankline >> blankline
-codeBlock :: Monad m => ParserT Text ParserState m Blocks
+codeBlock :: Monad m => ParserT Sources ParserState m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
-codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
+codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
codeBlockBody = do
lang <- stateRstHighlight <$> getState
try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
return $ B.codeBlockWith ("", ["haskell","literate"], [])
$ T.intercalate "\n" lns
-latexCodeBlock :: Monad m => ParserT Text st m [Text]
+latexCodeBlock :: Monad m => ParserT Sources st m [Text]
latexCodeBlock = try $ do
try (latexBlockLine "\\begin{code}")
many1Till anyLine (try $ latexBlockLine "\\end{code}")
where
latexBlockLine s = skipMany spaceChar >> string s >> blankline
-birdCodeBlock :: Monad m => ParserT Text st m [Text]
+birdCodeBlock :: Monad m => ParserT Sources st m [Text]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
where filterSpace lns =
-- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
then map (T.drop 1) lns
else lns
-birdTrackLine :: Monad m => ParserT Text st m Text
+birdTrackLine :: Monad m => ParserT Sources st m Text
birdTrackLine = char '>' >> anyLine
--
@@ -446,64 +453,43 @@ encoding
-}
includeDirective :: PandocMonad m
- => Text -> [(Text, Text)] -> Text
+ => Text
+ -> [(Text, Text)]
+ -> Text
-> RSTParser m Blocks
includeDirective top fields body = do
- let f = trim top
- guard $ not (T.null f)
+ let f = T.unpack $ trim top
+ guard $ not $ null f
guard $ T.null (trim body)
- -- options
- let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
- let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
- oldPos <- getPosition
- oldInput <- getInput
- containers <- stateContainers <$> getState
- when (f `elem` containers) $
- throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
- updateState $ \s -> s{ stateContainers = f : stateContainers s }
- mbContents <- readFileFromDirs ["."] $ T.unpack f
- contentLines <- case mbContents of
- Just s -> return $ T.lines s
- Nothing -> do
- logMessage $ CouldNotLoadIncludeFile f oldPos
- return []
- let numLines = length contentLines
- let startLine' = case startLine of
- Nothing -> 1
- Just x | x >= 0 -> x
- | otherwise -> numLines + x -- negative from end
- let endLine' = case endLine of
- Nothing -> numLines + 1
- Just x | x >= 0 -> x
- | otherwise -> numLines + x -- negative from end
- let contentLines' = drop (startLine' - 1)
- $ take (endLine' - 1) contentLines
- let contentLines'' = (case trim <$> lookup "end-before" fields of
- Just patt -> takeWhile (not . (patt `T.isInfixOf`))
- Nothing -> id) .
- (case trim <$> lookup "start-after" fields of
- Just patt -> drop 1 .
- dropWhile (not . (patt `T.isInfixOf`))
- Nothing -> id) $ contentLines'
- let contents' = T.unlines contentLines''
- case lookup "code" fields of
- Just lang -> do
- let classes = maybe [] T.words (lookup "class" fields)
- let ident = maybe "" trimr $ lookup "name" fields
- codeblock ident classes fields (trimr lang) contents' False
- Nothing -> case lookup "literal" fields of
- Just _ -> return $ B.rawBlock "rst" contents'
- Nothing -> do
- setPosition $ newPos (T.unpack f) 1 1
- setInput $ contents' <> "\n"
- bs <- optional blanklines >>
- (mconcat <$> many block)
- setInput oldInput
- setPosition oldPos
- updateState $ \s -> s{ stateContainers =
- tail $ stateContainers s }
- return bs
-
+ let startLine = lookup "start-line" fields >>= safeRead
+ let endLine = lookup "end-line" fields >>= safeRead
+ let classes = maybe [] T.words (lookup "class" fields)
+ let ident = maybe "" trimr $ lookup "name" fields
+ let parser =
+ case lookup "code" fields of
+ Just lang ->
+ (codeblock ident classes fields (trimr lang) False
+ . sourcesToText) <$> getInput
+ Nothing ->
+ case lookup "literal" fields of
+ Just _ -> B.rawBlock "rst" . sourcesToText <$> getInput
+ Nothing -> parseBlocks
+ let isLiteral = isJust (lookup "code" fields `mplus` lookup "literal" fields)
+ let selectLines =
+ (case trim <$> lookup "end-before" fields of
+ Just patt -> takeWhile (not . (patt `T.isInfixOf`))
+ Nothing -> id) .
+ (case trim <$> lookup "start-after" fields of
+ Just patt -> drop 1 .
+ dropWhile (not . (patt `T.isInfixOf`))
+ Nothing -> id)
+
+ let toStream t =
+ Sources [(initialPos f,
+ (T.unlines . selectLines . T.lines $ t) <>
+ if isLiteral then mempty else "\n")] -- see #7436
+ currentDir <- takeDirectory . sourceName <$> getPosition
+ insertIncludedFile parser toStream [currentDir] f startLine endLine
--
-- list blocks
@@ -526,7 +512,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT Text st m Int
+bulletListStart :: Monad m => ParserT Sources st m Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@@ -613,8 +599,9 @@ comment = try $ do
string ".."
skipMany1 spaceChar <|> (() <$ lookAhead newline)
-- notFollowedBy' directiveLabel -- comment comes after directive so unnec.
- manyTill anyChar blanklines
+ _ <- anyLine
optional indentedBlock
+ optional blanklines
return mempty
directiveLabel :: Monad m => RSTParser m Text
@@ -685,7 +672,7 @@ directive' = do
"replace" -> B.para <$> -- consumed by substKey
parseInlineFromText (trim top)
"date" -> B.para <$> do -- consumed by substKey
- t <- getCurrentTime
+ t <- getTimestamp
let format = case T.unpack (T.strip top) of
[] -> "%Y-%m-%d"
x -> x
@@ -731,8 +718,8 @@ directive' = do
"" -> stateRstHighlight def
lang -> Just lang })
x | x == "code" || x == "code-block" || x == "sourcecode" ->
- codeblock name classes (map (second trimr) fields)
- (trim top) body True
+ return $ codeblock name classes (map (second trimr) fields)
+ (trim top) True body
"aafig" -> do
let attribs = (name, ["aafig"], map (second trimr) fields)
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
@@ -877,10 +864,11 @@ csvTableDirective top fields rawcsv = do
(bs, _) <- fetchItem u
return $ UTF8.toText bs
Nothing -> return rawcsv
- let res = parseCSV opts (case explicitHeader of
- Just h -> h <> "\n" <> rawcsv'
- Nothing -> rawcsv')
- case res of
+ let header' = case explicitHeader of
+ Just h -> parseCSV defaultCSVOptions h
+ Nothing -> Right []
+ let res = parseCSV opts rawcsv'
+ case (<>) <$> header' <*> res of
Left e ->
throwError $ PandocParsecError "csv table" e
Right rawrows -> do
@@ -1017,10 +1005,10 @@ toChunks = dropWhile T.null
then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}"
else s
-codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Text -> Bool
- -> RSTParser m Blocks
-codeblock ident classes fields lang body rmTrailingNewlines =
- return $ B.codeBlockWith attribs $ stripTrailingNewlines' body
+codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Bool -> Text
+ -> Blocks
+codeblock ident classes fields lang rmTrailingNewlines body =
+ B.codeBlockWith attribs $ stripTrailingNewlines' body
where stripTrailingNewlines' = if rmTrailingNewlines
then stripTrailingNewlines
else id
@@ -1101,7 +1089,7 @@ quotedReferenceName = try $ do
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName :: Monad m => ParserT Text st m Text
+simpleReferenceName :: Monad m => ParserT Sources st m Text
simpleReferenceName = do
x <- alphaNum
xs <- many $ alphaNum
@@ -1120,7 +1108,7 @@ referenceKey = do
-- return enough blanks to replace key
return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-targetURI :: Monad m => ParserT Text st m Text
+targetURI :: Monad m => ParserT Sources st m Text
targetURI = do
skipSpaces
optional $ try $ newline >> notFollowedBy blankline
@@ -1158,8 +1146,10 @@ anonymousKey :: Monad m => RSTParser m ()
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
- pos <- getPosition
- let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
+ -- we need to ensure that the keys are ordered by occurrence in
+ -- the document.
+ numKeys <- M.size . stateKeys <$> getState
+ let key = toKey $ "_" <> T.pack (show numKeys)
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
@@ -1248,13 +1238,13 @@ headerBlock = do
-- Grid tables TODO:
-- - column spans
-dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
-simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
@@ -1380,7 +1370,7 @@ hyphens = do
-- don't want to treat endline after hyphen or dash as a space
return $ B.str result
-escapedChar :: Monad m => ParserT Text st m Inlines
+escapedChar :: Monad m => ParserT Sources st m Inlines
escapedChar = do c <- escaped anyChar
return $ if c == ' ' || c == '\n' || c == '\r'
-- '\ ' is null in RST
@@ -1656,21 +1646,4 @@ note = try $ do
return $ B.note contents
smart :: PandocMonad m => RSTParser m Inlines
-smart = do
- guardEnabled Ext_smart
- doubleQuoted <|> singleQuoted <|>
- choice [apostrophe, dash, ellipses]
-
-singleQuoted :: PandocMonad m => RSTParser m Inlines
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $
- B.singleQuoted . trimInlines . mconcat <$>
- many1Till inline singleQuoteEnd
-
-doubleQuoted :: PandocMonad m => RSTParser m Inlines
-doubleQuoted = try $ do
- doubleQuoteStart
- withQuoteContext InDoubleQuote $
- B.doubleQuoted . trimInlines . mconcat <$>
- many1Till inline doubleQuoteEnd
+smart = smartPunctuation inline
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 509ce1377..47f16ef4b 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared (safeRead)
-import Text.Parsec hiding (tokenPrim)
import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
import qualified Data.Sequence as Seq
import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
, afterConditional = False
}
-type RoffLexer m = ParserT T.Text RoffState m
+type RoffLexer m = ParserT Sources RoffState m
--
-- Lexer: T.Text -> RoffToken
--
-eofline :: Stream s m Char => ParsecT s u m ()
+eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
-spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
spacetab = char ' ' <|> char '\t'
characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
char '*'
cs <- escapeArg <|> countChar 1 anyChar
s <- linePartsToText <$> resolveText cs pos
- getInput >>= setInput . (s <>)
- return ()
+ addToInput s
-- Parses: '..'
quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
font' <- if T.null font || font == "P"
then prevFont <$> getState
else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
- modifyState $ \st -> st{ prevFont = currentFont st
+ updateState $ \st -> st{ prevFont = currentFont st
, currentFont = font' }
return [Font font']
where
@@ -372,8 +370,8 @@ lexTable pos = do
spaces
opts <- try tableOptions <|> [] <$ optional (char ';')
case lookup "tab" opts of
- Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
- _ -> modifyState $ \st -> st{ tableTabChar = '\t' }
+ Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
+ _ -> updateState $ \st -> st{ tableTabChar = '\t' }
spaces
skipMany lexComment
spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
ifPart <- do
optional $ try $ char '\\' >> newline
lexGroup
- <|> do modifyState $ \s -> s{ afterConditional = True }
+ <|> do updateState $ \s -> s{ afterConditional = True }
t <- manToken
- modifyState $ \s -> s{ afterConditional = False }
+ updateState $ \s -> s{ afterConditional = False }
return t
case mbtest of
Nothing -> do
- putState st -- reset state, so we don't record macros in skipped section
+ setState st -- reset state, so we don't record macros in skipped section
report $ SkippedContent (T.cons '.' mname) pos
return mempty
Just True -> return ifPart
Just False -> do
- putState st
+ setState st
return mempty
expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
_ -> Nothing
where
returnValue v = do
- modifyState $ \st -> st{ lastExpression = v }
+ updateState $ \st -> st{ lastExpression = v }
return v
lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
result <- readFileFromDirs dirs $ T.unpack fp
case result of
Nothing -> report $ CouldNotLoadIncludeFile fp pos
- Just s -> getInput >>= setInput . (s <>)
+ Just s -> addToInput s
return mempty
[] -> return mempty
@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
(x:ys) -> do
let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
let stringName = linePartsToText x
- modifyState $ \st ->
+ updateState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) }
return mempty
lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
lexMacroDef args = do -- macro definition
- modifyState $ \st -> st{ roffMode = CopyMode }
+ updateState $ \st -> st{ roffMode = CopyMode }
(macroName, stopMacro) <-
case args of
(x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
_ <- lexArgs
return ()
ts <- mconcat <$> manyTill manToken stop
- modifyState $ \st ->
+ updateState $ \st ->
st{ customMacros = M.insert macroName ts (customMacros st)
, roffMode = NormalMode }
return mempty
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 484a6c923..276d28aaa 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, tshow)
+import Text.Pandoc.Shared (tshow)
import Text.Pandoc.XML (fromEntities)
-- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
+readTWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readTWiki opts s = do
- res <- readWithM parseTWiki def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ let sources = ensureFinalNewlines 2 (toSources s)
+ res <- readWithM parseTWiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
-type TWParser = ParserT Text ParserState
+type TWParser = ParserT Sources ParserState
--
-- utility functions
@@ -469,27 +469,7 @@ symbol :: PandocMonad m => TWParser m B.Inlines
symbol = B.str <$> countChar 1 nonspaceChar
smart :: PandocMonad m => TWParser m B.Inlines
-smart = do
- guardEnabled Ext_smart
- doubleQuoted <|> singleQuoted <|>
- choice [ apostrophe
- , dash
- , ellipses
- ]
-
-singleQuoted :: PandocMonad m => TWParser m B.Inlines
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote
- (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd)
-
-doubleQuoted :: PandocMonad m => TWParser m B.Inlines
-doubleQuoted = try $ do
- doubleQuoteStart
- contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- withQuoteContext InDoubleQuote (doubleQuoteEnd >>
- return (B.doubleQuoted $ B.trimInlines contents))
- <|> return (B.str "\8220" B.<> contents)
+smart = smartPunctuation inline
link :: PandocMonad m => TWParser m B.Inlines
link = try $ do
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 6691d8381..981878206 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -3,7 +3,7 @@
{- |
Module : Text.Pandoc.Readers.Textile
Copyright : Copyright (C) 2010-2012 Paul Rivier
- 2010-2020 John MacFarlane
+ 2010-2021 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : Paul Rivier <paul*rivier#demotera*com>
@@ -11,7 +11,7 @@
Portability : portable
Conversion from Textile to 'Pandoc' document, based on the spec
-available at http://redcloth.org/textile.
+available at https://www.promptworks.com/textile/.
Implemented and parsed:
- Paragraphs
@@ -38,7 +38,8 @@ module Text.Pandoc.Readers.Textile ( readTextile) where
import Control.Monad (guard, liftM)
import Control.Monad.Except (throwError)
import Data.Char (digitToInt, isUpper)
-import Data.List (intersperse, transpose)
+import Data.List (intersperse, transpose, foldl')
+import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -52,30 +53,34 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Text.Pandoc.Shared (trim, tshow)
-- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
+readTextile :: (PandocMonad m, ToSources a)
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> a
-> m Pandoc
readTextile opts s = do
- parsed <- readWithM parseTextile def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ let sources = ensureFinalNewlines 2 (toSources s)
+ parsed <- readWithM parseTextile def{ stateOptions = opts } sources
case parsed of
Right result -> return result
Left e -> throwError e
+type TextileParser = ParserT Sources ParserState
-- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
+parseTextile :: PandocMonad m => TextileParser m Pandoc
parseTextile = do
many blankline
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys/notes were...
- let firstPassParser = noteBlock <|> lineClump
- manyTill firstPassParser eof >>= setInput . T.concat
+ let firstPassParser = do
+ pos <- getPosition
+ t <- noteBlock <|> lineClump
+ return (pos, t)
+ manyTill firstPassParser eof >>= setInput . Sources
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
@@ -83,10 +88,10 @@ parseTextile = do
-- now parse it for real...
Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
-noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker :: PandocMonad m => TextileParser m Text
noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
-noteBlock :: PandocMonad m => ParserT Text ParserState m Text
+noteBlock :: PandocMonad m => TextileParser m Text
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
@@ -101,11 +106,11 @@ noteBlock = try $ do
return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
+parseBlocks :: PandocMonad m => TextileParser m Blocks
parseBlocks = mconcat <$> manyTill block eof
-- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
+blockParsers :: PandocMonad m => [TextileParser m Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
@@ -120,22 +125,22 @@ blockParsers = [ codeBlock
]
-- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT Text ParserState m Blocks
+block :: PandocMonad m => TextileParser m Blocks
block = do
res <- choice blockParsers <?> "block"
trace (T.take 60 $ tshow $ B.toList res)
return res
-commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+commentBlock :: PandocMonad m => TextileParser m Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return mempty
-codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlock :: PandocMonad m => TextileParser m Blocks
codeBlock = codeBlockTextile <|> codeBlockHtml
-codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockTextile :: PandocMonad m => TextileParser m Blocks
codeBlockTextile = try $ do
string "bc." <|> string "pre."
extended <- option False (True <$ char '.')
@@ -155,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
trimTrailingNewlines = T.dropWhileEnd (=='\n')
-- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockHtml :: PandocMonad m => TextileParser m Blocks
codeBlockHtml = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -173,7 +178,7 @@ codeBlockHtml = try $ do
return $ B.codeBlockWith (ident,classes,kvs) result'''
-- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT Text ParserState m Blocks
+header :: PandocMonad m => TextileParser m Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@@ -185,14 +190,14 @@ header = try $ do
return $ B.headerWith attr' level name
-- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
+blockQuote :: PandocMonad m => TextileParser m Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
B.blockQuote <$> para
-- Horizontal rule
-hrule :: PandocMonad m => ParserT Text st m Blocks
+hrule :: PandocMonad m => TextileParser m Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@@ -207,39 +212,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
-anyList :: PandocMonad m => ParserT Text ParserState m Blocks
+anyList :: PandocMonad m => TextileParser m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return $ B.orderedList items
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -249,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
return $ contents <> sublist
-- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
+definitionList :: PandocMonad m => TextileParser m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
-listStart :: PandocMonad m => ParserT Text ParserState m ()
+listStart :: PandocMonad m => TextileParser m ()
listStart = genericListStart '*'
<|> () <$ genericListStart '#'
<|> () <$ definitionListStart
-genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
+genericListStart :: PandocMonad m => Char -> TextileParser m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
-basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
+basicDLStart :: PandocMonad m => TextileParser m ()
basicDLStart = do
char '-'
whitespace
notFollowedBy newline
-definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
+definitionListStart :: PandocMonad m => TextileParser m Inlines
definitionListStart = try $ do
basicDLStart
trimInlines . mconcat <$>
@@ -280,15 +285,15 @@ definitionListStart = try $ do
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
-definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
definitionListItem = try $ do
term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
return (term, def')
- where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+ where inlineDef :: PandocMonad m => TextileParser m [Blocks]
inlineDef = liftM (\d -> [B.plain d])
$ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
- multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+ multilineDef :: PandocMonad m => TextileParser m [Blocks]
multilineDef = try $ do
optional whitespace >> newline
s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -299,7 +304,7 @@ definitionListItem = try $ do
-- raw content
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
rawHtmlBlock = try $ do
skipMany spaceChar
(_,b) <- htmlTag isBlockTag
@@ -307,14 +312,14 @@ rawHtmlBlock = try $ do
return $ B.rawBlock "html" b
-- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT Text ParserState m Blocks
+para :: PandocMonad m => TextileParser m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@@ -325,7 +330,7 @@ toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault
-cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
cellAttributes = try $ do
isHeader <- option False (True <$ char '_')
-- we just ignore colspan and rowspan markers:
@@ -338,7 +343,7 @@ cellAttributes = try $ do
return (isHeader, alignment)
-- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
tableCell = try $ do
char '|'
(isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -349,7 +354,7 @@ tableCell = try $ do
return ((isHeader, alignment), B.plain content)
-- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
tableRow = try $ do
-- skip optional row attributes
optional $ try $ do
@@ -359,7 +364,7 @@ tableRow = try $ do
many1 tableCell <* char '|' <* blankline
-- | A table with an optional header.
-table :: PandocMonad m => ParserT Text ParserState m Blocks
+table :: PandocMonad m => TextileParser m Blocks
table = try $ do
-- ignore table attributes
caption <- option mempty $ try $ do
@@ -375,8 +380,9 @@ table = try $ do
(toprow:rest) | any (fst . fst) toprow ->
(toprow, rest)
_ -> (mempty, rawrows)
- let nbOfCols = maximum $ map length (headers:rows)
- let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows)
+ let nbOfCols = maximum $ fmap length (headers :| rows)
+ let aligns = map (maybe AlignDefault minimum . nonEmpty) $
+ transpose $ map (map (snd . fst)) (headers:rows)
let toRow = Row nullAttr . map B.simpleCell
toHeaderRow l = [toRow l | not (null l)]
return $ B.table (B.simpleCaption $ B.plain caption)
@@ -386,7 +392,7 @@ table = try $ do
(TableFoot nullAttr [])
-- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
+ignorableRow :: PandocMonad m => TextileParser m ()
ignorableRow = try $ do
char '|'
oneOf ":^-~"
@@ -395,7 +401,7 @@ ignorableRow = try $ do
_ <- anyLine
return ()
-explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
explicitBlockStart name = try $ do
string (T.unpack name)
attributes
@@ -407,8 +413,8 @@ explicitBlockStart name = try $ do
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: PandocMonad m
=> Text -- ^ block tag name
- -> ParserT Text ParserState m Blocks -- ^ implicit block
- -> ParserT Text ParserState m Blocks
+ -> TextileParser m Blocks -- ^ implicit block
+ -> TextileParser m Blocks
maybeExplicitBlock name blk = try $ do
optional $ explicitBlockStart name
blk
@@ -421,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
-inline :: PandocMonad m => ParserT Text ParserState m Inlines
+inline :: PandocMonad m => TextileParser m Inlines
inline = choice inlineParsers <?> "inline"
-- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
+inlineParsers :: PandocMonad m => [TextileParser m Inlines]
inlineParsers = [ str
, whitespace
, endline
@@ -445,7 +451,7 @@ inlineParsers = [ str
]
-- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineMarkup :: PandocMonad m => TextileParser m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
, simpleInline (string "**") B.strong
, simpleInline (string "__") B.emph
@@ -459,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
]
-- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT Text st m Inlines
+mark :: PandocMonad m => TextileParser m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-reg :: PandocMonad m => ParserT Text st m Inlines
+reg :: PandocMonad m => TextileParser m Inlines
reg = do
oneOf "Rr"
char ')'
return $ B.str "\174"
-tm :: PandocMonad m => ParserT Text st m Inlines
+tm :: PandocMonad m => TextileParser m Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ B.str "\8482"
-copy :: PandocMonad m => ParserT Text st m Inlines
+copy :: PandocMonad m => TextileParser m Inlines
copy = do
oneOf "Cc"
char ')'
return $ B.str "\169"
-note :: PandocMonad m => ParserT Text ParserState m Inlines
+note :: PandocMonad m => TextileParser m Inlines
note = try $ do
ref <- char '[' *> many1 digit <* char ']'
notes <- stateNotes <$> getState
@@ -505,13 +511,13 @@ wordBoundaries :: [Char]
wordBoundaries = markupChars <> stringBreakers
-- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
+hyphenedWords :: PandocMonad m => TextileParser m Text
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
return $ T.intercalate "-" (x:xs)
-wordChunk :: PandocMonad m => ParserT Text ParserState m Text
+wordChunk :: PandocMonad m => TextileParser m Text
wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( noneOf wordBoundaries <|>
@@ -520,7 +526,7 @@ wordChunk = try $ do
return $ T.pack $ hd:tl
-- | Any string
-str :: PandocMonad m => ParserT Text ParserState m Inlines
+str :: PandocMonad m => TextileParser m Inlines
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediately
@@ -533,11 +539,11 @@ str = do
return $ B.str fullStr
-- | Some number of space chars
-whitespace :: PandocMonad m => ParserT Text st m Inlines
+whitespace :: PandocMonad m => TextileParser m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT Text ParserState m Inlines
+endline :: PandocMonad m => TextileParser m Inlines
endline = try $ do
newline
notFollowedBy blankline
@@ -545,18 +551,18 @@ endline = try $ do
notFollowedBy rawHtmlBlock
return B.linebreak
-rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
+rawHtmlInline :: PandocMonad m => TextileParser m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
-link :: PandocMonad m => ParserT Text ParserState m Inlines
+link :: PandocMonad m => TextileParser m Inlines
link = try $ do
bracketed <- (True <$ char '[') <|> return False
char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -576,7 +582,7 @@ link = try $ do
else B.spanWith attr $ B.link url "" name'
-- | image embedding
-image :: PandocMonad m => ParserT Text ParserState m Inlines
+image :: PandocMonad m => TextileParser m Inlines
image = try $ do
char '!' >> notFollowedBy space
(ident, cls, kvs) <- attributes
@@ -588,51 +594,51 @@ image = try $ do
char '!'
return $ B.imageWith attr src alt (B.str alt)
-escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedInline :: PandocMonad m => TextileParser m Inlines
escapedInline = escapedEqs <|> escapedTag
-escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs :: PandocMonad m => TextileParser m Inlines
escapedEqs = B.str . T.pack <$>
try (string "==" *> manyTill anyChar' (try $ string "=="))
-- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag :: PandocMonad m => TextileParser m Inlines
escapedTag = B.str . T.pack <$>
try (string "<notextile>" *>
manyTill anyChar' (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol :: PandocMonad m => TextileParser m Inlines
symbol = B.str . T.singleton <$> (notFollowedBy newline *>
notFollowedBy rawHtmlBlock *>
oneOf wordBoundaries)
-- | Inline code
-code :: PandocMonad m => ParserT Text ParserState m Inlines
+code :: PandocMonad m => TextileParser m Inlines
code = code1 <|> code2
-- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT Text ParserState m Char
+anyChar' :: PandocMonad m => TextileParser m Char
anyChar' =
satisfy (/='\n') <|>
try (char '\n' <* notFollowedBy blankline)
-code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 :: PandocMonad m => TextileParser m Inlines
code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
-code2 :: PandocMonad m => ParserT Text ParserState m Inlines
+code2 :: PandocMonad m => TextileParser m Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT Text ParserState m Attr
-attributes = foldl (flip ($)) ("",[],[]) <$>
+attributes :: PandocMonad m => TextileParser m Attr
+attributes = foldl' (flip ($)) ("",[],[]) <$>
try (do special <- option id specialAttribute
attrs <- many attribute
return (special : attrs))
-specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
specialAttribute = do
alignStr <- ("center" <$ char '=') <|>
("justify" <$ try (string "<>")) <|>
@@ -641,11 +647,11 @@ specialAttribute = do
notFollowedBy spaceChar
return $ addStyle $ T.pack $ "text-align:" ++ alignStr
-attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
attribute = try $
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
-classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
classIdAttr = try $ do -- (class class #id)
char '('
ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -657,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
classes'
-> return $ \(_,_,keyvals) -> ("",classes',keyvals)
-styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
return $ addStyle $ T.pack style
@@ -668,23 +674,23 @@ addStyle style (id',classes,keyvals) =
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
-langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
langAttr = do
lang <- try $ enclosed (char '[') (char ']') alphaNum
return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
-- | Parses material surrounded by a parser.
surrounded :: (PandocMonad m, Show t)
- => ParserT Text st m t -- ^ surrounding parser
- -> ParserT Text st m a -- ^ content parser (to be used repeatedly)
- -> ParserT Text st m [a]
+ => ParserT Sources st m t -- ^ surrounding parser
+ -> ParserT Sources st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT Sources st m [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: PandocMonad m
- => ParserT Text ParserState m t -- ^ surrounding parser
+ => TextileParser m t -- ^ surrounding parser
-> (Inlines -> Inlines) -- ^ Inline constructor
- -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly)
+ -> TextileParser m Inlines -- ^ content parser (to be used repeatedly)
simpleInline border construct = try $ do
notAfterString
border *> notFollowedBy (oneOf " \t\n\r")
@@ -698,7 +704,7 @@ simpleInline border construct = try $ do
then body
else B.spanWith attr body
-groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
groupedInlineMarkup = try $ do
char '['
sp1 <- option mempty $ B.space <$ whitespace
@@ -707,5 +713,5 @@ groupedInlineMarkup = try $ do
char ']'
return $ sp1 <> result <> sp2
-eof' :: Monad m => ParserT Text s m Char
+eof' :: Monad m => ParserT Sources s m Char
eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index fb4b662c5..5c414fdec 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
import Text.Pandoc.Logging (Verbosity (..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, safeRead)
+import Text.Pandoc.Shared (safeRead)
import Text.Pandoc.XML (fromEntities)
import Text.Printf (printf)
-- | Read TikiWiki from an input string and return a Pandoc document.
-readTikiWiki :: PandocMonad m
+readTikiWiki :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readTikiWiki opts s = do
- res <- readWithM parseTikiWiki def{ stateOptions = opts }
- (crFilter s <> "\n\n")
+ let sources = ensureFinalNewlines 2 (toSources s)
+ res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right d -> return d
-type TikiWikiParser = ParserT Text ParserState
+type TikiWikiParser = ParserT Sources ParserState
--
-- utility functions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 474e4fac0..b5cf5a0f3 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
{- |
Module : Text.Pandoc.Readers.Txt2Tags
Copyright : Copyright (C) 2014 Matthew Pickering
@@ -19,6 +20,7 @@ import Control.Monad.Except (catchError, throwError)
import Control.Monad.Reader (Reader, asks, runReader)
import Data.Default
import Data.List (intercalate, transpose)
+import Data.List.NonEmpty (nonEmpty)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
@@ -31,9 +33,9 @@ import Data.Time (defaultTimeLocale)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
-type T2T = ParserT Text ParserState (Reader T2TMeta)
+type T2T = ParserT Sources ParserState (Reader T2TMeta)
-- | An object for the T2T macros meta information
-- the contents of each field is simply substituted verbatim into the file
@@ -53,25 +55,28 @@ getT2TMeta = do
inps <- P.getInputFiles
outp <- fromMaybe "" <$> P.getOutputFile
curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
- let getModTime = fmap (formatTime defaultTimeLocale "%T") .
- P.getModificationTime
- curMtime <- case inps of
- [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
- _ -> catchError
- (maximum <$> mapM getModTime inps)
- (const (return ""))
- return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp
+ curMtime <- catchError
+ (mapM P.getModificationTime inps >>=
+ (\case
+ Nothing ->
+ formatTime defaultTimeLocale "%T" <$> P.getZonedTime
+ Just ts -> return $
+ formatTime defaultTimeLocale "%T" $ maximum ts)
+ . nonEmpty)
+ (const (return ""))
+ return $ T2TMeta (T.pack curDate) (T.pack curMtime)
+ (intercalate ", " inps) outp
-- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
+readTxt2Tags :: (PandocMonad m, ToSources a)
=> ReaderOptions
- -> Text
+ -> a
-> m Pandoc
readTxt2Tags opts s = do
+ let sources = ensureFinalNewlines 2 (toSources s)
meta <- getT2TMeta
let parsed = flip runReader meta $
- readWithM parseT2T (def {stateOptions = opts}) $
- crFilter s <> "\n\n"
+ readWithM parseT2T (def {stateOptions = opts}) sources
case parsed of
Right result -> return result
Left e -> throwError e
@@ -261,9 +266,9 @@ table = try $ do
rows <- many1 (many commentLine *> tableRow)
let columns = transpose rows
let ncolumns = length columns
- let aligns = map (foldr1 findAlign . map fst) columns
+ let aligns = map (fromMaybe AlignDefault . foldr findAlign Nothing) columns
let rows' = map (map snd) rows
- let size = maximum (map length rows')
+ let size = maybe 0 maximum $ nonEmpty $ map length rows'
let rowsPadded = map (pad size) rows'
let headerPadded = if null tableHeader then mempty else pad size tableHeader
let toRow = Row nullAttr . map B.simpleCell
@@ -278,10 +283,11 @@ pad :: (Monoid a) => Int -> [a] -> [a]
pad n xs = xs ++ replicate (n - length xs) mempty
-findAlign :: Alignment -> Alignment -> Alignment
-findAlign x y
- | x == y = x
- | otherwise = AlignDefault
+findAlign :: (Alignment, a) -> Maybe Alignment -> Maybe Alignment
+findAlign (x,_) (Just y)
+ | x == y = Just x
+ | otherwise = Just AlignDefault
+findAlign (x,_) Nothing = Just x
headerRow :: T2T [(Alignment, Blocks)]
headerRow = genericRow (string "||")
@@ -472,9 +478,29 @@ macro = try $ do
-- raw URLs in text are automatically linked
url :: T2T Inlines
url = try $ do
- (rawUrl, escapedUrl) <- try uri <|> emailAddress
+ (rawUrl, escapedUrl) <- try uri <|> emailAddress'
return $ B.link rawUrl "" (B.str escapedUrl)
+emailAddress' :: T2T (Text, Text)
+emailAddress' = do
+ (base, mailURI) <- emailAddress
+ query <- option "" emailQuery
+ return (base <> query, mailURI <> query)
+
+emailQuery :: T2T Text
+emailQuery = do
+ char '?'
+ parts <- kv `sepBy1` (char '&')
+ return $ "?" <> T.intercalate "&" parts
+
+kv :: T2T Text
+kv = do
+ k <- T.pack <$> many1 alphaNum
+ char '='
+ let vchar = alphaNum <|> try (oneOf "%._/~:,=$@&+-;*" <* lookAhead alphaNum)
+ v <- T.pack <$> many1 vchar
+ return (k <> "=" <> v)
+
uri :: T2T (Text, Text)
uri = try $ do
address <- t2tURI
@@ -564,7 +590,7 @@ getTarget = do
_ -> "html"
atStart :: T2T ()
-atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
+atStart = getPosition >>= guard . (== 1) . sourceColumn
ignoreSpacesCap :: T2T Text -> T2T Text
ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces)
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 74dac5ea7..460f304c4 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
many1Till, orderedListMarker, readWithM,
registerHeader, spaceChar, stateMeta,
stateOptions, uri, manyTillChar, manyChar, textStr,
- many1Char, countChar, many1TillChar)
-import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+ many1Char, countChar, many1TillChar,
+ alphaNum, anyChar, char, newline, noneOf, oneOf,
+ space, spaces, string)
+import Text.Pandoc.Sources (ToSources(..), Sources)
+import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
isURI, tshow)
-import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
- spaces, string)
import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
manyTill, notFollowedBy, option, skipMany1)
import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
-readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
readVimwiki opts s = do
- res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
+ let sources = toSources s
+ res <- readWithM parseVimwiki def{ stateOptions = opts } sources
case res of
Left e -> throwError e
Right result -> return result
-type VwParser = ParserT Text ParserState
+type VwParser = ParserT Sources ParserState
-- constants