aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorYan Pas <yanp.bugz@gmail.com>2018-10-07 18:10:01 +0300
committerYan Pas <yanp.bugz@gmail.com>2018-10-07 18:10:01 +0300
commit27467189ab184c5d098e244e01f7d1bfdb0d4d45 (patch)
treed1fb96ebbc49ee0c4e73ef354feddd521690d545 /src/Text/Pandoc
parent4f3dd3b1af7217214287ab886147c5e33a54774d (diff)
parentbd8a66394bc25b52dca9ffd963a560a4ca492f9c (diff)
downloadpandoc-27467189ab184c5d098e244e01f7d1bfdb0d4d45.tar.gz
Merge branch 'master' into groff_reader
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/App.hs79
-rw-r--r--src/Text/Pandoc/Class.hs6
-rw-r--r--src/Text/Pandoc/Emoji.hs7
-rw-r--r--src/Text/Pandoc/Extensions.hs7
-rw-r--r--src/Text/Pandoc/Highlighting.hs1
-rw-r--r--src/Text/Pandoc/ImageSize.hs55
-rw-r--r--src/Text/Pandoc/Lua.hs26
-rw-r--r--src/Text/Pandoc/Lua/Filter.hs144
-rw-r--r--src/Text/Pandoc/Lua/Init.hs15
-rw-r--r--src/Text/Pandoc/Lua/Module/MediaBag.hs2
-rw-r--r--src/Text/Pandoc/Lua/Module/Pandoc.hs35
-rw-r--r--src/Text/Pandoc/Lua/Module/Utils.hs34
-rw-r--r--src/Text/Pandoc/Lua/Packages.hs32
-rw-r--r--src/Text/Pandoc/Lua/StackInstances.hs254
-rw-r--r--src/Text/Pandoc/Lua/Util.hs186
-rw-r--r--src/Text/Pandoc/Options.hs2
-rw-r--r--src/Text/Pandoc/PDF.hs122
-rw-r--r--src/Text/Pandoc/Parsing.hs23
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs64
-rw-r--r--src/Text/Pandoc/Readers/Creole.hs2
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs66
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs5
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs13
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs2
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs53
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs14
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs2
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs1474
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Lang.hs173
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs668
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs6
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs192
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs514
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs4
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs2
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs8
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs2
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs4
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs7
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs139
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs2
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs10
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs14
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs10
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs15
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs4
-rw-r--r--src/Text/Pandoc/Readers/RST.hs68
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs111
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs2
-rw-r--r--src/Text/Pandoc/Readers/TikiWiki.hs9
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs8
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs18
-rw-r--r--src/Text/Pandoc/Shared.hs41
-rw-r--r--src/Text/Pandoc/Translations.hs39
-rw-r--r--src/Text/Pandoc/UUID.hs15
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs25
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs57
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs25
-rw-r--r--src/Text/Pandoc/Writers/Custom.hs155
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs7
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs36
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs39
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs12
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs199
-rw-r--r--src/Text/Pandoc/Writers/Haddock.hs14
-rw-r--r--src/Text/Pandoc/Writers/ICML.hs5
-rw-r--r--src/Text/Pandoc/Writers/JATS.hs8
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs203
-rw-r--r--src/Text/Pandoc/Writers/Man.hs33
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs55
-rw-r--r--src/Text/Pandoc/Writers/Math.hs2
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs1
-rw-r--r--src/Text/Pandoc/Writers/Ms.hs31
-rw-r--r--src/Text/Pandoc/Writers/Muse.hs245
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs4
-rw-r--r--src/Text/Pandoc/Writers/OPML.hs3
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs156
-rw-r--r--src/Text/Pandoc/Writers/Org.hs4
-rw-r--r--src/Text/Pandoc/Writers/Powerpoint/Presentation.hs22
-rw-r--r--src/Text/Pandoc/Writers/RST.hs87
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs6
-rw-r--r--src/Text/Pandoc/Writers/Shared.hs121
-rw-r--r--src/Text/Pandoc/Writers/TEI.hs15
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs30
-rw-r--r--src/Text/Pandoc/Writers/Textile.hs14
85 files changed, 3756 insertions, 2668 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index a59fd9bbe..79d83c0d3 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -52,7 +52,7 @@ import Data.Aeson (defaultOptions)
import Data.Aeson.TH (deriveJSON)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as B
-import Data.Char (toLower, toUpper, isAscii, ord)
+import Data.Char (toLower, toUpper)
import Data.List (find, intercalate, isPrefixOf, isSuffixOf, sort)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust, isNothing)
@@ -62,8 +62,7 @@ import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TE
import qualified Data.Text.Encoding.Error as TE
-import Data.Yaml (decode)
-import qualified Data.Yaml as Yaml
+import qualified Data.YAML as YAML
import GHC.Generics
import Network.URI (URI (..), parseURI)
#ifdef EMBED_DATA_FILES
@@ -84,19 +83,18 @@ import System.Exit (exitSuccess)
import System.FilePath
import System.IO (nativeNewline, stdout)
import qualified System.IO as IO (Newline (..))
-import System.IO.Error (isDoesNotExistError)
import Text.Pandoc
import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
import Text.Pandoc.Builder (setMeta, deleteMeta)
import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
import Text.Pandoc.Highlighting (highlightingStyles)
import Text.Pandoc.PDF (makePDF)
+import Text.Pandoc.Readers.Markdown (yamlToMeta)
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
headerShift, isURI, ordNub, safeRead, tabFilter, uriPathToPath)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL)
-import Text.Pandoc.XML (toEntities)
import Text.Printf
#ifndef _WINDOWS
import System.Posix.IO (stdOutput)
@@ -144,6 +142,13 @@ engines = map ("html",) htmlEngines ++
pdfEngines :: [String]
pdfEngines = ordNub $ map snd engines
+pdfIsNoWriterErrorMsg :: String
+pdfIsNoWriterErrorMsg =
+ "To create a pdf using pandoc, use " ++
+ "-t latex|beamer|context|ms|html5" ++
+ "\nand specify an output file with " ++
+ ".pdf extension (-o filename.pdf)."
+
pdfWriterAndProg :: Maybe String -- ^ user-specified writer name
-> Maybe String -- ^ user-specified pdf-engine
-> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg)
@@ -155,9 +160,9 @@ pdfWriterAndProg mWriter mEngine = do
where
go Nothing Nothing = Right ("latex", "pdflatex")
go (Just writer) Nothing = (writer,) <$> engineForWriter writer
- go Nothing (Just engine) = (,engine) <$> writerForEngine engine
+ go Nothing (Just engine) = (,engine) <$> writerForEngine (takeBaseName engine)
go (Just writer) (Just engine) =
- case find (== (baseWriterName writer, engine)) engines of
+ case find (== (baseWriterName writer, takeBaseName engine)) engines of
Just _ -> Right (writer, engine)
Nothing -> Left $ "pdf-engine " ++ engine ++
" is not compatible with output format " ++ writer
@@ -167,6 +172,7 @@ pdfWriterAndProg mWriter mEngine = do
[] -> Left $
"pdf-engine " ++ eng ++ " not known"
+ engineForWriter "pdf" = Left pdfIsNoWriterErrorMsg
engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of
eng : _ -> Right eng
[] -> Left $
@@ -235,11 +241,7 @@ convertWithOpts opts = do
else case getWriter (map toLower writerName) of
Left e -> E.throwIO $ PandocAppError $
if format == "pdf"
- then e ++
- "\nTo create a pdf using pandoc, use " ++
- "-t latex|beamer|context|ms|html5" ++
- "\nand specify an output file with " ++
- ".pdf extension (-o filename.pdf)."
+ then e ++ "\n" ++ pdfIsNoWriterErrorMsg
else e
Right (w, es) -> return (w :: Writer PandocIO, es)
@@ -381,11 +383,10 @@ convertWithOpts opts = do
"" -> tp <.> format
_ -> tp
Just . UTF8.toString <$>
- (readFileStrict tp' `catchError`
+ ((fst <$> fetchItem tp') `catchError`
(\e ->
case e of
- PandocIOError _ e' |
- isDoesNotExistError e' ->
+ PandocResourceNotFound _ ->
readDataFile ("templates" </> tp')
_ -> throwError e))
@@ -398,6 +399,10 @@ convertWithOpts opts = do
("application/xml", jatsCSL)
return $ ("csl", jatsEncoded) : optMetadata opts
else return $ optMetadata opts
+ metadataFromFile <-
+ case optMetadataFile opts of
+ Nothing -> return mempty
+ Just file -> readFileLazy file >>= yamlToMeta
case lookup "lang" (optMetadata opts) of
Just l -> case parseBCP47 l of
@@ -437,6 +442,7 @@ convertWithOpts opts = do
, writerTOCDepth = optTOCDepth opts
, writerReferenceDoc = optReferenceDoc opts
, writerSyntaxMap = syntaxMap
+ , writerPreferAscii = optAscii opts
}
let readerOpts = def{
@@ -490,6 +496,7 @@ convertWithOpts opts = do
( (if isJust (optExtractMedia opts)
then fillMediaBag
else return)
+ >=> return . addNonPresentMetadata metadataFromFile
>=> return . addMetadata metadata
>=> applyTransforms transforms
>=> applyFilters readerOpts filters' [format]
@@ -512,19 +519,10 @@ convertWithOpts opts = do
let htmlFormat = format `elem`
["html","html4","html5","s5","slidy",
"slideous","dzslides","revealjs"]
- escape
- | optAscii opts
- , htmlFormat || format == "docbook4" ||
- format == "docbook5" || format == "docbook" ||
- format == "jats" || format == "opml" ||
- format == "icml" = toEntities
- | optAscii opts
- , format == "ms" || format == "man" = groffEscape
- | otherwise = id
addNl = if standalone
then id
else (<> T.singleton '\n')
- output <- (addNl . escape) <$> f writerOptions doc
+ output <- addNl <$> f writerOptions doc
writerFn eol outputFile =<<
if optSelfContained opts && htmlFormat
-- TODO not maximally efficient; change type
@@ -532,12 +530,6 @@ convertWithOpts opts = do
then T.pack <$> makeSelfContained (T.unpack output)
else return output
-groffEscape :: Text -> Text
-groffEscape = T.concatMap toUchar
- where toUchar c
- | isAscii c = T.singleton c
- | otherwise = T.pack $ printf "\\[u%04X]" (ord c)
-
type Transform = Pandoc -> Pandoc
isTextFormat :: String -> Bool
@@ -555,6 +547,7 @@ data Opt = Opt
, optTemplate :: Maybe FilePath -- ^ Custom template
, optVariables :: [(String,String)] -- ^ Template variables to set
, optMetadata :: [(String, String)] -- ^ Metadata fields to set
+ , optMetadataFile :: Maybe FilePath -- ^ Name of YAML metadata file
, optOutputFile :: Maybe FilePath -- ^ Name of output file
, optInputFiles :: [FilePath] -- ^ Names of input files
, optNumberSections :: Bool -- ^ Number sections in LaTeX
@@ -598,7 +591,7 @@ data Opt = Opt
, optPdfEngineArgs :: [String] -- ^ Flags to pass to the engine
, optSlideLevel :: Maybe Int -- ^ Header level that creates slides
, optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2
- , optAscii :: Bool -- ^ Use ascii characters only in html
+ , optAscii :: Bool -- ^ Prefer ascii output
, optDefaultImageExtension :: String -- ^ Default image extension
, optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media
, optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes.
@@ -627,6 +620,7 @@ defaultOpts = Opt
, optTemplate = Nothing
, optVariables = []
, optMetadata = []
+ , optMetadataFile = Nothing
, optOutputFile = Nothing
, optInputFiles = []
, optNumberSections = False
@@ -686,6 +680,9 @@ defaultOpts = Opt
, optStripComments = False
}
+addNonPresentMetadata :: Text.Pandoc.Meta -> Pandoc -> Pandoc
+addNonPresentMetadata newmeta (Pandoc meta bs) = Pandoc (meta <> newmeta) bs
+
addMetadata :: [(String, String)] -> Pandoc -> Pandoc
addMetadata kvs pdc = foldr addMeta (removeMetaKeys kvs pdc) kvs
@@ -702,10 +699,12 @@ removeMetaKeys :: [(String,String)] -> Pandoc -> Pandoc
removeMetaKeys kvs pdc = foldr (deleteMeta . fst) pdc kvs
readMetaValue :: String -> MetaValue
-readMetaValue s = case decode (UTF8.fromString s) of
- Just (Yaml.String t) -> MetaString $ T.unpack t
- Just (Yaml.Bool b) -> MetaBool b
- _ -> MetaString s
+readMetaValue s = case YAML.decodeStrict (UTF8.fromString s) of
+ Right [YAML.Scalar (YAML.SStr t)]
+ -> MetaString $ T.unpack t
+ Right [YAML.Scalar (YAML.SBool b)]
+ -> MetaBool b
+ _ -> MetaString s
-- Determine default reader based on source file extensions
defaultReaderName :: String -> [FilePath] -> String
@@ -960,6 +959,12 @@ options =
"KEY[:VALUE]")
""
+ , Option "" ["metadata-file"]
+ (ReqArg
+ (\arg opt -> return opt{ optMetadataFile = Just arg })
+ "FILE")
+ ""
+
, Option "V" ["variable"]
(ReqArg
(\arg opt -> do
@@ -1153,7 +1158,7 @@ options =
, Option "" ["ascii"]
(NoArg
(\opt -> return opt { optAscii = True }))
- "" -- "Use ascii characters only in HTML output"
+ "" -- "Prefer ASCII output"
, Option "" ["reference-links"]
(NoArg
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 4ade2dc6d..e47546dfc 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -132,7 +132,7 @@ import Network.HTTP.Client.Internal (addProxy)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import System.Environment (getEnv)
import Network.HTTP.Types.Header ( hContentType )
-import Network (withSocketsDo)
+import Network.Socket (withSocketsDo)
import Data.ByteString.Lazy (toChunks)
import qualified Control.Exception as E
import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
@@ -882,10 +882,10 @@ adjustImagePath _ _ x = x
-- of things that would normally be obtained through IO.
data PureState = PureState { stStdGen :: StdGen
, stWord8Store :: [Word8] -- should be
- -- inifinite,
+ -- infinite,
-- i.e. [1..]
, stUniqStore :: [Int] -- should be
- -- inifinite and
+ -- infinite and
-- contain every
-- element at most
-- once, e.g. [1..]
diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs
index 5cc965153..7d0af1a72 100644
--- a/src/Text/Pandoc/Emoji.hs
+++ b/src/Text/Pandoc/Emoji.hs
@@ -28,9 +28,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Emoji symbol lookup from canonical string identifier.
-}
-module Text.Pandoc.Emoji ( emojis ) where
+module Text.Pandoc.Emoji ( emojis, emojiToInline ) where
import Prelude
import qualified Data.Map as M
+import Text.Pandoc.Definition (Inline (Span, Str))
emojis :: M.Map String String
emojis = M.fromList
@@ -905,3 +906,7 @@ emojis = M.fromList
,("zero","0\65039\8419")
,("zzz","\128164")
]
+
+emojiToInline :: String -> Maybe Inline
+emojiToInline emojikey = makeSpan <$> M.lookup emojikey emojis
+ where makeSpan = Span ("", ["emoji"], [("data-emoji", emojikey)]) . (:[]) . Str
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 5ccb7dffb..b60c57497 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -101,7 +101,10 @@ data Extension =
-- and disallow laziness
| Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php
| Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
- -- East Asian wide characters
+ -- East Asian wide characters. Note: this extension
+ -- does not affect readers/writers directly; it causes
+ -- the eastAsianLineBreakFilter to be applied after
+ -- parsing, in Text.Pandoc.App.convertWithOpts.
| Ext_emoji -- ^ Support emoji like :smile:
| Ext_empty_paragraphs -- ^ Allow empty paragraphs
| Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
@@ -111,7 +114,7 @@ data Extension =
| Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
| Ext_fenced_code_blocks -- ^ Parse fenced code blocks
| Ext_fenced_divs -- ^ Allow fenced div syntax :::
- | Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes
+ | Ext_footnotes -- ^ Pandoc\/PHP\/MMD style footnotes
| Ext_four_space_rule -- ^ Require 4-space indent for list contents
| Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, using
-- GitHub's method for generating identifiers
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 70bb70302..672eca392 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -45,6 +45,7 @@ module Text.Pandoc.Highlighting ( highlightingStyles
, tango
, kate
, monochrome
+ , breezeDark
, haddock
, Style
, fromListingsLanguage
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index c5fe98a66..d57f66da5 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -319,20 +319,22 @@ pngSize img = do
(shift w1 24 + shift w2 16 + shift w3 8 + w4,
shift h1 24 + shift h2 16 + shift h3 8 + h4)
_ -> Nothing -- "PNG parse error"
- let (dpix, dpiy) = findpHYs rest''
+ (dpix, dpiy) <- findpHYs rest''
return ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy }
-findpHYs :: ByteString -> (Integer, Integer)
+findpHYs :: ByteString -> Maybe (Integer, Integer)
findpHYs x
- | B.null x || "IDAT" `B.isPrefixOf` x = (72,72)
+ | B.null x || "IDAT" `B.isPrefixOf` x = return (72,72)
| "pHYs" `B.isPrefixOf` x =
- let [x1,x2,x3,x4,y1,y2,y3,y4,u] =
- map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x
- factor = if u == 1 -- dots per meter
- then \z -> z * 254 `div` 10000
- else const 72
- in ( factor $ shift x1 24 + shift x2 16 + shift x3 8 + x4,
- factor $ shift y1 24 + shift y2 16 + shift y3 8 + y4 )
+ case map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x of
+ [x1,x2,x3,x4,y1,y2,y3,y4,u] -> do
+ let factor = if u == 1 -- dots per meter
+ then \z -> z * 254 `div` 10000
+ else const 72
+ return
+ ( factor $ shift x1 24 + shift x2 16 + shift x3 8 + x4,
+ factor $ shift y1 24 + shift y2 16 + shift y3 8 + y4 )
+ _ -> mzero
| otherwise = findpHYs $ B.drop 1 x -- read another byte
gifSize :: ByteString -> Maybe ImageSize
@@ -408,20 +410,21 @@ jpegSize img =
jfifSize :: ByteString -> Either String ImageSize
jfifSize rest =
- let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral
- $ unpack $ B.take 5 $B.drop 9 rest
- factor = case dpiDensity of
- 1 -> id
- 2 -> \x -> x * 254 `div` 10
- _ -> const 72
- dpix = factor (shift dpix1 8 + dpix2)
- dpiy = factor (shift dpiy1 8 + dpiy2)
- in case findJfifSize rest of
- Left msg -> Left msg
- Right (w,h) ->Right ImageSize { pxX = w
+ case map fromIntegral $ unpack $ B.take 5 $ B.drop 9 rest of
+ [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] ->
+ let factor = case dpiDensity of
+ 1 -> id
+ 2 -> \x -> x * 254 `div` 10
+ _ -> const 72
+ dpix = factor (shift dpix1 8 + dpix2)
+ dpiy = factor (shift dpiy1 8 + dpiy2)
+ in case findJfifSize rest of
+ Left msg -> Left msg
+ Right (w,h) -> Right ImageSize { pxX = w
, pxY = h
, dpiX = dpix
, dpiY = dpiy }
+ _ -> Left "unable to determine JFIF size"
findJfifSize :: ByteString -> Either String (Integer,Integer)
findJfifSize bs =
@@ -541,10 +544,12 @@ exifHeader hdr = do
let resfactor = case lookup ResolutionUnit allentries of
Just (UnsignedShort 1) -> 100 / 254
_ -> 1
- let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
- $ lookup XResolution allentries
- let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
- $ lookup YResolution allentries
+ let xres = case lookup XResolution allentries of
+ Just (UnsignedRational x) -> floor (x * resfactor)
+ _ -> 72
+ let yres = case lookup YResolution allentries of
+ Just (UnsignedRational y) -> floor (y * resfactor)
+ _ -> 72
return ImageSize{
pxX = wdth
, pxY = hght
diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs
index cd7117074..e160f7123 100644
--- a/src/Text/Pandoc/Lua.hs
+++ b/src/Text/Pandoc/Lua.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright © 2017–2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -16,6 +15,7 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
+{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.Lua
Copyright : Copyright © 2017–2018 Albert Krewinkel
@@ -34,14 +34,14 @@ module Text.Pandoc.Lua
import Prelude
import Control.Monad ((>=>))
-import Foreign.Lua (FromLuaStack (peek), Lua, LuaException (..),
- Status (OK), ToLuaStack (push))
+import Foreign.Lua (Lua)
import Text.Pandoc.Class (PandocIO)
import Text.Pandoc.Definition (Pandoc)
import Text.Pandoc.Lua.Filter (LuaFilter, walkMWithLuaFilter)
-import Text.Pandoc.Lua.Init (runPandocLua, registerScriptPath)
-import Text.Pandoc.Lua.Util (popValue)
+import Text.Pandoc.Lua.Init (LuaException (..), runPandocLua, registerScriptPath)
+import Text.Pandoc.Lua.Util (dofileWithTraceback)
import Text.Pandoc.Options (ReaderOptions)
+
import qualified Foreign.Lua as Lua
-- | Run the Lua filter in @filterPath@ for a transformation to target
@@ -59,26 +59,24 @@ runLuaFilter' ropts filterPath format pd = do
registerReaderOptions
registerScriptPath filterPath
top <- Lua.gettop
- stat <- Lua.dofile filterPath
- if stat /= OK
- then do
- luaErrMsg <- popValue
- Lua.throwLuaError luaErrMsg
+ stat <- dofileWithTraceback filterPath
+ if stat /= Lua.OK
+ then Lua.throwTopMessage
else do
newtop <- Lua.gettop
-- Use the returned filters, or the implicitly defined global filter if
-- nothing was returned.
luaFilters <- if newtop - top >= 1
- then peek (-1)
- else Lua.getglobal "_G" *> fmap (:[]) popValue
+ then Lua.peek Lua.stackTop
+ else Lua.pushglobaltable *> fmap (:[]) Lua.popValue
runAll luaFilters pd
where
registerFormat = do
- push format
+ Lua.push format
Lua.setglobal "FORMAT"
registerReaderOptions = do
- push ropts
+ Lua.push ropts
Lua.setglobal "PANDOC_READER_OPTIONS"
runAll :: [LuaFilter] -> Pandoc -> Lua Pandoc
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
index 264066305..d17f9a969 100644
--- a/src/Text/Pandoc/Lua/Filter.hs
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -1,6 +1,33 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-
+Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu>
+ 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
{-# LANGUAGE FlexibleContexts #-}
-
+{-# LANGUAGE NoImplicitPrelude #-}
+{- |
+Module : Text.Pandoc.Lua.Filter
+Copyright : © 2012–2018 John MacFarlane,
+ © 2017-2018 Albert Krewinkel
+License : GNU GPL, version 2 or above
+Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+Stability : alpha
+
+Types and functions for running Lua filters.
+-}
module Text.Pandoc.Lua.Filter ( LuaFilterFunction
, LuaFilter
, tryFilter
@@ -12,62 +39,58 @@ module Text.Pandoc.Lua.Filter ( LuaFilterFunction
, inlineElementNames
) where
import Prelude
-import Control.Monad (mplus, unless, when, (>=>))
+import Control.Monad (mplus, (>=>))
import Control.Monad.Catch (finally)
-import Text.Pandoc.Definition
+import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
+ showConstr, toConstr, tyconUQname)
import Data.Foldable (foldrM)
import Data.Map (Map)
-import qualified Data.Map as Map
-import qualified Foreign.Lua as Lua
-import Foreign.Lua (FromLuaStack (peek), Lua, StackIndex,
- Status (OK), ToLuaStack (push))
+import Foreign.Lua (Lua, Peekable, Pushable)
+import Text.Pandoc.Definition
+import Text.Pandoc.Lua.StackInstances ()
import Text.Pandoc.Walk (walkM, Walkable)
-import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
- showConstr, toConstr, tyconUQname)
-import Text.Pandoc.Lua.StackInstances()
-import Text.Pandoc.Lua.Util (typeCheck)
-type FunctionMap = Map String LuaFilterFunction
-
-newtype LuaFilterFunction = LuaFilterFunction { functionIndex :: Int }
-
-instance ToLuaStack LuaFilterFunction where
- push = pushFilterFunction
-
-instance FromLuaStack LuaFilterFunction where
- peek = registerFilterFunction
-
-newtype LuaFilter = LuaFilter FunctionMap
-
-instance FromLuaStack LuaFilter where
- peek idx =
- let constrs = metaFilterName : pandocFilterNames
- ++ blockElementNames
- ++ inlineElementNames
- fn c acc = do
- Lua.getfield idx c
- filterFn <- Lua.tryLua (peek (-1))
- Lua.pop 1
+import qualified Data.Map.Strict as Map
+import qualified Foreign.Lua as Lua
+import qualified Text.Pandoc.Lua.Util as LuaUtil
+
+-- | Filter function stored in the registry
+newtype LuaFilterFunction = LuaFilterFunction Lua.Reference
+
+-- | Collection of filter functions (at most one function per element
+-- constructor)
+newtype LuaFilter = LuaFilter (Map String LuaFilterFunction)
+
+instance Peekable LuaFilter where
+ peek idx = do
+ let constrs = metaFilterName
+ : pandocFilterNames
+ ++ blockElementNames
+ ++ inlineElementNames
+ let go constr acc = do
+ Lua.getfield idx constr
+ filterFn <- registerFilterFunction
return $ case filterFn of
- Left _ -> acc
- Right f -> (c, f) : acc
- in LuaFilter . Map.fromList <$> foldrM fn [] constrs
-
--- | Push the filter function to the top of the stack.
+ Nothing -> acc
+ Just fn -> Map.insert constr fn acc
+ LuaFilter <$> foldrM go Map.empty constrs
+
+-- | Register the function at the top of the stack as a filter function in the
+-- registry.
+registerFilterFunction :: Lua (Maybe LuaFilterFunction)
+registerFilterFunction = do
+ isFn <- Lua.isfunction Lua.stackTop
+ if isFn
+ then Just . LuaFilterFunction <$> Lua.ref Lua.registryindex
+ else Nothing <$ Lua.pop 1
+
+-- | Retrieve filter function from registry and push it to the top of the stack.
pushFilterFunction :: LuaFilterFunction -> Lua ()
-pushFilterFunction lf =
- -- The function is stored in a lua registry table, retrieve it from there.
- Lua.rawgeti Lua.registryindex (functionIndex lf)
-
-registerFilterFunction :: StackIndex -> Lua LuaFilterFunction
-registerFilterFunction idx = do
- isFn <- Lua.isfunction idx
- unless isFn . Lua.throwLuaError $ "Not a function at index " ++ show idx
- Lua.pushvalue idx
- refIdx <- Lua.ref Lua.registryindex
- return $ LuaFilterFunction refIdx
-
-elementOrList :: FromLuaStack a => a -> Lua [a]
+pushFilterFunction (LuaFilterFunction fnRef) =
+ Lua.getref Lua.registryindex fnRef
+
+
+elementOrList :: Peekable a => a -> Lua [a]
elementOrList x = do
let topOfStack = Lua.stackTop
elementUnchanged <- Lua.isnil topOfStack
@@ -77,12 +100,10 @@ elementOrList x = do
mbres <- Lua.peekEither topOfStack
case mbres of
Right res -> [res] <$ Lua.pop 1
- Left _ -> do
- typeCheck Lua.stackTop Lua.TypeTable
- Lua.toList topOfStack `finally` Lua.pop 1
+ Left _ -> Lua.peekList topOfStack `finally` Lua.pop 1
-- | Try running a filter for the given element
-tryFilter :: (Data a, FromLuaStack a, ToLuaStack a)
+tryFilter :: (Data a, Peekable a, Pushable a)
=> LuaFilter -> a -> Lua [a]
tryFilter (LuaFilter fnMap) x =
let filterFnName = showConstr (toConstr x)
@@ -96,14 +117,11 @@ tryFilter (LuaFilter fnMap) x =
-- called with given element as argument and is expected to return an element.
-- Alternatively, the function can return nothing or nil, in which case the
-- element is left unchanged.
-runFilterFunction :: ToLuaStack a => LuaFilterFunction -> a -> Lua ()
+runFilterFunction :: Pushable a => LuaFilterFunction -> a -> Lua ()
runFilterFunction lf x = do
pushFilterFunction lf
- push x
- z <- Lua.pcall 1 1 Nothing
- when (z /= OK) $ do
- let addPrefix = ("Error while running filter function: " ++)
- Lua.throwTopMessageAsError' addPrefix
+ Lua.push x
+ LuaUtil.callWithTraceback 1 1
walkMWithLuaFilter :: LuaFilter -> Pandoc -> Lua Pandoc
walkMWithLuaFilter f =
@@ -156,7 +174,7 @@ metaFilterName = "Meta"
pandocFilterNames :: [String]
pandocFilterNames = ["Pandoc", "Doc"]
-singleElement :: FromLuaStack a => a -> Lua a
+singleElement :: Peekable a => a -> Lua a
singleElement x = do
elementUnchanged <- Lua.isnil (-1)
if elementUnchanged
@@ -167,6 +185,6 @@ singleElement x = do
Right res -> res <$ Lua.pop 1
Left err -> do
Lua.pop 1
- Lua.throwLuaError $
+ Lua.throwException $
"Error while trying to get a filter's return " ++
"value from lua stack.\n" ++ err
diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs
index c8c7fdfbd..35611d481 100644
--- a/src/Text/Pandoc/Lua/Init.hs
+++ b/src/Text/Pandoc/Lua/Init.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -16,6 +15,7 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
+{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.Lua
Copyright : Copyright © 2017-2018 Albert Krewinkel
@@ -40,7 +40,7 @@ import Control.Monad.Trans (MonadIO (..))
import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr)
import Data.IORef (newIORef, readIORef)
import Data.Version (Version (versionBranch))
-import Foreign.Lua (Lua, LuaException (..))
+import Foreign.Lua (Lua)
import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8)
import Paths_pandoc (version)
import Text.Pandoc.Class (PandocIO, getCommonState, getUserDataDir, getMediaBag,
@@ -54,17 +54,22 @@ import qualified Foreign.Lua as Lua
import qualified Foreign.Lua.Module.Text as Lua
import qualified Text.Pandoc.Definition as Pandoc
+-- | Lua error message
+newtype LuaException = LuaException String deriving (Show)
+
-- | Run the lua interpreter, using pandoc's default way of environment
--- initalization.
+-- initialization.
runPandocLua :: Lua a -> PandocIO (Either LuaException a)
runPandocLua luaOp = do
luaPkgParams <- luaPackageParams
enc <- liftIO $ getForeignEncoding <* setForeignEncoding utf8
- res <- liftIO $ Lua.runLuaEither (initLuaState luaPkgParams *> luaOp)
+ res <- liftIO $ Lua.runEither (initLuaState luaPkgParams *> luaOp)
liftIO $ setForeignEncoding enc
newMediaBag <- liftIO (readIORef (luaPkgMediaBag luaPkgParams))
setMediaBag newMediaBag
- return res
+ return $ case res of
+ Left (Lua.Exception msg) -> Left (LuaException msg)
+ Right x -> Right x
-- | Generate parameters required to setup pandoc's lua environment.
luaPackageParams :: PandocIO LuaPackageParams
diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs
index f48fe56c5..150c06cc8 100644
--- a/src/Text/Pandoc/Lua/Module/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs
@@ -87,7 +87,7 @@ mediaDirectoryFn mbRef = do
zipWithM_ addEntry [1..] dirContents
return 1
where
- addEntry :: Int -> (FilePath, MimeType, Int) -> Lua ()
+ addEntry :: Lua.Integer -> (FilePath, MimeType, Int) -> Lua ()
addEntry idx (fp, mimeType, contentLength) = do
Lua.newtable
Lua.push "path" *> Lua.push fp *> Lua.rawset (-3)
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index 8cb630d7b..769b04b9e 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -17,6 +16,7 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.Lua.Module.Pandoc
Copyright : Copyright © 2017-2018 Albert Krewinkel
@@ -36,13 +36,12 @@ import Control.Monad (when)
import Data.Default (Default (..))
import Data.Maybe (fromMaybe)
import Data.Text (pack)
-import Foreign.Lua (ToLuaStack, FromLuaStack, Lua, NumResults, Optional, liftIO)
+import Foreign.Lua (Lua, NumResults, Optional, Peekable, Pushable)
import System.Exit (ExitCode (..))
import Text.Pandoc.Class (runIO)
import Text.Pandoc.Definition (Block, Inline)
import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter)
import Text.Pandoc.Lua.StackInstances ()
-import Text.Pandoc.Lua.Util (addFunction, addValue, loadScriptFromDataDir)
import Text.Pandoc.Walk (Walkable)
import Text.Pandoc.Options (ReaderOptions (readerExtensions))
import Text.Pandoc.Process (pipeProcess)
@@ -51,19 +50,20 @@ import Text.Pandoc.Readers (Reader (..), getReader)
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Foreign.Lua as Lua
+import qualified Text.Pandoc.Lua.Util as LuaUtil
-- | Push the "pandoc" on the lua stack. Requires the `list` module to be
-- loaded.
pushModule :: Maybe FilePath -> Lua NumResults
pushModule datadir = do
- loadScriptFromDataDir datadir "pandoc.lua"
- addFunction "read" readDoc
- addFunction "pipe" pipeFn
- addFunction "walk_block" walkBlock
- addFunction "walk_inline" walkInline
+ LuaUtil.loadScriptFromDataDir datadir "pandoc.lua"
+ LuaUtil.addFunction "read" readDoc
+ LuaUtil.addFunction "pipe" pipeFn
+ LuaUtil.addFunction "walk_block" walkBlock
+ LuaUtil.addFunction "walk_inline" walkInline
return 1
-walkElement :: (ToLuaStack a, Walkable [Inline] a, Walkable [Block] a)
+walkElement :: (Pushable a, Walkable [Inline] a, Walkable [Block] a)
=> a -> LuaFilter -> Lua a
walkElement x f = walkInlines f x >>= walkBlocks f
@@ -81,7 +81,8 @@ readDoc content formatSpecOrNil = do
Right (reader, es) ->
case reader of
TextReader r -> do
- res <- liftIO $ runIO $ r def{ readerExtensions = es } (pack content)
+ res <- Lua.liftIO . runIO $
+ r def{ readerExtensions = es } (pack content)
case res of
Right pd -> (1 :: NumResults) <$ Lua.push pd -- success, push Pandoc
Left s -> Lua.raiseError (show s) -- error while reading
@@ -93,7 +94,7 @@ pipeFn :: String
-> BL.ByteString
-> Lua NumResults
pipeFn command args input = do
- (ec, output) <- liftIO $ pipeProcess Nothing command args input
+ (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input
case ec of
ExitSuccess -> 1 <$ Lua.push output
ExitFailure n -> Lua.raiseError (PipeError command n output)
@@ -104,26 +105,26 @@ data PipeError = PipeError
, pipeErrorOutput :: BL.ByteString
}
-instance FromLuaStack PipeError where
+instance Peekable PipeError where
peek idx =
PipeError
<$> (Lua.getfield idx "command" *> Lua.peek (-1) <* Lua.pop 1)
<*> (Lua.getfield idx "error_code" *> Lua.peek (-1) <* Lua.pop 1)
<*> (Lua.getfield idx "output" *> Lua.peek (-1) <* Lua.pop 1)
-instance ToLuaStack PipeError where
+instance Pushable PipeError where
push pipeErr = do
Lua.newtable
- addValue "command" (pipeErrorCommand pipeErr)
- addValue "error_code" (pipeErrorCode pipeErr)
- addValue "output" (pipeErrorOutput pipeErr)
+ LuaUtil.addField "command" (pipeErrorCommand pipeErr)
+ LuaUtil.addField "error_code" (pipeErrorCode pipeErr)
+ LuaUtil.addField "output" (pipeErrorOutput pipeErr)
pushPipeErrorMetaTable
Lua.setmetatable (-2)
where
pushPipeErrorMetaTable :: Lua ()
pushPipeErrorMetaTable = do
v <- Lua.newmetatable "pandoc pipe error"
- when v $ addFunction "__tostring" pipeErrorMessage
+ when v $ LuaUtil.addFunction "__tostring" pipeErrorMessage
pipeErrorMessage :: PipeError -> Lua BL.ByteString
pipeErrorMessage (PipeError cmd errorCode output) = return $ mconcat
diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs
index 7fa4616be..030d6af95 100644
--- a/src/Text/Pandoc/Lua/Module/Utils.hs
+++ b/src/Text/Pandoc/Lua/Module/Utils.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -16,6 +15,7 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
+{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.Lua.Module.Utils
Copyright : Copyright © 2017-2018 Albert Krewinkel
@@ -33,15 +33,16 @@ module Text.Pandoc.Lua.Module.Utils
import Prelude
import Control.Applicative ((<|>))
import Data.Default (def)
-import Foreign.Lua (FromLuaStack, Lua, LuaInteger, NumResults)
+import Foreign.Lua (Peekable, Lua, NumResults)
import Text.Pandoc.Class (runIO, setUserDataDir)
import Text.Pandoc.Definition (Pandoc, Meta, MetaValue, Block, Inline)
import Text.Pandoc.Lua.StackInstances ()
-import Text.Pandoc.Lua.Util (addFunction, popValue)
+import Text.Pandoc.Lua.Util (addFunction)
import qualified Data.Digest.Pure.SHA as SHA
import qualified Data.ByteString.Lazy as BSL
import qualified Foreign.Lua as Lua
+import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.Filter.JSON as JSONFilter
import qualified Text.Pandoc.Shared as Shared
@@ -49,6 +50,7 @@ import qualified Text.Pandoc.Shared as Shared
pushModule :: Maybe FilePath -> Lua NumResults
pushModule mbDatadir = do
Lua.newtable
+ addFunction "blocks_to_inlines" blocksToInlines
addFunction "hierarchicalize" hierarchicalize
addFunction "normalize_date" normalizeDate
addFunction "run_json_filter" (runJSONFilter mbDatadir)
@@ -57,6 +59,14 @@ pushModule mbDatadir = do
addFunction "to_roman_numeral" toRomanNumeral
return 1
+-- | Squashes a list of blocks into inlines.
+blocksToInlines :: [Block] -> Lua.Optional [Inline] -> Lua [Inline]
+blocksToInlines blks optSep = do
+ let sep = case Lua.fromOptional optSep of
+ Just x -> B.fromList x
+ Nothing -> Shared.defaultBlocksSeparator
+ return $ B.toList (Shared.blocksToInlinesWithSep sep blks)
+
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements.
hierarchicalize :: [Block] -> Lua [Shared.Element]
hierarchicalize = return . Shared.hierarchicalize
@@ -79,7 +89,7 @@ runJSONFilter mbDatadir doc filterFile optArgs = do
Just x -> return x
Nothing -> do
Lua.getglobal "FORMAT"
- (:[]) <$> popValue
+ (:[]) <$> Lua.popValue
filterRes <- Lua.liftIO . runIO $ do
setUserDataDir mbDatadir
JSONFilter.apply def args filterFile doc
@@ -111,18 +121,18 @@ data AstElement
| MetaValueElement MetaValue
deriving (Show)
-instance FromLuaStack AstElement where
+instance Peekable AstElement where
peek idx = do
- res <- Lua.tryLua $ (PandocElement <$> Lua.peek idx)
- <|> (InlineElement <$> Lua.peek idx)
- <|> (BlockElement <$> Lua.peek idx)
- <|> (MetaElement <$> Lua.peek idx)
- <|> (MetaValueElement <$> Lua.peek idx)
+ res <- Lua.try $ (PandocElement <$> Lua.peek idx)
+ <|> (InlineElement <$> Lua.peek idx)
+ <|> (BlockElement <$> Lua.peek idx)
+ <|> (MetaElement <$> Lua.peek idx)
+ <|> (MetaValueElement <$> Lua.peek idx)
case res of
Right x -> return x
- Left _ -> Lua.throwLuaError
+ Left _ -> Lua.throwException
"Expected an AST element, but could not parse value as such."
-- | Convert a number < 4000 to uppercase roman numeral.
-toRomanNumeral :: LuaInteger -> Lua String
+toRomanNumeral :: Lua.Integer -> Lua String
toRomanNumeral = return . Shared.toRomanNumeral . fromIntegral
diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs
index 59637826e..5cf11f5c5 100644
--- a/src/Text/Pandoc/Lua/Packages.hs
+++ b/src/Text/Pandoc/Lua/Packages.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright © 2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -16,8 +15,9 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
-{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Lua.Packages
Copyright : Copyright © 2017-2018 Albert Krewinkel
@@ -35,12 +35,11 @@ module Text.Pandoc.Lua.Packages
import Prelude
import Control.Monad (forM_)
-import Data.ByteString.Char8 (unpack)
+import Data.ByteString (ByteString)
import Data.IORef (IORef)
import Foreign.Lua (Lua, NumResults, liftIO)
import Text.Pandoc.Class (CommonState, readDataFile, runIO, setUserDataDir)
import Text.Pandoc.MediaBag (MediaBag)
-import Text.Pandoc.Lua.Util (dostring')
import qualified Foreign.Lua as Lua
import Text.Pandoc.Lua.Module.Pandoc as Pandoc
@@ -57,14 +56,10 @@ data LuaPackageParams = LuaPackageParams
-- | Insert pandoc's package loader as the first loader, making it the default.
installPandocPackageSearcher :: LuaPackageParams -> Lua ()
installPandocPackageSearcher luaPkgParams = do
- luaVersion <- Lua.getglobal "_VERSION" *> Lua.peek (-1)
- if luaVersion == "Lua 5.1"
- then Lua.getglobal' "package.loaders"
- else Lua.getglobal' "package.searchers"
+ Lua.getglobal' "package.searchers"
shiftArray
Lua.pushHaskellFunction (pandocPackageSearcher luaPkgParams)
- Lua.wrapHaskellFunction
- Lua.rawseti (-2) 1
+ Lua.rawseti (Lua.nthFromTop 2) 1
Lua.pop 1 -- remove 'package.searchers' from stack
where
shiftArray = forM_ [4, 3, 2, 1] $ \i -> do
@@ -86,7 +81,6 @@ pandocPackageSearcher luaPkgParams pkgName =
where
pushWrappedHsFun f = do
Lua.pushHaskellFunction f
- Lua.wrapHaskellFunction
return 1
searchPureLuaLoader = do
let filename = pkgName ++ ".lua"
@@ -97,21 +91,19 @@ pandocPackageSearcher luaPkgParams pkgName =
Lua.push ("no file '" ++ filename ++ "' in pandoc's datadir")
return 1
-loadStringAsPackage :: String -> String -> Lua NumResults
+loadStringAsPackage :: String -> ByteString -> Lua NumResults
loadStringAsPackage pkgName script = do
- status <- dostring' script
+ status <- Lua.dostring script
if status == Lua.OK
then return (1 :: NumResults)
else do
- msg <- Lua.peek (-1) <* Lua.pop 1
- Lua.push ("Error while loading ``" ++ pkgName ++ "`.\n" ++ msg)
- Lua.lerror
- return (2 :: NumResults)
+ msg <- Lua.popValue
+ Lua.raiseError ("Error while loading `" <> pkgName <> "`.\n" <> msg)
--- | Get the string representation of the pandoc module
-dataDirScript :: Maybe FilePath -> FilePath -> IO (Maybe String)
+-- | Get the ByteString representation of the pandoc module.
+dataDirScript :: Maybe FilePath -> FilePath -> IO (Maybe ByteString)
dataDirScript datadir moduleFile = do
res <- runIO $ setUserDataDir datadir >> readDataFile moduleFile
return $ case res of
Left _ -> Nothing
- Right s -> Just (unpack s)
+ Right s -> Just s
diff --git a/src/Text/Pandoc/Lua/StackInstances.hs b/src/Text/Pandoc/Lua/StackInstances.hs
index 3298079c5..931b8c225 100644
--- a/src/Text/Pandoc/Lua/StackInstances.hs
+++ b/src/Text/Pandoc/Lua/StackInstances.hs
@@ -1,4 +1,8 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE LambdaCase #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
{-
Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu>
2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -17,10 +21,6 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE LambdaCase #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# OPTIONS_GHC -fno-warn-orphans #-}
{- |
Module : Text.Pandoc.Lua.StackInstances
Copyright : © 2012-2018 John MacFarlane
@@ -37,148 +37,125 @@ module Text.Pandoc.Lua.StackInstances () where
import Prelude
import Control.Applicative ((<|>))
import Control.Monad (when)
-import Control.Monad.Catch (finally)
import Data.Data (showConstr, toConstr)
-import Data.Foldable (forM_)
-import Foreign.Lua (FromLuaStack (peek), Lua, LuaInteger, LuaNumber, StackIndex,
- ToLuaStack (push), Type (..), throwLuaError, tryLua)
+import Foreign.Lua (Lua, Peekable, Pushable, StackIndex)
import Text.Pandoc.Definition
import Text.Pandoc.Extensions (Extensions)
-import Text.Pandoc.Lua.Util (getTable, getTag, pushViaConstructor, typeCheck)
+import Text.Pandoc.Lua.Util (defineHowTo, pushViaConstructor)
import Text.Pandoc.Options (ReaderOptions (..), TrackChanges)
-import Text.Pandoc.Shared (Element (Blk, Sec), safeRead)
+import Text.Pandoc.Shared (Element (Blk, Sec))
-import qualified Foreign.Lua as Lua
import qualified Data.Set as Set
+import qualified Foreign.Lua as Lua
import qualified Text.Pandoc.Lua.Util as LuaUtil
-defineHowTo :: String -> Lua a -> Lua a
-defineHowTo ctx op = op `Lua.modifyLuaError` (("Could not " ++ ctx ++ ": ") ++)
-
-instance ToLuaStack Pandoc where
+instance Pushable Pandoc where
push (Pandoc meta blocks) =
pushViaConstructor "Pandoc" blocks meta
-instance FromLuaStack Pandoc where
+instance Peekable Pandoc where
peek idx = defineHowTo "get Pandoc value" $ do
- typeCheck idx Lua.TypeTable
- blocks <- getTable idx "blocks"
- meta <- Lua.getfield idx "meta" *> (Lua.peek Lua.stackTop `finally` Lua.pop 1)
+ blocks <- LuaUtil.rawField idx "blocks"
+ meta <- LuaUtil.rawField idx "meta"
return $ Pandoc meta blocks
-instance ToLuaStack Meta where
+instance Pushable Meta where
push (Meta mmap) =
pushViaConstructor "Meta" mmap
-instance FromLuaStack Meta where
- peek idx = defineHowTo "get Meta value" $ do
- typeCheck idx Lua.TypeTable
- Meta <$> peek idx
+instance Peekable Meta where
+ peek idx = defineHowTo "get Meta value" $
+ Meta <$> Lua.peek idx
-instance ToLuaStack MetaValue where
+instance Pushable MetaValue where
push = pushMetaValue
-instance FromLuaStack MetaValue where
+instance Peekable MetaValue where
peek = peekMetaValue
-instance ToLuaStack Block where
+instance Pushable Block where
push = pushBlock
-instance FromLuaStack Block where
+instance Peekable Block where
peek = peekBlock
-- Inline
-instance ToLuaStack Inline where
+instance Pushable Inline where
push = pushInline
-instance FromLuaStack Inline where
+instance Peekable Inline where
peek = peekInline
-- Citation
-instance ToLuaStack Citation where
+instance Pushable Citation where
push (Citation cid prefix suffix mode noteNum hash) =
pushViaConstructor "Citation" cid mode prefix suffix noteNum hash
-instance FromLuaStack Citation where
+instance Peekable Citation where
peek idx = do
- id' <- getTable idx "id"
- prefix <- getTable idx "prefix"
- suffix <- getTable idx "suffix"
- mode <- getTable idx "mode"
- num <- getTable idx "note_num"
- hash <- getTable idx "hash"
+ id' <- LuaUtil.rawField idx "id"
+ prefix <- LuaUtil.rawField idx "prefix"
+ suffix <- LuaUtil.rawField idx "suffix"
+ mode <- LuaUtil.rawField idx "mode"
+ num <- LuaUtil.rawField idx "note_num"
+ hash <- LuaUtil.rawField idx "hash"
return $ Citation id' prefix suffix mode num hash
-instance ToLuaStack Alignment where
- push = push . show
-instance FromLuaStack Alignment where
- peek idx = safeRead' =<< peek idx
-
-instance ToLuaStack CitationMode where
- push = push . show
-instance FromLuaStack CitationMode where
- peek idx = safeRead' =<< peek idx
-
-instance ToLuaStack Format where
- push (Format f) = push f
-instance FromLuaStack Format where
- peek idx = Format <$> peek idx
-
-instance ToLuaStack ListNumberDelim where
- push = push . show
-instance FromLuaStack ListNumberDelim where
- peek idx = safeRead' =<< peek idx
-
-instance ToLuaStack ListNumberStyle where
- push = push . show
-instance FromLuaStack ListNumberStyle where
- peek idx = safeRead' =<< peek idx
-
-instance ToLuaStack MathType where
- push = push . show
-instance FromLuaStack MathType where
- peek idx = safeRead' =<< peek idx
-
-instance ToLuaStack QuoteType where
- push = push . show
-instance FromLuaStack QuoteType where
- peek idx = safeRead' =<< peek idx
-
-instance ToLuaStack Double where
- push = push . (realToFrac :: Double -> LuaNumber)
-instance FromLuaStack Double where
- peek = fmap (realToFrac :: LuaNumber -> Double) . peek
-
-instance ToLuaStack Int where
- push = push . (fromIntegral :: Int -> LuaInteger)
-instance FromLuaStack Int where
- peek = fmap (fromIntegral :: LuaInteger-> Int) . peek
-
-safeRead' :: Read a => String -> Lua a
-safeRead' s = case safeRead s of
- Nothing -> throwLuaError ("Could not read: " ++ s)
- Just x -> return x
+instance Pushable Alignment where
+ push = Lua.push . show
+instance Peekable Alignment where
+ peek = Lua.peekRead
+
+instance Pushable CitationMode where
+ push = Lua.push . show
+instance Peekable CitationMode where
+ peek = Lua.peekRead
+
+instance Pushable Format where
+ push (Format f) = Lua.push f
+instance Peekable Format where
+ peek idx = Format <$> Lua.peek idx
+
+instance Pushable ListNumberDelim where
+ push = Lua.push . show
+instance Peekable ListNumberDelim where
+ peek = Lua.peekRead
+
+instance Pushable ListNumberStyle where
+ push = Lua.push . show
+instance Peekable ListNumberStyle where
+ peek = Lua.peekRead
+
+instance Pushable MathType where
+ push = Lua.push . show
+instance Peekable MathType where
+ peek = Lua.peekRead
+
+instance Pushable QuoteType where
+ push = Lua.push . show
+instance Peekable QuoteType where
+ peek = Lua.peekRead
-- | Push an meta value element to the top of the lua stack.
pushMetaValue :: MetaValue -> Lua ()
pushMetaValue = \case
MetaBlocks blcks -> pushViaConstructor "MetaBlocks" blcks
- MetaBool bool -> push bool
+ MetaBool bool -> Lua.push bool
MetaInlines inlns -> pushViaConstructor "MetaInlines" inlns
MetaList metalist -> pushViaConstructor "MetaList" metalist
MetaMap metamap -> pushViaConstructor "MetaMap" metamap
- MetaString str -> push str
+ MetaString str -> Lua.push str
-- | Interpret the value at the given stack index as meta value.
peekMetaValue :: StackIndex -> Lua MetaValue
peekMetaValue idx = defineHowTo "get MetaValue" $ do
-- Get the contents of an AST element.
- let elementContent :: FromLuaStack a => Lua a
- elementContent = peek idx
+ let elementContent :: Peekable a => Lua a
+ elementContent = Lua.peek idx
luatype <- Lua.ltype idx
case luatype of
- TypeBoolean -> MetaBool <$> peek idx
- TypeString -> MetaString <$> peek idx
- TypeTable -> do
- tag <- tryLua $ getTag idx
+ Lua.TypeBoolean -> MetaBool <$> Lua.peek idx
+ Lua.TypeString -> MetaString <$> Lua.peek idx
+ Lua.TypeTable -> do
+ tag <- Lua.try $ LuaUtil.getTag idx
case tag of
Right "MetaBlocks" -> MetaBlocks <$> elementContent
Right "MetaBool" -> MetaBool <$> elementContent
@@ -186,16 +163,16 @@ peekMetaValue idx = defineHowTo "get MetaValue" $ do
Right "MetaInlines" -> MetaInlines <$> elementContent
Right "MetaList" -> MetaList <$> elementContent
Right "MetaString" -> MetaString <$> elementContent
- Right t -> throwLuaError ("Unknown meta tag: " ++ t)
+ Right t -> Lua.throwException ("Unknown meta tag: " <> t)
Left _ -> do
-- no meta value tag given, try to guess.
len <- Lua.rawlen idx
if len <= 0
- then MetaMap <$> peek idx
- else (MetaInlines <$> peek idx)
- <|> (MetaBlocks <$> peek idx)
- <|> (MetaList <$> peek idx)
- _ -> throwLuaError "could not get meta value"
+ then MetaMap <$> Lua.peek idx
+ else (MetaInlines <$> Lua.peek idx)
+ <|> (MetaBlocks <$> Lua.peek idx)
+ <|> (MetaList <$> Lua.peek idx)
+ _ -> Lua.throwException "could not get meta value"
-- | Push an block element to the top of the lua stack.
pushBlock :: Block -> Lua ()
@@ -219,8 +196,7 @@ pushBlock = \case
-- | Return the value at the given index as block if possible.
peekBlock :: StackIndex -> Lua Block
peekBlock idx = defineHowTo "get Block value" $ do
- typeCheck idx Lua.TypeTable
- tag <- getTag idx
+ tag <- LuaUtil.getTag idx
case tag of
"BlockQuote" -> BlockQuote <$> elementContent
"BulletList" -> BulletList <$> elementContent
@@ -239,11 +215,11 @@ peekBlock idx = defineHowTo "get Block value" $ do
"Table" -> (\(capt, aligns, widths, headers, body) ->
Table capt aligns widths headers body)
<$> elementContent
- _ -> throwLuaError ("Unknown block type: " ++ tag)
+ _ -> Lua.throwException ("Unknown block type: " <> tag)
where
-- Get the contents of an AST element.
- elementContent :: FromLuaStack a => Lua a
- elementContent = getTable idx "c"
+ elementContent :: Peekable a => Lua a
+ elementContent = LuaUtil.rawField idx "c"
-- | Push an inline element to the top of the lua stack.
pushInline :: Inline -> Lua ()
@@ -271,8 +247,7 @@ pushInline = \case
-- | Return the value at the given index as inline if possible.
peekInline :: StackIndex -> Lua Inline
peekInline idx = defineHowTo "get Inline value" $ do
- typeCheck idx Lua.TypeTable
- tag <- getTag idx
+ tag <- LuaUtil.getTag idx
case tag of
"Cite" -> uncurry Cite <$> elementContent
"Code" -> withAttr Code <$> elementContent
@@ -295,11 +270,11 @@ peekInline idx = defineHowTo "get Inline value" $ do
"Strong" -> Strong <$> elementContent
"Subscript" -> Subscript <$> elementContent
"Superscript"-> Superscript <$> elementContent
- _ -> throwLuaError ("Unknown inline type: " ++ tag)
+ _ -> Lua.throwException ("Unknown inline type: " <> tag)
where
-- Get the contents of an AST element.
- elementContent :: FromLuaStack a => Lua a
- elementContent = getTable idx "c"
+ elementContent :: Peekable a => Lua a
+ elementContent = LuaUtil.rawField idx "c"
withAttr :: (Attr -> a -> b) -> (LuaAttr, a) -> b
withAttr f (attributes, x) = f (fromLuaAttr attributes) x
@@ -307,25 +282,25 @@ withAttr f (attributes, x) = f (fromLuaAttr attributes) x
-- | Wrapper for Attr
newtype LuaAttr = LuaAttr { fromLuaAttr :: Attr }
-instance ToLuaStack LuaAttr where
+instance Pushable LuaAttr where
push (LuaAttr (id', classes, kv)) =
pushViaConstructor "Attr" id' classes kv
-instance FromLuaStack LuaAttr where
- peek idx = defineHowTo "get Attr value" (LuaAttr <$> peek idx)
+instance Peekable LuaAttr where
+ peek idx = defineHowTo "get Attr value" (LuaAttr <$> Lua.peek idx)
--
-- Hierarchical elements
--
-instance ToLuaStack Element where
- push (Blk blk) = push blk
+instance Pushable Element where
+ push (Blk blk) = Lua.push blk
push (Sec lvl num attr label contents) = do
Lua.newtable
- LuaUtil.addValue "level" lvl
- LuaUtil.addValue "numbering" num
- LuaUtil.addValue "attr" (LuaAttr attr)
- LuaUtil.addValue "label" label
- LuaUtil.addValue "contents" contents
+ LuaUtil.addField "level" lvl
+ LuaUtil.addField "numbering" num
+ LuaUtil.addField "attr" (LuaAttr attr)
+ LuaUtil.addField "label" label
+ LuaUtil.addField "contents" contents
pushSecMetaTable
Lua.setmetatable (-2)
where
@@ -333,7 +308,7 @@ instance ToLuaStack Element where
pushSecMetaTable = do
inexistant <- Lua.newmetatable "PandocElementSec"
when inexistant $ do
- LuaUtil.addValue "t" "Sec"
+ LuaUtil.addField "t" "Sec"
Lua.push "__index"
Lua.pushvalue (-2)
Lua.rawset (-3)
@@ -342,18 +317,13 @@ instance ToLuaStack Element where
--
-- Reader Options
--
-instance ToLuaStack Extensions where
- push exts = push (show exts)
-
-instance ToLuaStack TrackChanges where
- push = push . showConstr . toConstr
+instance Pushable Extensions where
+ push exts = Lua.push (show exts)
-instance ToLuaStack a => ToLuaStack (Set.Set a) where
- push set = do
- Lua.newtable
- forM_ set (`LuaUtil.addValue` True)
+instance Pushable TrackChanges where
+ push = Lua.push . showConstr . toConstr
-instance ToLuaStack ReaderOptions where
+instance Pushable ReaderOptions where
push ro = do
let ReaderOptions
(extensions :: Extensions)
@@ -367,12 +337,12 @@ instance ToLuaStack ReaderOptions where
(stripComments :: Bool)
= ro
Lua.newtable
- LuaUtil.addValue "extensions" extensions
- LuaUtil.addValue "standalone" standalone
- LuaUtil.addValue "columns" columns
- LuaUtil.addValue "tabStop" tabStop
- LuaUtil.addValue "indentedCodeClasses" indentedCodeClasses
- LuaUtil.addValue "abbreviations" abbreviations
- LuaUtil.addValue "defaultImageExtension" defaultImageExtension
- LuaUtil.addValue "trackChanges" trackChanges
- LuaUtil.addValue "stripComments" stripComments
+ LuaUtil.addField "extensions" extensions
+ LuaUtil.addField "standalone" standalone
+ LuaUtil.addField "columns" columns
+ LuaUtil.addField "tabStop" tabStop
+ LuaUtil.addField "indentedCodeClasses" indentedCodeClasses
+ LuaUtil.addField "abbreviations" abbreviations
+ LuaUtil.addField "defaultImageExtension" defaultImageExtension
+ LuaUtil.addField "trackChanges" trackChanges
+ LuaUtil.addField "stripComments" stripComments
diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs
index ea9ec2554..77b27b88e 100644
--- a/src/Text/Pandoc/Lua/Util.hs
+++ b/src/Text/Pandoc/Lua/Util.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright © 2012-2018 John MacFarlane <jgm@berkeley.edu>
2017-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -18,6 +17,8 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.Lua.Util
Copyright : © 2012–2018 John MacFarlane,
@@ -31,101 +32,53 @@ Lua utility functions.
-}
module Text.Pandoc.Lua.Util
( getTag
- , getTable
- , addValue
+ , rawField
+ , addField
, addFunction
- , getRawInt
- , setRawInt
- , addRawInt
- , typeCheck
- , raiseError
- , popValue
- , PushViaCall
- , pushViaCall
+ , addValue
, pushViaConstructor
, loadScriptFromDataDir
- , dostring'
+ , defineHowTo
+ , throwTopMessageAsError'
+ , callWithTraceback
+ , dofileWithTraceback
) where
import Prelude
-import Control.Monad (when)
-import Control.Monad.Catch (finally)
-import Data.ByteString.Char8 (unpack)
-import Foreign.Lua (FromLuaStack (..), NumResults, Lua, NumArgs, StackIndex,
- ToLuaStack (..), ToHaskellFunction)
-import Foreign.Lua.Api (Status, call, pop, rawget, rawgeti, rawset, rawseti)
+import Control.Monad (unless, when)
+import Foreign.Lua ( Lua, NumArgs, NumResults, Peekable, Pushable, StackIndex
+ , Status, ToHaskellFunction )
import Text.Pandoc.Class (readDataFile, runIOorExplode, setUserDataDir)
import qualified Foreign.Lua as Lua
-
--- | Adjust the stack index, assuming that @n@ new elements have been pushed on
--- the stack.
-adjustIndexBy :: StackIndex -> StackIndex -> StackIndex
-adjustIndexBy idx n =
- if idx < 0
- then idx - n
- else idx
+import qualified Text.Pandoc.UTF8 as UTF8
-- | Get value behind key from table at given index.
-getTable :: (ToLuaStack a, FromLuaStack b) => StackIndex -> a -> Lua b
-getTable idx key = do
- push key
- rawget (idx `adjustIndexBy` 1)
- popValue
+rawField :: Peekable a => StackIndex -> String -> Lua a
+rawField idx key = do
+ absidx <- Lua.absindex idx
+ Lua.push key
+ Lua.rawget absidx
+ Lua.popValue
+
+-- | Add a value to the table at the top of the stack at a string-index.
+addField :: Pushable a => String -> a -> Lua ()
+addField = addValue
-- | Add a key-value pair to the table at the top of the stack.
-addValue :: (ToLuaStack a, ToLuaStack b) => a -> b -> Lua ()
+addValue :: (Pushable a, Pushable b) => a -> b -> Lua ()
addValue key value = do
- push key
- push value
- rawset (-3)
+ Lua.push key
+ Lua.push value
+ Lua.rawset (Lua.nthFromTop 3)
-- | Add a function to the table at the top of the stack, using the given name.
addFunction :: ToHaskellFunction a => String -> a -> Lua ()
addFunction name fn = do
Lua.push name
Lua.pushHaskellFunction fn
- Lua.wrapHaskellFunction
Lua.rawset (-3)
--- | Get value behind key from table at given index.
-getRawInt :: FromLuaStack a => StackIndex -> Int -> Lua a
-getRawInt idx key = do
- rawgeti idx key
- popValue
-
--- | Set numeric key/value in table at the given index
-setRawInt :: ToLuaStack a => StackIndex -> Int -> a -> Lua ()
-setRawInt idx key value = do
- push value
- rawseti (idx `adjustIndexBy` 1) key
-
--- | Set numeric key/value in table at the top of the stack.
-addRawInt :: ToLuaStack a => Int -> a -> Lua ()
-addRawInt = setRawInt (-1)
-
-typeCheck :: StackIndex -> Lua.Type -> Lua ()
-typeCheck idx expected = do
- actual <- Lua.ltype idx
- when (actual /= expected) $ do
- expName <- Lua.typename expected
- actName <- Lua.typename actual
- Lua.throwLuaError $ "expected " ++ expName ++ " but got " ++ actName ++ "."
-
-raiseError :: ToLuaStack a => a -> Lua NumResults
-raiseError e = do
- Lua.push e
- fromIntegral <$> Lua.lerror
-
--- | Get, then pop the value at the top of the stack.
-popValue :: FromLuaStack a => Lua a
-popValue = do
- resOrError <- Lua.peekEither (-1)
- pop 1
- case resOrError of
- Left err -> Lua.throwLuaError err
- Right x -> return x
-
-- | Helper class for pushing a single value to the stack via a lua function.
-- See @pushViaCall@.
class PushViaCall a where
@@ -136,11 +89,11 @@ instance PushViaCall (Lua ()) where
Lua.push fn
Lua.rawget Lua.registryindex
pushArgs
- call num 1
+ Lua.call num 1
-instance (ToLuaStack a, PushViaCall b) => PushViaCall (a -> b) where
+instance (Pushable a, PushViaCall b) => PushViaCall (a -> b) where
pushViaCall' fn pushArgs num x =
- pushViaCall' fn (pushArgs *> push x) (num + 1)
+ pushViaCall' fn (pushArgs *> Lua.push x) (num + 1)
-- | Push an value to the stack via a lua function. The lua function is called
-- with all arguments that are passed to this function and is expected to return
@@ -155,26 +108,11 @@ pushViaConstructor pandocFn = pushViaCall ("pandoc." ++ pandocFn)
-- | Load a file from pandoc's data directory.
loadScriptFromDataDir :: Maybe FilePath -> FilePath -> Lua ()
loadScriptFromDataDir datadir scriptFile = do
- script <- fmap unpack . Lua.liftIO . runIOorExplode $
+ script <- Lua.liftIO . runIOorExplode $
setUserDataDir datadir >> readDataFile scriptFile
- status <- dostring' script
- when (status /= Lua.OK) .
- Lua.throwTopMessageAsError' $ \msg ->
- "Couldn't load '" ++ scriptFile ++ "'.\n" ++ msg
-
--- | Load a string and immediately perform a full garbage collection. This is
--- important to keep the program from hanging: If the program contained a call
--- to @require@, the a new loader function was created which then become
--- garbage. If that function is collected at an inopportune times, i.e. when the
--- Lua API is called via a function that doesn't allow calling back into Haskell
--- (getraw, setraw, …), then the function's finalizer, and the full program,
--- will hang.
-dostring' :: String -> Lua Status
-dostring' script = do
- loadRes <- Lua.loadstring script
- if loadRes == Lua.OK
- then Lua.pcall 0 1 Nothing <* Lua.gc Lua.GCCOLLECT 0
- else return loadRes
+ status <- Lua.dostring script
+ when (status /= Lua.OK) $
+ throwTopMessageAsError' (("Couldn't load '" ++ scriptFile ++ "'.\n") ++)
-- | Get the tag of a value. This is an optimized and specialized version of
-- @Lua.getfield idx "tag"@. It only checks for the field on the table at index
@@ -182,8 +120,54 @@ dostring' script = do
-- metatable.
getTag :: StackIndex -> Lua String
getTag idx = do
- top <- Lua.gettop
- hasMT <- Lua.getmetatable idx
- push "tag"
- if hasMT then Lua.rawget (-2) else Lua.rawget (idx `adjustIndexBy` 1)
- peek Lua.stackTop `finally` Lua.settop top
+ -- push metatable or just the table
+ Lua.getmetatable idx >>= \hasMT -> unless hasMT (Lua.pushvalue idx)
+ Lua.push "tag"
+ Lua.rawget (Lua.nthFromTop 2)
+ Lua.tostring Lua.stackTop <* Lua.pop 2 >>= \case
+ Nothing -> Lua.throwException "untagged value"
+ Just x -> return (UTF8.toString x)
+
+-- | Modify the message at the top of the stack before throwing it as an
+-- Exception.
+throwTopMessageAsError' :: (String -> String) -> Lua a
+throwTopMessageAsError' modifier = do
+ msg <- Lua.tostring' Lua.stackTop
+ Lua.pop 2 -- remove error and error string pushed by tostring'
+ Lua.throwException (modifier (UTF8.toString msg))
+
+-- | Mark the context of a Lua computation for better error reporting.
+defineHowTo :: String -> Lua a -> Lua a
+defineHowTo ctx = Lua.withExceptionMessage (("Could not " <> ctx <> ": ") <>)
+
+-- | Like @'Lua.pcall'@, but uses a predefined error handler which adds a
+-- traceback on error.
+pcallWithTraceback :: NumArgs -> NumResults -> Lua Status
+pcallWithTraceback nargs nresults = do
+ let traceback' :: Lua NumResults
+ traceback' = do
+ l <- Lua.state
+ msg <- Lua.tostring' (Lua.nthFromBottom 1)
+ Lua.traceback l (Just (UTF8.toString msg)) 2
+ return 1
+ tracebackIdx <- Lua.absindex (Lua.nthFromTop (Lua.fromNumArgs nargs + 1))
+ Lua.pushHaskellFunction traceback'
+ Lua.insert tracebackIdx
+ result <- Lua.pcall nargs nresults (Just tracebackIdx)
+ Lua.remove tracebackIdx
+ return result
+
+-- | Like @'Lua.call'@, but adds a traceback to the error message (if any).
+callWithTraceback :: NumArgs -> NumResults -> Lua ()
+callWithTraceback nargs nresults = do
+ result <- pcallWithTraceback nargs nresults
+ when (result /= Lua.OK) Lua.throwTopMessage
+
+-- | Run the given string as a Lua program, while also adding a traceback to the
+-- error message if an error occurs.
+dofileWithTraceback :: FilePath -> Lua Status
+dofileWithTraceback fp = do
+ loadRes <- Lua.loadfile fp
+ case loadRes of
+ Lua.OK -> pcallWithTraceback 0 Lua.multret
+ _ -> return loadRes
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index e5ca1764c..204060d70 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -194,6 +194,7 @@ data WriterOptions = WriterOptions
, writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified
, writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown
, writerSyntaxMap :: SyntaxMap
+ , writerPreferAscii :: Bool -- ^ Prefer ASCII representations of characters when possible
} deriving (Show, Data, Typeable, Generic)
instance Default WriterOptions where
@@ -228,6 +229,7 @@ instance Default WriterOptions where
, writerReferenceDoc = Nothing
, writerReferenceLocation = EndOfDocument
, writerSyntaxMap = defaultSyntaxMap
+ , writerPreferAscii = False
}
instance HasSyntaxExtensions WriterOptions where
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index b171d65b0..3484699c0 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -79,13 +79,52 @@ changePathSeparators = intercalate "/" . splitDirectories
#endif
makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex,
- -- wkhtmltopdf, weasyprint, prince, context, pdfroff)
+ -- wkhtmltopdf, weasyprint, prince, context, pdfroff,
+ -- or path to executable)
-> [String] -- ^ arguments to pass to pdf creator
-> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer
-> WriterOptions -- ^ options
-> Pandoc -- ^ document
-> PandocIO (Either ByteString ByteString)
-makePDF "wkhtmltopdf" pdfargs writer opts doc@(Pandoc meta _) = do
+makePDF program pdfargs writer opts doc = do
+ case takeBaseName program of
+ "wkhtmltopdf" -> makeWithWkhtmltopdf program pdfargs writer opts doc
+ prog | prog `elem` ["weasyprint", "prince"] -> do
+ source <- writer opts doc
+ verbosity <- getVerbosity
+ liftIO $ html2pdf verbosity program pdfargs source
+ "pdfroff" -> do
+ source <- writer opts doc
+ let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i",
+ "--no-toc-relocation"] ++ pdfargs
+ verbosity <- getVerbosity
+ liftIO $ ms2pdf verbosity program args source
+ baseProg -> do
+ -- With context and latex, we create a temp directory within
+ -- the working directory, since pdflatex sometimes tries to
+ -- use tools like epstopdf.pl, which are restricted if run
+ -- on files outside the working directory.
+ let withTemp = withTempDirectory "."
+ commonState <- getCommonState
+ verbosity <- getVerbosity
+ liftIO $ withTemp "tex2pdf." $ \tmpdir -> do
+ source <- runIOorExplode $ do
+ putCommonState commonState
+ doc' <- handleImages tmpdir doc
+ writer opts doc'
+ case baseProg of
+ "context" -> context2pdf verbosity program tmpdir source
+ prog | prog `elem` ["pdflatex", "lualatex", "xelatex"]
+ -> tex2pdf verbosity program pdfargs tmpdir source
+ _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program
+
+makeWithWkhtmltopdf :: String -- ^ wkhtmltopdf or path
+ -> [String] -- ^ arguments
+ -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer
+ -> WriterOptions -- ^ options
+ -> Pandoc -- ^ document
+ -> PandocIO (Either ByteString ByteString)
+makeWithWkhtmltopdf program pdfargs writer opts doc@(Pandoc meta _) = do
let mathArgs = case writerHTMLMathMethod opts of
-- with MathJax, wait til all math is rendered:
MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });",
@@ -111,39 +150,7 @@ makePDF "wkhtmltopdf" pdfargs writer opts doc@(Pandoc meta _) = do
]
source <- writer opts doc
verbosity <- getVerbosity
- liftIO $ html2pdf verbosity "wkhtmltopdf" args source
-makePDF "weasyprint" pdfargs writer opts doc = do
- source <- writer opts doc
- verbosity <- getVerbosity
- liftIO $ html2pdf verbosity "weasyprint" pdfargs source
-makePDF "prince" pdfargs writer opts doc = do
- source <- writer opts doc
- verbosity <- getVerbosity
- liftIO $ html2pdf verbosity "prince" pdfargs source
-makePDF "pdfroff" pdfargs writer opts doc = do
- source <- writer opts doc
- let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i",
- "--no-toc-relocation"] ++ pdfargs
- verbosity <- getVerbosity
- liftIO $ ms2pdf verbosity args source
-makePDF program pdfargs writer opts doc = do
- -- With context and latex, we create a temp directory within
- -- the working directory, since pdflatex sometimes tries to
- -- use tools like epstopdf.pl, which are restricted if run
- -- on files outside the working directory.
- let withTemp = withTempDirectory "."
- commonState <- getCommonState
- verbosity <- getVerbosity
- liftIO $ withTemp "tex2pdf." $ \tmpdir -> do
- source <- runIOorExplode $ do
- putCommonState commonState
- doc' <- handleImages tmpdir doc
- writer opts doc'
- case takeBaseName program of
- "context" -> context2pdf verbosity tmpdir source
- prog | prog `elem` ["pdflatex", "lualatex", "xelatex"]
- -> tex2pdf' verbosity pdfargs tmpdir program source
- _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program
+ liftIO $ html2pdf verbosity program args source
handleImages :: FilePath -- ^ temp dir to store images
-> Pandoc -- ^ document
@@ -181,7 +188,7 @@ convertImage tmpdir fname =
then return $ Right pdfOut
else return $ Left "conversion from SVG failed")
(\(e :: E.SomeException) -> return $ Left $
- "check that rsvg2pdf is in path.\n" ++
+ "check that rsvg-convert is in path.\n" ++
show e)
_ -> JP.readImage fname >>= \res ->
case res of
@@ -195,13 +202,13 @@ convertImage tmpdir fname =
mime = getMimeType fname
doNothing = return (Right fname)
-tex2pdf' :: Verbosity -- ^ Verbosity level
- -> [String] -- ^ Arguments to the latex-engine
- -> FilePath -- ^ temp directory for output
- -> String -- ^ tex program
- -> Text -- ^ tex source
- -> IO (Either ByteString ByteString)
-tex2pdf' verbosity args tmpDir program source = do
+tex2pdf :: Verbosity -- ^ Verbosity level
+ -> String -- ^ tex program
+ -> [String] -- ^ Arguments to the latex-engine
+ -> FilePath -- ^ temp directory for output
+ -> Text -- ^ tex source
+ -> IO (Either ByteString ByteString)
+tex2pdf verbosity program args tmpDir source = do
let numruns = if "\\tableofcontents" `T.isInfixOf` source
then 3 -- to get page numbers
else 2 -- 1 run won't give you PDF bookmarks
@@ -278,12 +285,7 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
let file' = file
#endif
let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
- "-output-directory", tmpDir'] ++
- -- see #4484, only compress images on last run:
- if program == "xelatex" && runNumber < numRuns
- then ["-output-driver", "xdvipdfmx -z0"]
- else []
- ++ args ++ [file']
+ "-output-directory", tmpDir'] ++ args ++ [file']
env' <- getEnvironment
let sep = [searchPathSeparator]
let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++)
@@ -307,7 +309,7 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
putStrLn $ "[makePDF] Run #" ++ show runNumber
BL.hPutStr stdout out
putStr "\n"
- if runNumber <= numRuns
+ if runNumber < numRuns
then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source
else do
let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
@@ -328,14 +330,15 @@ runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
return (exit, log', pdf)
ms2pdf :: Verbosity
+ -> String
-> [String]
-> Text
-> IO (Either ByteString ByteString)
-ms2pdf verbosity args source = do
+ms2pdf verbosity program args source = do
env' <- getEnvironment
when (verbosity >= INFO) $ do
putStrLn "[makePDF] Command line:"
- putStrLn $ "pdfroff " ++ " " ++ unwords (map show args)
+ putStrLn $ program ++ " " ++ unwords (map show args)
putStr "\n"
putStrLn "[makePDF] Environment:"
mapM_ print env'
@@ -344,11 +347,11 @@ ms2pdf verbosity args source = do
putStr $ T.unpack source
putStr "\n"
(exit, out) <- E.catch
- (pipeProcess (Just env') "pdfroff" args
+ (pipeProcess (Just env') program args
(BL.fromStrict $ UTF8.fromText source))
(\(e :: IOError) -> if isDoesNotExistError e
then E.throwIO $
- PandocPDFProgramNotFoundError "pdfroff"
+ PandocPDFProgramNotFoundError program
else E.throwIO e)
when (verbosity >= INFO) $ do
BL.hPutStr stdout out
@@ -358,7 +361,7 @@ ms2pdf verbosity args source = do
ExitSuccess -> Right out
html2pdf :: Verbosity -- ^ Verbosity level
- -> String -- ^ Program (wkhtmltopdf, weasyprint or prince)
+ -> String -- ^ Program (wkhtmltopdf, weasyprint, prince, or path)
-> [String] -- ^ Args to program
-> Text -- ^ HTML5 source
-> IO (Either ByteString ByteString)
@@ -369,7 +372,7 @@ html2pdf verbosity program args source = do
file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp
pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp
BS.writeFile file $ UTF8.fromText source
- let pdfFileArgName = ["-o" | program == "prince"]
+ let pdfFileArgName = ["-o" | takeBaseName program == "prince"]
let programArgs = args ++ [file] ++ pdfFileArgName ++ [pdfFile]
env' <- getEnvironment
when (verbosity >= INFO) $ do
@@ -408,10 +411,11 @@ html2pdf verbosity program args source = do
(ExitSuccess, Just pdf) -> Right pdf
context2pdf :: Verbosity -- ^ Verbosity level
+ -> String -- ^ "context" or path to it
-> FilePath -- ^ temp directory for output
-> Text -- ^ ConTeXt source
-> IO (Either ByteString ByteString)
-context2pdf verbosity tmpDir source = inDirectory tmpDir $ do
+context2pdf verbosity program tmpDir source = inDirectory tmpDir $ do
let file = "input.tex"
BS.writeFile file $ UTF8.fromText source
#ifdef _WINDOWS
@@ -426,7 +430,7 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do
putStrLn "[makePDF] temp dir:"
putStrLn tmpDir'
putStrLn "[makePDF] Command line:"
- putStrLn $ "context" ++ " " ++ unwords (map show programArgs)
+ putStrLn $ program ++ " " ++ unwords (map show programArgs)
putStr "\n"
putStrLn "[makePDF] Environment:"
mapM_ print env'
@@ -435,7 +439,7 @@ context2pdf verbosity tmpDir source = inDirectory tmpDir $ do
BL.readFile file >>= BL.putStr
putStr "\n"
(exit, out) <- E.catch
- (pipeProcess (Just env') "context" programArgs BL.empty)
+ (pipeProcess (Just env') program programArgs BL.empty)
(\(e :: IOError) -> if isDoesNotExistError e
then E.throwIO $
PandocPDFProgramNotFoundError "context"
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 05f4f7d36..5d95d0e27 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -514,22 +514,19 @@ charsInBalanced open close parser = try $ do
-- Auxiliary functions for romanNumeral:
-lowercaseRomanDigits :: [Char]
-lowercaseRomanDigits = ['i','v','x','l','c','d','m']
-
-uppercaseRomanDigits :: [Char]
-uppercaseRomanDigits = map toUpper lowercaseRomanDigits
-
-- | Parses a roman numeral (uppercase or lowercase), returns number.
romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true
-> ParserT s st m Int
romanNumeral upperCase = do
- let romanDigits = if upperCase
- then uppercaseRomanDigits
- else lowercaseRomanDigits
- lookAhead $ oneOf romanDigits
- let [one, five, ten, fifty, hundred, fivehundred, thousand] =
- map char romanDigits
+ let rchar uc = char $ if upperCase then uc else toLower uc
+ let one = rchar 'I'
+ let five = rchar 'V'
+ let ten = rchar 'X'
+ let fifty = rchar 'L'
+ let hundred = rchar 'C'
+ let fivehundred = rchar 'D'
+ let thousand = rchar 'M'
+ lookAhead $ choice [one, five, ten, fifty, hundred, fivehundred, thousand]
thousands <- ((1000 *) . length) <$> many thousand
ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
fivehundreds <- option 0 $ 500 <$ fivehundred
@@ -1289,7 +1286,7 @@ type SubstTable = M.Map Key Inlines
-- unique identifier, and update the list of identifiers
-- in state. Issue a warning if an explicit identifier
-- is encountered that duplicates an earlier identifier
--- (explict or automatically generated).
+-- (explicit or automatically generated).
registerHeader :: (Stream s m a, HasReaderOptions st,
HasHeaderMap st, HasLogMessages st, HasIdentifierList st)
=> Attr -> Inlines -> ParserT s st m Attr
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 79a4abbc2..9c4f7a8ac 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -39,10 +39,12 @@ import Control.Monad.State
import Data.Char (isAlphaNum, isLetter, isSpace, toLower)
import Data.List (groupBy)
import qualified Data.Map as Map
+import Data.Maybe (mapMaybe)
import Data.Text (Text, unpack)
+import Text.Pandoc.Asciify (toAsciiChar)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Options
import Text.Pandoc.Shared (stringify)
import Text.Pandoc.Walk (walkM)
@@ -51,7 +53,7 @@ import Text.Pandoc.Walk (walkM)
readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMark opts s = return $
(if isEnabled Ext_gfm_auto_identifiers opts
- then addHeaderIdentifiers
+ then addHeaderIdentifiers opts
else id) $
nodeToPandoc opts $ commonmarkToNode opts' exts s
where opts' = [ optSmart | isEnabled Ext_smart opts ]
@@ -59,24 +61,27 @@ readCommonMark opts s = return $
[ extTable | isEnabled Ext_pipe_tables opts ] ++
[ extAutolink | isEnabled Ext_autolink_bare_uris opts ]
-convertEmojis :: String -> String
-convertEmojis (':':xs) =
+convertEmojis :: String -> [Inline]
+convertEmojis s@(':':xs) =
case break (==':') xs of
(ys,':':zs) ->
- case Map.lookup ys emojis of
- Just s -> s ++ convertEmojis zs
- Nothing -> ':' : ys ++ convertEmojis (':':zs)
- _ -> ':':xs
-convertEmojis (x:xs) = x : convertEmojis xs
-convertEmojis [] = []
-
-addHeaderIdentifiers :: Pandoc -> Pandoc
-addHeaderIdentifiers doc = evalState (walkM addHeaderId doc) mempty
-
-addHeaderId :: Block -> State (Map.Map String Int) Block
-addHeaderId (Header lev (_,classes,kvs) ils) = do
+ case emojiToInline ys of
+ Just em -> em : convertEmojis zs
+ Nothing -> Str (':' : ys) : convertEmojis (':':zs)
+ _ -> [Str s]
+convertEmojis s =
+ case break (==':') s of
+ ("","") -> []
+ (_,"") -> [Str s]
+ (xs,ys) -> Str xs:convertEmojis ys
+
+addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc
+addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty
+
+addHeaderId :: ReaderOptions -> Block -> State (Map.Map String Int) Block
+addHeaderId opts (Header lev (_,classes,kvs) ils) = do
idmap <- get
- let ident = toIdent ils
+ let ident = toIdent opts ils
ident' <- case Map.lookup ident idmap of
Nothing -> do
put (Map.insert ident 1 idmap)
@@ -85,13 +90,16 @@ addHeaderId (Header lev (_,classes,kvs) ils) = do
put (Map.adjust (+ 1) ident idmap)
return (ident ++ "-" ++ show i)
return $ Header lev (ident',classes,kvs) ils
-addHeaderId x = return x
+addHeaderId _ x = return x
-toIdent :: [Inline] -> String
-toIdent = map (\c -> if isSpace c then '-' else c)
- . filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
- c == '_' || c == '-')
- . map toLower . stringify
+toIdent :: ReaderOptions -> [Inline] -> String
+toIdent opts = map (\c -> if isSpace c then '-' else c)
+ . filterer
+ . map toLower . stringify
+ where filterer = if isEnabled Ext_ascii_identifiers opts
+ then mapMaybe toAsciiChar
+ else filter (\c -> isLetter c || isAlphaNum c || isSpace c ||
+ c == '_' || c == '-')
nodeToPandoc :: ReaderOptions -> Node -> Pandoc
nodeToPandoc opts (Node _ DOCUMENT nodes) =
@@ -200,17 +208,17 @@ addInlines :: ReaderOptions -> [Node] -> [Inline]
addInlines opts = foldr (addInline opts) []
addInline :: ReaderOptions -> Node -> [Inline] -> [Inline]
-addInline opts (Node _ (TEXT t) _) = (map toinl clumps ++)
+addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++)
where raw = unpack t
clumps = groupBy samekind raw
samekind ' ' ' ' = True
samekind ' ' _ = False
samekind _ ' ' = False
samekind _ _ = True
- toinl (' ':_) = Space
- toinl xs = Str $ if isEnabled Ext_emoji opts
- then convertEmojis xs
- else xs
+ toinl (' ':_) = [Space]
+ toinl xs = if isEnabled Ext_emoji opts
+ then convertEmojis xs
+ else [Str xs]
addInline _ (Node _ LINEBREAK _) = (LineBreak :)
addInline opts (Node _ SOFTBREAK _)
| isEnabled Ext_hard_line_breaks opts = (LineBreak :)
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 4fd38c0fd..a337bf937 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -2,7 +2,7 @@
{-
Copyright (C) 2017 Sascha Wilde <wilde@sha-bang.de>
- partly based on all the other readers, especialy the work by
+ partly based on all the other readers, especially the work by
John MacFarlane <jgm@berkeley.edu> and
Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
all bugs are solely created by me.
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3d48c7ee8..b7bd71754 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -537,7 +537,6 @@ type DB m = StateT DBState m
data DBState = DBState{ dbSectionLevel :: Int
, dbQuoteType :: QuoteType
, dbMeta :: Meta
- , dbAcceptsMeta :: Bool
, dbBook :: Bool
, dbFigureTitle :: Inlines
, dbContent :: [Content]
@@ -547,7 +546,6 @@ instance Default DBState where
def = DBState{ dbSectionLevel = 0
, dbQuoteType = DoubleQuote
, dbMeta = mempty
- , dbAcceptsMeta = False
, dbBook = False
, dbFigureTitle = mempty
, dbContent = [] }
@@ -609,18 +607,26 @@ named s e = qName (elName e) == s
--
-acceptingMetadata :: PandocMonad m => DB m a -> DB m a
-acceptingMetadata p = do
- modify (\s -> s { dbAcceptsMeta = True } )
- res <- p
- modify (\s -> s { dbAcceptsMeta = False })
- return res
-
-checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a
-checkInMeta p = do
- accepts <- dbAcceptsMeta <$> get
- when accepts p
- return mempty
+addMetadataFromElement :: PandocMonad m => Element -> DB m Blocks
+addMetadataFromElement e = do
+ case filterChild (named "title") e of
+ Nothing -> return ()
+ Just z -> do
+ getInlines z >>= addMeta "title"
+ addMetaField "subtitle" z
+ case filterChild (named "authorgroup") e of
+ Nothing -> return ()
+ Just z -> addMetaField "author" z
+ addMetaField "subtitle" e
+ addMetaField "author" e
+ addMetaField "date" e
+ addMetaField "release" e
+ return mempty
+ where addMetaField fieldname elt =
+ case filterChildren (named fieldname) elt of
+ [] -> return ()
+ [z] -> getInlines z >>= addMeta fieldname
+ zs -> mapM getInlines zs >>= addMeta fieldname
addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
addMeta field val = modify (setMeta field val)
@@ -718,11 +724,6 @@ parseBlock (Elem e) =
"attribution" -> return mempty
"titleabbrev" -> return mempty
"authorinitials" -> return mempty
- "title" -> checkInMeta getTitle
- "author" -> checkInMeta getAuthor
- "authorgroup" -> checkInMeta getAuthorGroup
- "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release")
- "date" -> checkInMeta getDate
"bibliography" -> sect 0
"bibliodiv" -> sect 1
"biblioentry" -> parseMixed para (elContent e)
@@ -788,8 +789,8 @@ parseBlock (Elem e) =
"figure" -> getFigure e
"mediaobject" -> para <$> getMediaobject e
"caption" -> return mempty
- "info" -> metaBlock
- "articleinfo" -> metaBlock
+ "info" -> addMetadataFromElement e
+ "articleinfo" -> addMetadataFromElement e
"sectioninfo" -> return mempty -- keywords & other metadata
"refsectioninfo" -> return mempty -- keywords & other metadata
"refsect1info" -> return mempty -- keywords & other metadata
@@ -803,10 +804,11 @@ parseBlock (Elem e) =
"chapterinfo" -> return mempty -- keywords & other metadata
"glossaryinfo" -> return mempty -- keywords & other metadata
"appendixinfo" -> return mempty -- keywords & other metadata
- "bookinfo" -> metaBlock
+ "bookinfo" -> addMetadataFromElement e
"article" -> modify (\st -> st{ dbBook = False }) >>
- getBlocks e
- "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e
+ addMetadataFromElement e >> getBlocks e
+ "book" -> modify (\st -> st{ dbBook = True }) >>
+ addMetadataFromElement e >> getBlocks e
"table" -> parseTable
"informaltable" -> parseTable
"informalexample" -> divWith ("", ["informalexample"], []) <$>
@@ -816,6 +818,8 @@ parseBlock (Elem e) =
"screen" -> codeBlockWithLang
"programlisting" -> codeBlockWithLang
"?xml" -> return mempty
+ "title" -> return mempty -- handled in parent element
+ "subtitle" -> return mempty -- handled in parent element
_ -> getBlocks e
where parseMixed container conts = do
let (ils,rest) = break isBlockElement conts
@@ -857,19 +861,6 @@ parseBlock (Elem e) =
terms' <- mapM getInlines terms
items' <- mapM getBlocks items
return (mconcat $ intersperse (str "; ") terms', items')
- getTitle = do
- tit <- getInlines e
- subtit <- case filterChild (named "subtitle") e of
- Just s -> (text ": " <>) <$>
- getInlines s
- Nothing -> return mempty
- addMeta "title" (tit <> subtit)
-
- getAuthor = (:[]) <$> getInlines e >>= addMeta "author"
- getAuthorGroup = do
- let terms = filterChildren (named "author") e
- mapM getInlines terms >>= addMeta "author"
- getDate = getInlines e >>= addMeta "date"
parseTable = do
let isCaption x = named "title" x || named "caption" x
caption <- case filterChild isCaption e of
@@ -935,7 +926,6 @@ parseBlock (Elem e) =
modify $ \st -> st{ dbSectionLevel = n - 1 }
return $ headerWith (ident,[],[]) n' headerText <> b
lineItems = mapM getInlines $ filterChildren (named "line") e
- metaBlock = acceptingMetadata (getBlocks e) >> return mempty
getInlines :: PandocMonad m => Element -> DB m Inlines
getInlines e' = (trimInlines . mconcat) <$>
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index 49ea71601..0be363f3d 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -183,14 +183,13 @@ blocksToDefinitions' defAcc acc
pair = if remainingAttr2 == ("", [], []) then (concatMap plainParaInlines blks1, [blks2]) else (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]])
in
blocksToDefinitions' (pair : defAcc) acc blks
-blocksToDefinitions' defAcc acc
+blocksToDefinitions' ((defTerm, defItems):defs) acc
(Div (ident2, classes2, kvs2) blks2 : blks)
- | (not . null) defAcc && "Definition" `elem` classes2 =
+ | "Definition" `elem` classes2 =
let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2)
defItems2 = case remainingAttr2 == ("", [], []) of
True -> blks2
False -> [Div remainingAttr2 blks2]
- ((defTerm, defItems):defs) = defAcc
defAcc' = case null defItems of
True -> (defTerm, [defItems2]) : defs
False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 4c4c06073..b4e52de14 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -785,7 +785,7 @@ So we do this in a number of steps. If we encounter the fldchar begin
tag, we start open a fldchar state variable (see state above). We add
the instrtext to it as FieldInfo. Then we close that and start adding
the runs when we get to separate. Then when we get to end, we produce
-the Field type with approriate FieldInfo and Runs.
+the Field type with appropriate FieldInfo and Runs.
-}
elemToParPart ns element
| isElem ns "w" "r" element
@@ -1056,8 +1056,10 @@ elemToRunStyle ns element parentStyle
| Just rPr <- findChildByName ns "w" "rPr" element =
RunStyle
{
- isBold = checkOnOff ns rPr (elemName ns "w" "b")
- , isItalic = checkOnOff ns rPr (elemName ns "w" "i")
+ isBold = checkOnOff ns rPr (elemName ns "w" "b") `mplus`
+ checkOnOff ns rPr (elemName ns "w" "bCs")
+ , isItalic = checkOnOff ns rPr (elemName ns "w" "i") `mplus`
+ checkOnOff ns rPr (elemName ns "w" "iCs")
, isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
, isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
, rVertAlign =
@@ -1153,8 +1155,9 @@ getSymChar :: NameSpaces -> Element -> RunElem
getSymChar ns element
| Just s <- lowerFromPrivate <$> getCodepoint
, Just font <- getFont =
- let [(char, _)] = readLitChar ("\\x" ++ s) in
- TextRun . maybe "" (:[]) $ getUnicode font char
+ case readLitChar ("\\x" ++ s) of
+ [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char
+ _ -> TextRun ""
where
getCodepoint = findAttrByName ns "w" "char" element
getFont = stringToFont =<< findAttrByName ns "w" "font" element
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index c26447641..bfc3fc3ee 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -73,7 +73,7 @@ readEPUB opts bytes = case toArchiveOrFail bytes of
-- runEPUB :: Except PandocError a -> Either PandocError a
-- runEPUB = runExcept
--- Note that internal reference are aggresively normalised so that all ids
+-- Note that internal reference are aggressively normalised so that all ids
-- are of the form "filename#id"
--
archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 32a1ba5a6..b06e07a80 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -68,11 +68,13 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Pandoc.Definition
+import Text.Pandoc.Readers.LaTeX (rawLaTeXInline)
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options (
Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
- Ext_native_spans, Ext_raw_html, Ext_line_blocks),
+ Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex),
ReaderOptions (readerExtensions, readerStripComments),
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
@@ -102,7 +104,8 @@ readHtml opts inp = do
(m:_) -> messageString m
result <- flip runReaderT def $
runParserT parseDoc
- (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty [])
+ (HTMLState def{ stateOptions = opts }
+ [] Nothing Set.empty M.empty [] M.empty)
"source" tags
case result of
Right doc -> return doc
@@ -124,7 +127,8 @@ data HTMLState =
baseHref :: Maybe URI,
identifiers :: Set.Set String,
headerMap :: M.Map Inlines String,
- logMessages :: [LogMessage]
+ logMessages :: [LogMessage],
+ macros :: M.Map Text Macro
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -659,6 +663,7 @@ inline = choice
, pCode
, pSpan
, pMath False
+ , pScriptMath
, pRawHtmlInline
]
@@ -745,18 +750,18 @@ pLink = try $ do
let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
maybeFromAttrib "id" tag
let cls = words $ T.unpack $ fromAttrib "class" tag
- lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
+ lab <- mconcat <$> manyTill inline (pCloses "a")
-- check for href; if href, then a link, otherwise a span
case maybeFromAttrib "href" tag of
Nothing ->
- return $ B.spanWith (uid, cls, []) lab
+ return $ extractSpaces (B.spanWith (uid, cls, [])) lab
Just url' -> do
mbBaseHref <- baseHref <$> getState
let url = case (parseURIReference url', mbBaseHref) of
(Just rel, Just bs) ->
show (rel `nonStrictRelativeTo` bs)
_ -> url'
- return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
+ return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
@@ -818,6 +823,17 @@ toStringAttr :: [(Text, Text)] -> [(String, String)]
toStringAttr = map go
where go (x,y) = (T.unpack x, T.unpack y)
+pScriptMath :: PandocMonad m => TagParser m Inlines
+pScriptMath = try $ do
+ TagOpen _ attr' <- pSatisfy $ tagOpen (=="script") (const True)
+ isdisplay <- case lookup "type" attr' of
+ Just x | "math/tex" `T.isPrefixOf` x
+ -> return $ "display" `T.isSuffixOf` x
+ _ -> mzero
+ contents <- T.unpack . innerText <$>
+ manyTill pAnyTag (pSatisfy (matchTagClose "script"))
+ return $ (if isdisplay then B.displayMath else B.math) contents
+
pMath :: PandocMonad m => Bool -> TagParser m Inlines
pMath inCase = try $ do
open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True)
@@ -852,7 +868,7 @@ pInTags' tagtype tagtest parser = try $ do
pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t)
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
--- parses p, preceeded by an optional opening tag
+-- parses p, preceded by an optional opening tag
-- and followed by an optional closing tags
pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a
pOptInTag tagtype p = try $ do
@@ -907,9 +923,25 @@ pTagContents =
<|> pStr
<|> pSpace
<|> smartPunctuation pTagContents
+ <|> pRawTeX
<|> pSymbol
<|> pBad
+pRawTeX :: PandocMonad m => InlinesParser m Inlines
+pRawTeX = do
+ lookAhead $ try $ do
+ char '\\'
+ choice $ map (try . string) ["begin", "eqref", "ref"]
+ guardEnabled Ext_raw_tex
+ inp <- getInput
+ st <- getState
+ res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp)
+ case res of
+ Left _ -> mzero
+ Right (contents, raw) -> do
+ _ <- count (length raw) anyChar
+ return $ B.rawInline "tex" contents
+
pStr :: PandocMonad m => InlinesParser m Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
@@ -923,6 +955,7 @@ isSpecial '\'' = True
isSpecial '.' = True
isSpecial '-' = True
isSpecial '$' = True
+isSpecial '\\' = True
isSpecial '\8216' = True
isSpecial '\8217' = True
isSpecial '\8220' = True
@@ -1249,6 +1282,10 @@ isSpace _ = False
-- Instances
+instance HasMacros HTMLState where
+ extractMacros = macros
+ updateMacros f st = st{ macros = f $ macros st }
+
instance HasIdentifierList HTMLState where
extractIdentifierList = identifiers
updateIdentifierList f s = s{ identifiers = f (identifiers s) }
@@ -1281,7 +1318,7 @@ instance HasLastStrPosition HTMLState where
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
getLastStrPos = getLastStrPos . parserState
--- For now we need a special verison here; the one in Shared has String type
+-- For now we need a special version here; the one in Shared has String type
renderTags' :: [Tag Text] -> Text
renderTags' = renderTagsOptions
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 967037e4e..072bab350 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -44,11 +44,7 @@ readHaddockEither :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
-> Either PandocError Pandoc
readHaddockEither _opts =
-#if MIN_VERSION_haddock_library(1,2,0)
- Right . B.doc . docHToBlocks . _doc . parseParas
-#else
- Right . B.doc . docHToBlocks . parseParas
-#endif
+ Right . B.doc . docHToBlocks . _doc . parseParas Nothing
docHToBlocks :: DocH String Identifier -> Blocks
docHToBlocks d' =
@@ -68,10 +64,8 @@ docHToBlocks d' =
DocEmphasis _ -> inlineFallback
DocMonospaced _ -> inlineFallback
DocBold _ -> inlineFallback
-#if MIN_VERSION_haddock_library(1,4,0)
DocMathInline _ -> inlineFallback
DocMathDisplay _ -> inlineFallback
-#endif
DocHeader h -> B.header (headerLevel h)
(docHToInlines False $ headerTitle h)
DocUnorderedList items -> B.bulletList (map docHToBlocks items)
@@ -87,7 +81,6 @@ docHToBlocks d' =
DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s)
DocExamples es -> mconcat $ map (\e ->
makeExample ">>>" (exampleExpression e) (exampleResult e)) es
-#if MIN_VERSION_haddock_library(1,5,0)
DocTable H.Table{ tableHeaderRows = headerRows
, tableBodyRows = bodyRows
}
@@ -100,7 +93,6 @@ docHToBlocks d' =
colspecs = replicate (maximum (map length body))
(AlignDefault, 0.0)
in B.table mempty colspecs header body
-#endif
where inlineFallback = B.plain $ docHToInlines False d'
consolidatePlains = B.fromList . consolidatePlains' . B.toList
@@ -133,10 +125,8 @@ docHToInlines isCode d' =
DocMonospaced (DocString s) -> B.code s
DocMonospaced d -> docHToInlines True d
DocBold d -> B.strong (docHToInlines isCode d)
-#if MIN_VERSION_haddock_library(1,4,0)
DocMathInline s -> B.math s
DocMathDisplay s -> B.displayMath s
-#endif
DocHeader _ -> mempty
DocUnorderedList _ -> mempty
DocOrderedList _ -> mempty
@@ -149,9 +139,7 @@ docHToInlines isCode d' =
DocAName s -> B.spanWith (s,["anchor"],[]) mempty
DocProperty _ -> mempty
DocExamples _ -> mempty
-#if MIN_VERSION_haddock_library(1,5,0)
DocTable _ -> mempty
-#endif
-- | Create an 'Example', stripping superfluous characters as appropriate
makeExample :: String -> String -> [String] -> Blocks
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 59af76d23..695c86b5d 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -191,7 +191,7 @@ parseBlock (Elem e) =
listType -> do
let start = fromMaybe 1 $
(strContent <$> (filterElement (named "list-item") e
- >>= filterElement (named "lable")))
+ >>= filterElement (named "label")))
>>= safeRead
orderedListWith (start, parseListStyleType listType, DefaultDelim)
<$> listitems
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 39dffde76..7c5619165 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -47,8 +47,7 @@ import Prelude
import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
-import Control.Monad.Trans (lift)
-import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord, toLower, toUpper)
+import Data.Char (isDigit, isLetter, toLower, toUpper)
import Data.Default
import Data.List (intercalate, isPrefixOf)
import qualified Data.Map as M
@@ -63,7 +62,7 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv,
readFileFromDirs, report, setResourcePath,
setTranslations, translateTerm, trace)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop, PandocParseError, PandocParsecError))
+import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError))
import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Logging
@@ -71,12 +70,15 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
optional, space, spaces, withRaw, (<|>))
import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
- Tok (..), TokType (..))
+ ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
+ babelLangToBCP47)
import Text.Pandoc.Shared
import qualified Text.Pandoc.Translations as Translations
import Text.Pandoc.Walk
-import Text.Parsec.Pos
import qualified Text.Pandoc.Builder as B
+import qualified Data.Text.Normalize as Normalize
-- for debugging:
-- import Text.Pandoc.Extensions (getDefaultExtensions)
@@ -137,482 +139,49 @@ resolveRefs _ x = x
-- Left e -> error (show e)
-- Right r -> return r
-newtype HeaderNum = HeaderNum [Int]
- deriving (Show)
-
-renderHeaderNum :: HeaderNum -> String
-renderHeaderNum (HeaderNum xs) =
- intercalate "." (map show xs)
-
-incrementHeaderNum :: Int -> HeaderNum -> HeaderNum
-incrementHeaderNum level (HeaderNum ns) = HeaderNum $
- case reverse (take level (ns ++ repeat 0)) of
- (x:xs) -> reverse (x+1 : xs)
- [] -> [] -- shouldn't happen
-
-data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
- , sMeta :: Meta
- , sQuoteContext :: QuoteContext
- , sMacros :: M.Map Text Macro
- , sContainers :: [String]
- , sHeaders :: M.Map Inlines String
- , sLogMessages :: [LogMessage]
- , sIdentifiers :: Set.Set String
- , sVerbatimMode :: Bool
- , sCaption :: Maybe Inlines
- , sInListItem :: Bool
- , sInTableCell :: Bool
- , sLastHeaderNum :: HeaderNum
- , sLabels :: M.Map String [Inline]
- , sHasChapters :: Bool
- , sToggles :: M.Map String Bool
- }
- deriving Show
-
-defaultLaTeXState :: LaTeXState
-defaultLaTeXState = LaTeXState{ sOptions = def
- , sMeta = nullMeta
- , sQuoteContext = NoQuote
- , sMacros = M.empty
- , sContainers = []
- , sHeaders = M.empty
- , sLogMessages = []
- , sIdentifiers = Set.empty
- , sVerbatimMode = False
- , sCaption = Nothing
- , sInListItem = False
- , sInTableCell = False
- , sLastHeaderNum = HeaderNum []
- , sLabels = M.empty
- , sHasChapters = False
- , sToggles = M.empty
- }
-
-instance PandocMonad m => HasQuoteContext LaTeXState m where
- getQuoteContext = sQuoteContext <$> getState
- withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = sQuoteContext oldState
- setState oldState { sQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { sQuoteContext = oldQuoteContext }
- return result
-
-instance HasLogMessages LaTeXState where
- addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
- getLogMessages st = reverse $ sLogMessages st
-
-instance HasIdentifierList LaTeXState where
- extractIdentifierList = sIdentifiers
- updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
-
-instance HasIncludeFiles LaTeXState where
- getIncludeFiles = sContainers
- addIncludeFile f s = s{ sContainers = f : sContainers s }
- dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
-
-instance HasHeaderMap LaTeXState where
- extractHeaderMap = sHeaders
- updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
-
-instance HasMacros LaTeXState where
- extractMacros st = sMacros st
- updateMacros f st = st{ sMacros = f (sMacros st) }
-
-instance HasReaderOptions LaTeXState where
- extractReaderOptions = sOptions
-
-instance HasMeta LaTeXState where
- setMeta field val st =
- st{ sMeta = setMeta field val $ sMeta st }
- deleteMeta field st =
- st{ sMeta = deleteMeta field $ sMeta st }
-
-instance Default LaTeXState where
- def = defaultLaTeXState
-
-type LP m = ParserT [Tok] LaTeXState m
-
-withVerbatimMode :: PandocMonad m => LP m a -> LP m a
-withVerbatimMode parser = do
- updateState $ \st -> st{ sVerbatimMode = True }
- result <- parser
- updateState $ \st -> st{ sVerbatimMode = False }
- return result
-
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => LP m a -> LP m a -> ParserT String s m (a, String)
-rawLaTeXParser parser valParser = do
- inp <- getInput
- let toks = tokenize "source" $ T.pack inp
- pstate <- getState
- let lstate = def{ sOptions = extractReaderOptions pstate }
- let lstate' = lstate { sMacros = extractMacros pstate }
- let rawparser = (,) <$> withRaw valParser <*> getState
- res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
- case res' of
- Left _ -> mzero
- Right toks' -> do
- res <- lift $ runParserT (do doMacros 0
- -- retokenize, applying macros
- ts <- many (satisfyTok (const True))
- setInput ts
- rawparser)
- lstate' "chunk" toks'
- case res of
- Left _ -> mzero
- Right ((val, raw), st) -> do
- updateState (updateMacros (sMacros st <>))
- _ <- takeP (T.length (untokenize toks'))
- return (val, T.unpack (untokenize raw))
-
-applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => String -> ParserT String s m String
-applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
- do let retokenize = doMacros 0 *>
- (toksToString <$> many (satisfyTok (const True)))
- pstate <- getState
- let lstate = def{ sOptions = extractReaderOptions pstate
- , sMacros = extractMacros pstate }
- res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
- case res of
- Left e -> fail (show e)
- Right s' -> return s'
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> ParserT String s m String
rawLaTeXBlock = do
lookAhead (try (char '\\' >> letter))
- -- we don't want to apply newly defined latex macros to their own
- -- definitions:
- snd <$> rawLaTeXParser (environment <|> macroDef <|> blockCommand) blocks
+ snd <$> (rawLaTeXParser False macroDef blocks
+ <|> (rawLaTeXParser True
+ (do choice (map controlSeq
+ ["include", "input", "subfile", "usepackage"])
+ skipMany opt
+ braced
+ return mempty) blocks)
+ <|> rawLaTeXParser True
+ (environment <|> blockCommand)
+ (mconcat <$> (many (block <|> beginOrEndCommand))))
+
+-- See #4667 for motivation; sometimes people write macros
+-- that just evaluate to a begin or end command, which blockCommand
+-- won't accept.
+beginOrEndCommand :: PandocMonad m => LP m Blocks
+beginOrEndCommand = try $ do
+ Tok _ (CtrlSeq name) txt <- anyControlSeq
+ guard $ name == "begin" || name == "end"
+ (envname, rawargs) <- withRaw braced
+ if M.member (untokenize envname)
+ (inlineEnvironments :: M.Map Text (LP PandocPure Inlines))
+ then mzero
+ else return $ rawBlock "latex"
+ (T.unpack (txt <> untokenize rawargs))
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> ParserT String s m String
rawLaTeXInline = do
lookAhead (try (char '\\' >> letter))
- snd <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines
+ snd <$> ( rawLaTeXParser True
+ (mempty <$ (controlSeq "input" >> skipMany opt >> braced))
+ inlines
+ <|> rawLaTeXParser True (inlineEnvironment <|> inlineCommand') inlines)
inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter))
- fst <$> rawLaTeXParser (inlineEnvironment <|> inlineCommand') inlines
-
-tokenize :: SourceName -> Text -> [Tok]
-tokenize sourcename = totoks (initialPos sourcename)
-
-totoks :: SourcePos -> Text -> [Tok]
-totoks pos t =
- case T.uncons t of
- Nothing -> []
- Just (c, rest)
- | c == '\n' ->
- Tok pos Newline "\n"
- : totoks (setSourceColumn (incSourceLine pos 1) 1) rest
- | isSpaceOrTab c ->
- let (sps, rest') = T.span isSpaceOrTab t
- in Tok pos Spaces sps
- : totoks (incSourceColumn pos (T.length sps))
- rest'
- | isAlphaNum c ->
- let (ws, rest') = T.span isAlphaNum t
- in Tok pos Word ws
- : totoks (incSourceColumn pos (T.length ws)) rest'
- | c == '%' ->
- let (cs, rest') = T.break (== '\n') rest
- in Tok pos Comment ("%" <> cs)
- : totoks (incSourceColumn pos (1 + T.length cs)) rest'
- | c == '\\' ->
- case T.uncons rest of
- Nothing -> [Tok pos (CtrlSeq " ") "\\"]
- Just (d, rest')
- | isLetterOrAt d ->
- -- \makeatletter is common in macro defs;
- -- ideally we should make tokenization sensitive
- -- to \makeatletter and \makeatother, but this is
- -- probably best for now
- let (ws, rest'') = T.span isLetterOrAt rest
- (ss, rest''') = T.span isSpaceOrTab rest''
- in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
- : totoks (incSourceColumn pos
- (1 + T.length ws + T.length ss)) rest'''
- | isSpaceOrTab d || d == '\n' ->
- let (w1, r1) = T.span isSpaceOrTab rest
- (w2, (w3, r3)) = case T.uncons r1 of
- Just ('\n', r2)
- -> (T.pack "\n",
- T.span isSpaceOrTab r2)
- _ -> (mempty, (mempty, r1))
- ws = "\\" <> w1 <> w2 <> w3
- in case T.uncons r3 of
- Just ('\n', _) ->
- Tok pos (CtrlSeq " ") ("\\" <> w1)
- : totoks (incSourceColumn pos (T.length ws))
- r1
- _ ->
- Tok pos (CtrlSeq " ") ws
- : totoks (incSourceColumn pos (T.length ws))
- r3
- | otherwise ->
- Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
- : totoks (incSourceColumn pos 2) rest'
- | c == '#' ->
- let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
- in case safeRead (T.unpack t1) of
- Just i ->
- Tok pos (Arg i) ("#" <> t1)
- : totoks (incSourceColumn pos (1 + T.length t1)) t2
- Nothing ->
- Tok pos Symbol "#"
- : totoks (incSourceColumn pos 1) t2
- | c == '^' ->
- case T.uncons rest of
- Just ('^', rest') ->
- case T.uncons rest' of
- Just (d, rest'')
- | isLowerHex d ->
- case T.uncons rest'' of
- Just (e, rest''') | isLowerHex e ->
- Tok pos Esc2 (T.pack ['^','^',d,e])
- : totoks (incSourceColumn pos 4) rest'''
- _ ->
- Tok pos Esc1 (T.pack ['^','^',d])
- : totoks (incSourceColumn pos 3) rest''
- | d < '\128' ->
- Tok pos Esc1 (T.pack ['^','^',d])
- : totoks (incSourceColumn pos 3) rest''
- _ -> Tok pos Symbol "^" :
- Tok (incSourceColumn pos 1) Symbol "^" :
- totoks (incSourceColumn pos 2) rest'
- _ -> Tok pos Symbol "^"
- : totoks (incSourceColumn pos 1) rest
- | otherwise ->
- Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
-
-isSpaceOrTab :: Char -> Bool
-isSpaceOrTab ' ' = True
-isSpaceOrTab '\t' = True
-isSpaceOrTab _ = False
-
-isLetterOrAt :: Char -> Bool
-isLetterOrAt '@' = True
-isLetterOrAt c = isLetter c
-
-isLowerHex :: Char -> Bool
-isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
-
-untokenize :: [Tok] -> Text
-untokenize = mconcat . map untoken
-
-untoken :: Tok -> Text
-untoken (Tok _ _ t) = t
-
-satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
-satisfyTok f =
- try $ do
- res <- tokenPrim (T.unpack . untoken) updatePos matcher
- doMacros 0 -- apply macros on remaining input stream
- return res
- where matcher t | f t = Just t
- | otherwise = Nothing
- updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
- updatePos _spos _ (Tok pos _ _ : _) = pos
- updatePos spos _ [] = incSourceColumn spos 1
-
-doMacros :: PandocMonad m => Int -> LP m ()
-doMacros n = do
- verbatimMode <- sVerbatimMode <$> getState
- unless verbatimMode $ do
- inp <- getInput
- case inp of
- Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
- Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros spos name ts
- Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
- Tok _ Word name : Tok _ Symbol "}" : ts
- -> handleMacros spos ("end" <> name) ts
- Tok _ (CtrlSeq "expandafter") _ : t : ts
- -> do setInput ts
- doMacros n
- getInput >>= setInput . combineTok t
- Tok spos (CtrlSeq name) _ : ts
- -> handleMacros spos name ts
- _ -> return ()
- where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
- | T.all isLetterOrAt w =
- Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
- where (x1, x2) = T.break isSpaceOrTab x
- combineTok t ts = t:ts
- handleMacros spos name ts = do
- macros <- sMacros <$> getState
- case M.lookup name macros of
- Nothing -> return ()
- Just (Macro expansionPoint numargs optarg newtoks) -> do
- setInput ts
- let getarg = try $ spaces >> bracedOrToken
- args <- case optarg of
- Nothing -> count numargs getarg
- Just o ->
- (:) <$> option o bracketedToks
- <*> count (numargs - 1) getarg
- -- first boolean param is true if we're tokenizing
- -- an argument (in which case we don't want to
- -- expand #1 etc.)
- let addTok False (Tok _ (Arg i) _) acc | i > 0
- , i <= numargs =
- foldr (addTok True) acc (args !! (i - 1))
- -- add space if needed after control sequence
- -- see #4007
- addTok _ (Tok _ (CtrlSeq x) txt)
- acc@(Tok _ Word _ : _)
- | not (T.null txt) &&
- isLetter (T.last txt) =
- Tok spos (CtrlSeq x) (txt <> " ") : acc
- addTok _ t acc = setpos spos t : acc
- ts' <- getInput
- setInput $ foldr (addTok False) ts' newtoks
- case expansionPoint of
- ExpandWhenUsed ->
- if n > 20 -- detect macro expansion loops
- then throwError $ PandocMacroLoop (T.unpack name)
- else doMacros (n + 1)
- ExpandWhenDefined -> return ()
-
-
-setpos :: SourcePos -> Tok -> Tok
-setpos spos (Tok _ tt txt) = Tok spos tt txt
-
-anyControlSeq :: PandocMonad m => LP m Tok
-anyControlSeq = satisfyTok isCtrlSeq
- where isCtrlSeq (Tok _ (CtrlSeq _) _) = True
- isCtrlSeq _ = False
-
-anySymbol :: PandocMonad m => LP m Tok
-anySymbol = satisfyTok isSym
- where isSym (Tok _ Symbol _) = True
- isSym _ = False
-
-spaces :: PandocMonad m => LP m ()
-spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
-
-spaces1 :: PandocMonad m => LP m ()
-spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
-
-tokTypeIn :: [TokType] -> Tok -> Bool
-tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
-
-controlSeq :: PandocMonad m => Text -> LP m Tok
-controlSeq name = satisfyTok isNamed
- where isNamed (Tok _ (CtrlSeq n) _) = n == name
- isNamed _ = False
-
-symbol :: PandocMonad m => Char -> LP m Tok
-symbol c = satisfyTok isc
- where isc (Tok _ Symbol d) = case T.uncons d of
- Just (c',_) -> c == c'
- _ -> False
- isc _ = False
-
-symbolIn :: PandocMonad m => [Char] -> LP m Tok
-symbolIn cs = satisfyTok isInCs
- where isInCs (Tok _ Symbol d) = case T.uncons d of
- Just (c,_) -> c `elem` cs
- _ -> False
- isInCs _ = False
-
-sp :: PandocMonad m => LP m ()
-sp = whitespace <|> endline
-
-whitespace :: PandocMonad m => LP m ()
-whitespace = () <$ satisfyTok isSpaceTok
- where isSpaceTok (Tok _ Spaces _) = True
- isSpaceTok _ = False
-
-newlineTok :: PandocMonad m => LP m ()
-newlineTok = () <$ satisfyTok isNewlineTok
-
-isNewlineTok :: Tok -> Bool
-isNewlineTok (Tok _ Newline _) = True
-isNewlineTok _ = False
-
-comment :: PandocMonad m => LP m ()
-comment = () <$ satisfyTok isCommentTok
- where isCommentTok (Tok _ Comment _) = True
- isCommentTok _ = False
-
-anyTok :: PandocMonad m => LP m Tok
-anyTok = satisfyTok (const True)
-
-endline :: PandocMonad m => LP m ()
-endline = try $ do
- newlineTok
- lookAhead anyTok
- notFollowedBy blankline
-
-blankline :: PandocMonad m => LP m ()
-blankline = try $ skipMany whitespace *> newlineTok
-
-primEscape :: PandocMonad m => LP m Char
-primEscape = do
- Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
- case toktype of
- Esc1 -> case T.uncons (T.drop 2 t) of
- Just (c, _)
- | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
- | otherwise -> return (chr (ord c + 64))
- Nothing -> fail "Empty content of Esc1"
- Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
- Just x -> return (chr x)
- Nothing -> fail $ "Could not read: " ++ T.unpack t
- _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
-
-bgroup :: PandocMonad m => LP m Tok
-bgroup = try $ do
- skipMany sp
- symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
-
-egroup :: PandocMonad m => LP m Tok
-egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
-
-grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
-grouped parser = try $ do
- bgroup
- -- first we check for an inner 'grouped', because
- -- {{a,b}} should be parsed the same as {a,b}
- try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
-
-braced :: PandocMonad m => LP m [Tok]
-braced = bgroup *> braced' 1
- where braced' (n :: Int) =
- handleEgroup n <|> handleBgroup n <|> handleOther n
- handleEgroup n = do
- t <- egroup
- if n == 1
- then return []
- else (t:) <$> braced' (n - 1)
- handleBgroup n = do
- t <- bgroup
- (t:) <$> braced' (n + 1)
- handleOther n = do
- t <- anyTok
- (t:) <$> braced' n
-
-bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
-bracketed parser = try $ do
- symbol '['
- mconcat <$> manyTill parser (symbol ']')
-
-dimenarg :: PandocMonad m => LP m Text
-dimenarg = try $ do
- ch <- option False $ True <$ symbol '='
- Tok _ _ s <- satisfyTok isWordTok
- guard $ T.take 2 (T.reverse s) `elem`
- ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
- let num = T.take (T.length s - 2) s
- guard $ T.length num > 0
- guard $ T.all isDigit num
- return $ T.pack ['=' | ch] <> s
+ fst <$> rawLaTeXParser True (inlineEnvironment <|> inlineCommand') inlines
-- inline elements:
@@ -625,13 +194,6 @@ regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
isRegularSymbol _ = False
isSpecial c = c `Set.member` specialChars
-specialChars :: Set.Set Char
-specialChars = Set.fromList "#$%&~_^\\{}"
-
-isWordTok :: Tok -> Bool
-isWordTok (Tok _ Word _) = True
-isWordTok _ = False
-
inlineGroup :: PandocMonad m => LP m Inlines
inlineGroup = do
ils <- grouped inline
@@ -678,7 +240,7 @@ dosiunitx = do
skipopts
value <- tok
valueprefix <- option "" $ bracketed tok
- unit <- inlineCommand' <|> tok
+ unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok
let emptyOr160 "" = ""
emptyOr160 _ = "\160"
return . mconcat $ [valueprefix,
@@ -687,11 +249,187 @@ dosiunitx = do
emptyOr160 unit,
unit]
--- siunitx's \square command
-dosquare :: PandocMonad m => LP m Inlines
-dosquare = do
- unit <- inlineCommand' <|> tok
- return . mconcat $ [unit, "\178"]
+siUnit :: PandocMonad m => LP m Inlines
+siUnit = do
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ if name == "square"
+ then do
+ unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok
+ return . mconcat $ [unit, "\178"]
+ else
+ case M.lookup name siUnitMap of
+ Just il -> return il
+ Nothing -> mzero
+
+siUnitMap :: M.Map Text Inlines
+siUnitMap = M.fromList
+ [ ("fg", str "fg")
+ , ("pg", str "pg")
+ , ("ng", str "ng")
+ , ("ug", str "μg")
+ , ("mg", str "mg")
+ , ("g", str "g")
+ , ("kg", str "kg")
+ , ("amu", str "u")
+ , ("pm", str "pm")
+ , ("nm", str "nm")
+ , ("um", str "μm")
+ , ("mm", str "mm")
+ , ("cm", str "cm")
+ , ("dm", str "dm")
+ , ("m", str "m")
+ , ("km", str "km")
+ , ("as", str "as")
+ , ("fs", str "fs")
+ , ("ps", str "ps")
+ , ("ns", str "ns")
+ , ("us", str "μs")
+ , ("ms", str "ms")
+ , ("s", str "s")
+ , ("fmol", str "fmol")
+ , ("pmol", str "pmol")
+ , ("nmol", str "nmol")
+ , ("umol", str "μmol")
+ , ("mmol", str "mmol")
+ , ("mol", str "mol")
+ , ("kmol", str "kmol")
+ , ("pA", str "pA")
+ , ("nA", str "nA")
+ , ("uA", str "μA")
+ , ("mA", str "mA")
+ , ("A", str "A")
+ , ("kA", str "kA")
+ , ("ul", str "μl")
+ , ("ml", str "ml")
+ , ("l", str "l")
+ , ("hl", str "hl")
+ , ("uL", str "μL")
+ , ("mL", str "mL")
+ , ("L", str "L")
+ , ("hL", str "hL")
+ , ("mHz", str "mHz")
+ , ("Hz", str "Hz")
+ , ("kHz", str "kHz")
+ , ("MHz", str "MHz")
+ , ("GHz", str "GHz")
+ , ("THz", str "THz")
+ , ("mN", str "mN")
+ , ("N", str "N")
+ , ("kN", str "kN")
+ , ("MN", str "MN")
+ , ("Pa", str "Pa")
+ , ("kPa", str "kPa")
+ , ("MPa", str "MPa")
+ , ("GPa", str "GPa")
+ , ("mohm", str "mΩ")
+ , ("kohm", str "kΩ")
+ , ("Mohm", str "MΩ")
+ , ("pV", str "pV")
+ , ("nV", str "nV")
+ , ("uV", str "μV")
+ , ("mV", str "mV")
+ , ("V", str "V")
+ , ("kV", str "kV")
+ , ("W", str "W")
+ , ("uW", str "μW")
+ , ("mW", str "mW")
+ , ("kW", str "kW")
+ , ("MW", str "MW")
+ , ("GW", str "GW")
+ , ("J", str "J")
+ , ("uJ", str "μJ")
+ , ("mJ", str "mJ")
+ , ("kJ", str "kJ")
+ , ("eV", str "eV")
+ , ("meV", str "meV")
+ , ("keV", str "keV")
+ , ("MeV", str "MeV")
+ , ("GeV", str "GeV")
+ , ("TeV", str "TeV")
+ , ("kWh", str "kWh")
+ , ("F", str "F")
+ , ("fF", str "fF")
+ , ("pF", str "pF")
+ , ("K", str "K")
+ , ("dB", str "dB")
+ , ("angstrom", str "Å")
+ , ("arcmin", str "′")
+ , ("arcminute", str "′")
+ , ("arcsecond", str "″")
+ , ("astronomicalunit", str "ua")
+ , ("atomicmassunit", str "u")
+ , ("atto", str "a")
+ , ("bar", str "bar")
+ , ("barn", str "b")
+ , ("becquerel", str "Bq")
+ , ("bel", str "B")
+ , ("candela", str "cd")
+ , ("celsius", str "°C")
+ , ("centi", str "c")
+ , ("coulomb", str "C")
+ , ("dalton", str "Da")
+ , ("day", str "d")
+ , ("deca", str "d")
+ , ("deci", str "d")
+ , ("decibel", str "db")
+ , ("degreeCelsius",str "°C")
+ , ("degree", str "°")
+ , ("deka", str "d")
+ , ("electronvolt", str "eV")
+ , ("exa", str "E")
+ , ("farad", str "F")
+ , ("femto", str "f")
+ , ("giga", str "G")
+ , ("gram", str "g")
+ , ("hectare", str "ha")
+ , ("hecto", str "h")
+ , ("henry", str "H")
+ , ("hertz", str "Hz")
+ , ("hour", str "h")
+ , ("joule", str "J")
+ , ("katal", str "kat")
+ , ("kelvin", str "K")
+ , ("kilo", str "k")
+ , ("kilogram", str "kg")
+ , ("knot", str "kn")
+ , ("liter", str "L")
+ , ("litre", str "l")
+ , ("lumen", str "lm")
+ , ("lux", str "lx")
+ , ("mega", str "M")
+ , ("meter", str "m")
+ , ("metre", str "m")
+ , ("milli", str "m")
+ , ("minute", str "min")
+ , ("mmHg", str "mmHg")
+ , ("mole", str "mol")
+ , ("nano", str "n")
+ , ("nauticalmile", str "M")
+ , ("neper", str "Np")
+ , ("newton", str "N")
+ , ("ohm", str "Ω")
+ , ("Pa", str "Pa")
+ , ("pascal", str "Pa")
+ , ("percent", str "%")
+ , ("per", str "/")
+ , ("peta", str "P")
+ , ("pico", str "p")
+ , ("radian", str "rad")
+ , ("second", str "s")
+ , ("siemens", str "S")
+ , ("sievert", str "Sv")
+ , ("steradian", str "sr")
+ , ("tera", str "T")
+ , ("tesla", str "T")
+ , ("tonne", str "t")
+ , ("volt", str "V")
+ , ("watt", str "W")
+ , ("weber", str "Wb")
+ , ("yocto", str "y")
+ , ("yotta", str "Y")
+ , ("zepto", str "z")
+ , ("zetta", str "Z")
+ ]
lit :: String -> LP m Inlines
lit = pure . str
@@ -742,13 +480,31 @@ quoted' f starter ender = do
cs -> cs)
else lit startchs
-enquote :: PandocMonad m => LP m Inlines
-enquote = do
+enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
+enquote starred mblang = do
skipopts
+ let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+ let langspan = case lang of
+ Nothing -> id
+ Just l -> spanWith ("",[],[("lang", renderLang l)])
quoteContext <- sQuoteContext <$> getState
- if quoteContext == InDoubleQuote
- then singleQuoted <$> withQuoteContext InSingleQuote tok
- else doubleQuoted <$> withQuoteContext InDoubleQuote tok
+ if starred || quoteContext == InDoubleQuote
+ then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+ else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
+blockquote citations mblang = do
+ citePar <- if citations
+ then do
+ cs <- cites NormalCitation False
+ return $ para (cite cs mempty)
+ else return mempty
+ let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+ let langdiv = case lang of
+ Nothing -> id
+ Just l -> divWith ("",[],[("lang", renderLang l)])
+ bs <- grouped block
+ return $ blockQuote . langdiv $ (bs <> citePar)
doAcronym :: PandocMonad m => String -> LP m Inlines
doAcronym form = do
@@ -791,6 +547,16 @@ dolstinline :: PandocMonad m => LP m Inlines
dolstinline = do
options <- option [] keyvals
let classes = maybeToList $ lookup "language" options >>= fromListingsLanguage
+ doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+ skipopts
+ cls <- toksToString <$> braced
+ doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [String] -> LP m Inlines
+doinlinecode classes = do
Tok _ Symbol t <- anySymbol
marker <- case T.uncons t of
Just (c, ts) | T.null ts -> return c
@@ -803,246 +569,41 @@ dolstinline = do
keyval :: PandocMonad m => LP m (String, String)
keyval = try $ do
Tok _ Word key <- satisfyTok isWordTok
- let isSpecSym (Tok _ Symbol t) = t /= "]" && t /= ","
- isSpecSym _ = False
optional sp
- val <- option [] $ do
+ val <- option mempty $ do
symbol '='
optional sp
- braced <|> many1 (satisfyTok isWordTok <|> satisfyTok isSpecSym
- <|> anyControlSeq)
- optional sp
+ (untokenize <$> braced) <|>
+ (mconcat <$> many1 (
+ (untokenize . snd <$> withRaw braced)
+ <|>
+ (untokenize <$> (many1
+ (satisfyTok
+ (\t -> case t of
+ Tok _ Symbol "]" -> False
+ Tok _ Symbol "," -> False
+ Tok _ Symbol "{" -> False
+ Tok _ Symbol "}" -> False
+ _ -> True))))))
optional (symbol ',')
optional sp
- return (T.unpack key, T.unpack . untokenize $ val)
+ return (T.unpack key, T.unpack $ T.strip val)
keyvals :: PandocMonad m => LP m [(String, String)]
keyvals = try $ symbol '[' >> manyTill keyval (symbol ']')
-accent :: PandocMonad m => Char -> (Char -> String) -> LP m Inlines
-accent c f = try $ do
+accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
+accent combiningAccent fallBack = try $ do
ils <- tok
case toList ils of
- (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
- [Space] -> return $ str [c]
- [] -> return $ str [c]
+ (Str (x:xs) : ys) -> return $ fromList $
+ -- try to normalize to the combined character:
+ Str (T.unpack
+ (Normalize.normalize Normalize.NFC
+ (T.pack [x, combiningAccent])) ++ xs) : ys
+ [Space] -> return $ str [fromMaybe combiningAccent fallBack]
+ [] -> return $ str [fromMaybe combiningAccent fallBack]
_ -> return ils
-
-grave :: Char -> String
-grave 'A' = "À"
-grave 'E' = "È"
-grave 'I' = "Ì"
-grave 'O' = "Ò"
-grave 'U' = "Ù"
-grave 'a' = "à"
-grave 'e' = "è"
-grave 'i' = "ì"
-grave 'o' = "ò"
-grave 'u' = "ù"
-grave c = [c]
-
-acute :: Char -> String
-acute 'A' = "Á"
-acute 'E' = "É"
-acute 'I' = "Í"
-acute 'O' = "Ó"
-acute 'U' = "Ú"
-acute 'Y' = "Ý"
-acute 'a' = "á"
-acute 'e' = "é"
-acute 'i' = "í"
-acute 'o' = "ó"
-acute 'u' = "ú"
-acute 'y' = "ý"
-acute 'C' = "Ć"
-acute 'c' = "ć"
-acute 'L' = "Ĺ"
-acute 'l' = "ĺ"
-acute 'N' = "Ń"
-acute 'n' = "ń"
-acute 'R' = "Ŕ"
-acute 'r' = "ŕ"
-acute 'S' = "Ś"
-acute 's' = "ś"
-acute 'Z' = "Ź"
-acute 'z' = "ź"
-acute c = [c]
-
-circ :: Char -> String
-circ 'A' = "Â"
-circ 'E' = "Ê"
-circ 'I' = "Î"
-circ 'O' = "Ô"
-circ 'U' = "Û"
-circ 'a' = "â"
-circ 'e' = "ê"
-circ 'i' = "î"
-circ 'o' = "ô"
-circ 'u' = "û"
-circ 'C' = "Ĉ"
-circ 'c' = "ĉ"
-circ 'G' = "Ĝ"
-circ 'g' = "ĝ"
-circ 'H' = "Ĥ"
-circ 'h' = "ĥ"
-circ 'J' = "Ĵ"
-circ 'j' = "ĵ"
-circ 'S' = "Ŝ"
-circ 's' = "ŝ"
-circ 'W' = "Ŵ"
-circ 'w' = "ŵ"
-circ 'Y' = "Ŷ"
-circ 'y' = "ŷ"
-circ c = [c]
-
-tilde :: Char -> String
-tilde 'A' = "Ã"
-tilde 'a' = "ã"
-tilde 'O' = "Õ"
-tilde 'o' = "õ"
-tilde 'I' = "Ĩ"
-tilde 'i' = "ĩ"
-tilde 'U' = "Ũ"
-tilde 'u' = "ũ"
-tilde 'N' = "Ñ"
-tilde 'n' = "ñ"
-tilde c = [c]
-
-umlaut :: Char -> String
-umlaut 'A' = "Ä"
-umlaut 'E' = "Ë"
-umlaut 'I' = "Ï"
-umlaut 'O' = "Ö"
-umlaut 'U' = "Ü"
-umlaut 'a' = "ä"
-umlaut 'e' = "ë"
-umlaut 'i' = "ï"
-umlaut 'o' = "ö"
-umlaut 'u' = "ü"
-umlaut c = [c]
-
-hungarumlaut :: Char -> String
-hungarumlaut 'A' = "A̋"
-hungarumlaut 'E' = "E̋"
-hungarumlaut 'I' = "I̋"
-hungarumlaut 'O' = "Ő"
-hungarumlaut 'U' = "Ű"
-hungarumlaut 'Y' = "ӳ"
-hungarumlaut 'a' = "a̋"
-hungarumlaut 'e' = "e̋"
-hungarumlaut 'i' = "i̋"
-hungarumlaut 'o' = "ő"
-hungarumlaut 'u' = "ű"
-hungarumlaut 'y' = "ӳ"
-hungarumlaut c = [c]
-
-dot :: Char -> String
-dot 'C' = "Ċ"
-dot 'c' = "ċ"
-dot 'E' = "Ė"
-dot 'e' = "ė"
-dot 'G' = "Ġ"
-dot 'g' = "ġ"
-dot 'I' = "İ"
-dot 'Z' = "Ż"
-dot 'z' = "ż"
-dot c = [c]
-
-macron :: Char -> String
-macron 'A' = "Ā"
-macron 'E' = "Ē"
-macron 'I' = "Ī"
-macron 'O' = "Ō"
-macron 'U' = "Ū"
-macron 'a' = "ā"
-macron 'e' = "ē"
-macron 'i' = "ī"
-macron 'o' = "ō"
-macron 'u' = "ū"
-macron c = [c]
-
-cedilla :: Char -> String
-cedilla 'c' = "ç"
-cedilla 'C' = "Ç"
-cedilla 's' = "ş"
-cedilla 'S' = "Ş"
-cedilla 't' = "ţ"
-cedilla 'T' = "Ţ"
-cedilla 'e' = "ȩ"
-cedilla 'E' = "Ȩ"
-cedilla 'h' = "ḩ"
-cedilla 'H' = "Ḩ"
-cedilla 'o' = "o̧"
-cedilla 'O' = "O̧"
-cedilla c = [c]
-
-hacek :: Char -> String
-hacek 'A' = "Ǎ"
-hacek 'a' = "ǎ"
-hacek 'C' = "Č"
-hacek 'c' = "č"
-hacek 'D' = "Ď"
-hacek 'd' = "ď"
-hacek 'E' = "Ě"
-hacek 'e' = "ě"
-hacek 'G' = "Ǧ"
-hacek 'g' = "ǧ"
-hacek 'H' = "Ȟ"
-hacek 'h' = "ȟ"
-hacek 'I' = "Ǐ"
-hacek 'i' = "ǐ"
-hacek 'j' = "ǰ"
-hacek 'K' = "Ǩ"
-hacek 'k' = "ǩ"
-hacek 'L' = "Ľ"
-hacek 'l' = "ľ"
-hacek 'N' = "Ň"
-hacek 'n' = "ň"
-hacek 'O' = "Ǒ"
-hacek 'o' = "ǒ"
-hacek 'R' = "Ř"
-hacek 'r' = "ř"
-hacek 'S' = "Š"
-hacek 's' = "š"
-hacek 'T' = "Ť"
-hacek 't' = "ť"
-hacek 'U' = "Ǔ"
-hacek 'u' = "ǔ"
-hacek 'Z' = "Ž"
-hacek 'z' = "ž"
-hacek c = [c]
-
-ogonek :: Char -> String
-ogonek 'a' = "ą"
-ogonek 'e' = "ę"
-ogonek 'o' = "ǫ"
-ogonek 'i' = "į"
-ogonek 'u' = "ų"
-ogonek 'A' = "Ą"
-ogonek 'E' = "Ę"
-ogonek 'I' = "Į"
-ogonek 'O' = "Ǫ"
-ogonek 'U' = "Ų"
-ogonek c = [c]
-
-breve :: Char -> String
-breve 'A' = "Ă"
-breve 'a' = "ă"
-breve 'E' = "Ĕ"
-breve 'e' = "ĕ"
-breve 'G' = "Ğ"
-breve 'g' = "ğ"
-breve 'I' = "Ĭ"
-breve 'i' = "ĭ"
-breve 'O' = "Ŏ"
-breve 'o' = "ŏ"
-breve 'U' = "Ŭ"
-breve 'u' = "ŭ"
-breve c = [c]
-
-toksToString :: [Tok] -> String
-toksToString = T.unpack . untokenize
-
mathDisplay :: String -> Inlines
mathDisplay = displayMath . trim
@@ -1119,7 +680,21 @@ citationLabel = do
cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
cites mode multi = try $ do
cits <- if multi
- then many1 simpleCiteArgs
+ then do
+ multiprenote <- optionMaybe $ toList <$> paropt
+ multipostnote <- optionMaybe $ toList <$> paropt
+ let (pre, suf) = case (multiprenote, multipostnote) of
+ (Just s , Nothing) -> (mempty, s)
+ (Nothing , Just t) -> (mempty, t)
+ (Just s , Just t ) -> (s, t)
+ _ -> (mempty, mempty)
+ tempCits <- many1 simpleCiteArgs
+ case tempCits of
+ (k:ks) -> case ks of
+ (_:_) -> return $ ((addMprenote pre k):init ks) ++
+ [addMpostnote suf (last ks)]
+ _ -> return [addMprenote pre (addMpostnote suf k)]
+ _ -> return [[]]
else count 1 simpleCiteArgs
let cs = concat cits
return $ case mode of
@@ -1127,6 +702,17 @@ cites mode multi = try $ do
(c:rest) -> c {citationMode = mode} : rest
[] -> []
_ -> map (\a -> a {citationMode = mode}) cs
+ where mprenote (k:ks) = (k:ks) ++ [Space]
+ mprenote _ = mempty
+ mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+ mpostnote _ = mempty
+ addMprenote mpn (k:ks) =
+ let mpnfinal = case citationPrefix k of
+ (_:_) -> mprenote mpn
+ _ -> mpn
+ in addPrefix mpnfinal (k:ks)
+ addMprenote _ _ = []
+ addMpostnote = addSuffix . mpostnote
citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
citation name mode multi = do
@@ -1181,22 +767,12 @@ tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
Tok _ _ t <- singleChar
return (str (T.unpack t))
-singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
- Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
- guard $ not $ toktype == Symbol &&
- T.any (`Set.member` specialChars) t
- if T.length t > 1
- then do
- let (t1, t2) = (T.take 1 t, T.drop 1 t)
- inp <- getInput
- setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
- return $ Tok pos toktype t1
- else return $ Tok pos toktype t
-
opt :: PandocMonad m => LP m Inlines
opt = bracketed inline <|> (str . T.unpack <$> rawopt)
+paropt :: PandocMonad m => LP m Inlines
+paropt = parenWrapped inline
+
rawopt :: PandocMonad m => LP m Text
rawopt = do
inner <- untokenize <$> bracketedToks
@@ -1204,30 +780,28 @@ rawopt = do
return $ "[" <> inner <> "]"
skipopts :: PandocMonad m => LP m ()
-skipopts = skipMany rawopt
+skipopts = skipMany (overlaySpecification <|> void rawopt)
-- opts in angle brackets are used in beamer
-rawangle :: PandocMonad m => LP m ()
-rawangle = try $ do
+overlaySpecification :: PandocMonad m => LP m ()
+overlaySpecification = try $ do
symbol '<'
- () <$ manyTill anyTok (symbol '>')
-
-skipangles :: PandocMonad m => LP m ()
-skipangles = skipMany rawangle
-
-ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
-ignore raw = do
- pos <- getPosition
- report $ SkippedContent raw pos
- return mempty
-
-withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
-withRaw parser = do
- inp <- getInput
- result <- parser
- nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
- let raw = takeWhile (/= nxt) inp
- return (result, raw)
+ ts <- manyTill overlayTok (symbol '>')
+ guard $ case ts of
+ -- see issue #3368
+ [Tok _ Word s] | T.all isLetter s -> s `elem`
+ ["beamer","presentation", "trans",
+ "handout","article", "second"]
+ _ -> True
+
+overlayTok :: PandocMonad m => LP m Tok
+overlayTok =
+ satisfyTok (\t ->
+ case t of
+ Tok _ Word _ -> True
+ Tok _ Spaces _ -> True
+ Tok _ Symbol c -> c `elem` ["-","+","@","|",":",","]
+ _ -> False)
inBrackets :: Inlines -> Inlines
inBrackets x = str "[" <> x <> str "]"
@@ -1275,6 +849,12 @@ inlineEnvironments = M.fromList [
, ("align*", mathEnvWith id (Just "aligned") "align*")
, ("alignat", mathEnvWith id (Just "aligned") "alignat")
, ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+ , ("dmath", mathEnvWith id Nothing "dmath")
+ , ("dmath*", mathEnvWith id Nothing "dmath*")
+ , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+ , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+ , ("darray", mathEnvWith id (Just "aligned") "darray")
+ , ("darray*", mathEnvWith id (Just "aligned") "darray*")
]
inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
@@ -1289,7 +869,7 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
, ("texttt", ttfamily)
, ("sout", extractSpaces strikeout <$> tok)
- , ("alert", skipangles >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+ , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
, ("lq", return (str "‘"))
, ("rq", return (str "’"))
, ("textquoteleft", return (str "‘"))
@@ -1318,7 +898,7 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")"))
, ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]"))
, ("ensuremath", mathInline . toksToString <$> braced)
- , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
+ , ("texorpdfstring", (\x _ -> x) <$> tok <*> tok)
, ("P", lit "¶")
, ("S", lit "§")
, ("$", lit "$")
@@ -1361,20 +941,32 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("copyright", lit "©")
, ("textasciicircum", lit "^")
, ("textasciitilde", lit "~")
- , ("H", accent '\779' hungarumlaut)
- , ("`", accent '`' grave)
- , ("'", accent '\'' acute)
- , ("^", accent '^' circ)
- , ("~", accent '~' tilde)
- , ("\"", accent '\776' umlaut)
- , (".", accent '\775' dot)
- , ("=", accent '\772' macron)
- , ("c", accent '\807' cedilla)
- , ("v", accent 'ˇ' hacek)
- , ("u", accent '\774' breve)
- , ("k", accent '\808' ogonek)
- , ("textogonekcentered", accent '\808' ogonek)
- , ("i", lit "i")
+ , ("H", accent '\779' Nothing) -- hungarumlaut
+ , ("`", accent '\768' (Just '`')) -- grave
+ , ("'", accent '\769' (Just '\'')) -- acute
+ , ("^", accent '\770' (Just '^')) -- circ
+ , ("~", accent '\771' (Just '~')) -- tilde
+ , ("\"", accent '\776' Nothing) -- umlaut
+ , (".", accent '\775' Nothing) -- dot
+ , ("=", accent '\772' Nothing) -- macron
+ , ("|", accent '\781' Nothing) -- vertical line above
+ , ("b", accent '\817' Nothing) -- macron below
+ , ("c", accent '\807' Nothing) -- cedilla
+ , ("G", accent '\783' Nothing) -- doublegrave
+ , ("h", accent '\777' Nothing) -- hookabove
+ , ("d", accent '\803' Nothing) -- dotbelow
+ , ("f", accent '\785' Nothing) -- inverted breve
+ , ("r", accent '\778' Nothing) -- ringabove
+ , ("t", accent '\865' Nothing) -- double inverted breve
+ , ("U", accent '\782' Nothing) -- double vertical line above
+ , ("v", accent '\780' Nothing) -- hacek
+ , ("u", accent '\774' Nothing) -- breve
+ , ("k", accent '\808' Nothing) -- ogonek
+ , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+ , ("i", lit "ı") -- dotless i
+ , ("j", lit "ȷ") -- dotless j
+ , ("newtie", accent '\785' Nothing) -- inverted breve
+ , ("textcircled", accent '\8413' Nothing) -- combining circle
, ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
guard $ not inTableCell
optional opt
@@ -1392,17 +984,25 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("footnote", skipopts >> note <$> grouped block)
, ("verb", doverb)
, ("lstinline", dolstinline)
+ , ("mintinline", domintinline)
, ("Verb", doverb)
- , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url ->
+ , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url ->
pure (link url "" (str url)))
, ("href", (unescapeURL . toksToString <$>
- braced <* optional sp) >>= \url ->
+ bracedUrl <* optional sp) >>= \url ->
tok >>= \lab -> pure (link url "" lab))
, ("includegraphics", do options <- option [] keyvals
src <- unescapeURL . T.unpack .
removeDoubleQuotes . untokenize <$> braced
mkImage options src)
- , ("enquote", enquote)
+ , ("enquote*", enquote True Nothing)
+ , ("enquote", enquote False Nothing)
+ -- foreignquote is supposed to use native quote marks
+ , ("foreignquote*", braced >>= enquote True . Just . untokenize)
+ , ("foreignquote", braced >>= enquote False . Just . untokenize)
+ -- hypehnquote uses regular quotes
+ , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
+ , ("hyphenquote", braced >>= enquote False . Just . untokenize)
, ("figurename", doTerm Translations.Figure)
, ("prefacename", doTerm Translations.Preface)
, ("refname", doTerm Translations.References)
@@ -1507,13 +1107,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("acsp", doAcronymPlural "abbrv")
-- siuntix
, ("SI", dosiunitx)
- -- units of siuntix
- , ("celsius", lit "°C")
- , ("degreeCelsius", lit "°C")
- , ("gram", lit "g")
- , ("meter", lit "m")
- , ("milli", lit "m")
- , ("square", dosquare)
-- hyphenat
, ("bshyp", lit "\\\173")
, ("fshyp", lit "/\173")
@@ -1542,8 +1135,18 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("Rn", romanNumeralLower)
-- babel
, ("foreignlanguage", foreignlanguage)
+ -- include
+ , ("input", include "input")
+ -- plain tex stuff that should just be passed through as raw tex
+ , ("ifdim", ifdim)
]
+ifdim :: PandocMonad m => LP m Inlines
+ifdim = do
+ contents <- manyTill anyTok (controlSeq "fi")
+ return $ rawInline "latex" $ T.unpack $
+ "\\ifdim" <> untokenize contents <> "\\fi"
+
makeUppercase :: Inlines -> Inlines
makeUppercase = fromList . walk (alterStr (map toUpper)) . toList
@@ -1693,7 +1296,6 @@ getRawCommand name txt = do
"def" ->
void $ manyTill anyTok braced
_ -> do
- skipangles
skipopts
option "" (try (optional sp *> dimenarg))
void $ many braced
@@ -1818,7 +1420,6 @@ end_ t = try (do
preamble :: PandocMonad m => LP m Blocks
preamble = mempty <$ many preambleBlock
where preambleBlock = spaces1
- <|> void include
<|> void macroDef
<|> void blockCommand
<|> void braced
@@ -1831,11 +1432,8 @@ paragraph = do
then return mempty
else return $ para x
-include :: PandocMonad m => LP m Blocks
-include = do
- (Tok _ (CtrlSeq name) _) <-
- controlSeq "include" <|> controlSeq "input" <|>
- controlSeq "subfile" <|> controlSeq "usepackage"
+include :: (PandocMonad m, Monoid a) => Text -> LP m a
+include name = do
skipMany opt
fs <- (map (T.unpack . removeDoubleQuotes . T.strip) . T.splitOn "," .
untokenize) <$> braced
@@ -1912,31 +1510,28 @@ letmacro = do
optional $ symbol '='
spaces
contents <- bracedOrToken
- return (name, Macro ExpandWhenDefined 0 Nothing contents)
+ return (name, Macro ExpandWhenDefined [] Nothing contents)
defmacro :: PandocMonad m => LP m (Text, Macro)
defmacro = try $ do
controlSeq "def"
Tok _ (CtrlSeq name) _ <- anyControlSeq
- numargs <- option 0 $ argSeq 1
+ argspecs <- many (argspecArg <|> argspecPattern)
-- we use withVerbatimMode, because macros are to be expanded
-- at point of use, not point of definition
contents <- withVerbatimMode bracedOrToken
- return (name, Macro ExpandWhenUsed numargs Nothing contents)
+ return (name, Macro ExpandWhenUsed argspecs Nothing contents)
--- Note: we don't yet support fancy things like #1.#2
-argSeq :: PandocMonad m => Int -> LP m Int
-argSeq n = do
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do
Tok _ (Arg i) _ <- satisfyTok isArgTok
- guard $ i == n
- argSeq (n+1) <|> return n
+ return $ ArgNum i
-isArgTok :: Tok -> Bool
-isArgTok (Tok _ (Arg _) _) = True
-isArgTok _ = False
-
-bracedOrToken :: PandocMonad m => LP m [Tok]
-bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar))
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =
+ Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+ (toktype' == Symbol || toktype' == Word) &&
+ (txt /= "{" && txt /= "\\" && txt /= "}")))
newcommand :: PandocMonad m => LP m (Text, Macro)
newcommand = do
@@ -1950,6 +1545,7 @@ newcommand = do
(symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
spaces
numargs <- option 0 $ try bracketedNum
+ let argspecs = map (\i -> ArgNum i) [1..numargs]
spaces
optarg <- option Nothing $ Just <$> try bracketedToks
spaces
@@ -1959,7 +1555,7 @@ newcommand = do
case M.lookup name macros of
Just _ -> report $ MacroAlreadyDefined (T.unpack txt) pos
Nothing -> return ()
- return (name, Macro ExpandWhenUsed numargs optarg contents)
+ return (name, Macro ExpandWhenUsed argspecs optarg contents)
newenvironment :: PandocMonad m => LP m (Text, Macro, Macro)
newenvironment = do
@@ -1972,6 +1568,7 @@ newenvironment = do
name <- untokenize <$> braced
spaces
numargs <- option 0 $ try bracketedNum
+ let argspecs = map (\i -> ArgNum i) [1..numargs]
spaces
optarg <- option Nothing $ Just <$> try bracketedToks
spaces
@@ -1983,13 +1580,8 @@ newenvironment = do
case M.lookup name macros of
Just _ -> report $ MacroAlreadyDefined (T.unpack name) pos
Nothing -> return ()
- return (name, Macro ExpandWhenUsed numargs optarg startcontents,
- Macro ExpandWhenUsed 0 Nothing endcontents)
-
-bracketedToks :: PandocMonad m => LP m [Tok]
-bracketedToks = do
- symbol '['
- mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']')
+ return (name, Macro ExpandWhenUsed argspecs optarg startcontents,
+ Macro ExpandWhenUsed [] Nothing endcontents)
bracketedNum :: PandocMonad m => LP m Int
bracketedNum = do
@@ -2003,11 +1595,13 @@ setCaption = do
ils <- tok
mblabel <- option Nothing $
try $ spaces >> controlSeq "label" >> (Just <$> tok)
- let ils' = case mblabel of
- Just lab -> ils <> spanWith
- ("",[],[("label", stringify lab)]) mempty
- Nothing -> ils
- updateState $ \st -> st{ sCaption = Just ils' }
+ let capt = case mblabel of
+ Just lab -> let slab = stringify lab
+ ils' = ils <> spanWith
+ ("",[],[("label", slab)]) mempty
+ in (Just ils', Just slab)
+ Nothing -> (Just ils, Nothing)
+ updateState $ \st -> st{ sCaption = capt }
return mempty
looseItem :: PandocMonad m => LP m Blocks
@@ -2018,28 +1612,27 @@ looseItem = do
return mempty
resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption = Nothing }
+resetCaption = updateState $ \st -> st{ sCaption = (Nothing, Nothing) }
-section :: PandocMonad m => Bool -> Attr -> Int -> LP m Blocks
-section starred (ident, classes, kvs) lvl = do
+section :: PandocMonad m => Attr -> Int -> LP m Blocks
+section (ident, classes, kvs) lvl = do
skipopts
contents <- grouped inline
lab <- option ident $
try (spaces >> controlSeq "label"
>> spaces >> toksToString <$> braced)
- let classes' = if starred then "unnumbered" : classes else classes
when (lvl == 0) $
updateState $ \st -> st{ sHasChapters = True }
- unless starred $ do
+ unless ("unnumbered" `elem` classes) $ do
hn <- sLastHeaderNum <$> getState
hasChapters <- sHasChapters <$> getState
let lvl' = lvl + if hasChapters then 1 else 0
- let num = incrementHeaderNum lvl' hn
- updateState $ \st -> st{ sLastHeaderNum = num }
- updateState $ \st -> st{ sLabels = M.insert lab
- [Str (renderHeaderNum num)]
- (sLabels st) }
- attr' <- registerHeader (lab, classes', kvs) contents
+ let num = incrementDottedNum lvl' hn
+ updateState $ \st -> st{ sLastHeaderNum = num
+ , sLabels = M.insert lab
+ [Str (renderDottedNum num)]
+ (sLabels st) }
+ attr' <- registerHeader (lab, classes, kvs) contents
return $ headerWith attr' lvl contents
blockCommand :: PandocMonad m => LP m Blocks
@@ -2100,23 +1693,23 @@ blockCommands = M.fromList
-- Koma-script metadata commands
, ("dedication", mempty <$ (skipopts *> tok >>= addMeta "dedication"))
-- sectioning
- , ("part", section False nullAttr (-1))
- , ("part*", section True nullAttr (-1))
- , ("chapter", section False nullAttr 0)
- , ("chapter*", section True ("",["unnumbered"],[]) 0)
- , ("section", section False nullAttr 1)
- , ("section*", section True ("",["unnumbered"],[]) 1)
- , ("subsection", section False nullAttr 2)
- , ("subsection*", section True ("",["unnumbered"],[]) 2)
- , ("subsubsection", section False nullAttr 3)
- , ("subsubsection*", section True ("",["unnumbered"],[]) 3)
- , ("paragraph", section False nullAttr 4)
- , ("paragraph*", section True ("",["unnumbered"],[]) 4)
- , ("subparagraph", section False nullAttr 5)
- , ("subparagraph*", section True ("",["unnumbered"],[]) 5)
+ , ("part", section nullAttr (-1))
+ , ("part*", section nullAttr (-1))
+ , ("chapter", section nullAttr 0)
+ , ("chapter*", section ("",["unnumbered"],[]) 0)
+ , ("section", section nullAttr 1)
+ , ("section*", section ("",["unnumbered"],[]) 1)
+ , ("subsection", section nullAttr 2)
+ , ("subsection*", section ("",["unnumbered"],[]) 2)
+ , ("subsubsection", section nullAttr 3)
+ , ("subsubsection*", section ("",["unnumbered"],[]) 3)
+ , ("paragraph", section nullAttr 4)
+ , ("paragraph*", section ("",["unnumbered"],[]) 4)
+ , ("subparagraph", section nullAttr 5)
+ , ("subparagraph*", section ("",["unnumbered"],[]) 5)
-- beamer slides
- , ("frametitle", section False nullAttr 3)
- , ("framesubtitle", section False nullAttr 4)
+ , ("frametitle", section nullAttr 3)
+ , ("framesubtitle", section nullAttr 4)
-- letters
, ("opening", (para . trimInlines) <$> (skipopts *> tok))
, ("closing", skipopts *> closing)
@@ -2152,6 +1745,18 @@ blockCommands = M.fromList
-- LaTeX colors
, ("textcolor", coloredBlock "color")
, ("colorbox", coloredBlock "background-color")
+ -- csquotes
+ , ("blockquote", blockquote False Nothing)
+ , ("blockcquote", blockquote True Nothing)
+ , ("foreignblockquote", braced >>= blockquote False . Just . untokenize)
+ , ("foreignblockcquote", braced >>= blockquote True . Just . untokenize)
+ , ("hyphenblockquote", braced >>= blockquote False . Just . untokenize)
+ , ("hyphenblockcquote", braced >>= blockquote True . Just . untokenize)
+ -- include
+ , ("include", include "include")
+ , ("input", include "input")
+ , ("subfile", include "subfile")
+ , ("usepackage", include "usepackage")
]
@@ -2192,6 +1797,7 @@ environments = M.fromList
, ("minted", minted)
, ("obeylines", obeylines)
, ("tikzpicture", rawVerbEnv "tikzpicture")
+ , ("lilypond", rawVerbEnv "lilypond")
-- etoolbox
, ("ifstrequal", ifstrequal)
, ("newtoggle", braced >>= newToggle)
@@ -2234,7 +1840,7 @@ rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
rawVerbEnv name = do
pos <- getPosition
(_, raw) <- withRaw $ verbEnv name
- let raw' = "\\begin{tikzpicture}" ++ toksToString raw
+ let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
if parseRaw
@@ -2248,7 +1854,20 @@ verbEnv name = withVerbatimMode $ do
skipopts
optional blankline
res <- manyTill anyTok (end_ name)
- return $ stripTrailingNewlines $ toksToString res
+ return $ T.unpack
+ $ stripTrailingNewline
+ $ untokenize
+ $ res
+
+-- Strip single final newline and any spaces following it.
+-- Input is unchanged if it doesn't end with newline +
+-- optional spaces.
+stripTrailingNewline :: Text -> Text
+stripTrailingNewline t =
+ let (b, e) = T.breakOnEnd "\n" t
+ in if T.all (== ' ') e
+ then T.dropEnd 1 b
+ else t
fancyverbEnv :: PandocMonad m => Text -> LP m Blocks
fancyverbEnv name = do
@@ -2303,12 +1922,43 @@ figure = try $ do
addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
addImageCaption = walkM go
- where go (Image attr alt (src,tit))
+ where go (Image attr@(_, cls, kvs) alt (src,tit))
| not ("fig:" `isPrefixOf` tit) = do
- mbcapt <- sCaption <$> getState
- return $ case mbcapt of
- Just ils -> Image attr (toList ils) (src, "fig:" ++ tit)
- Nothing -> Image attr alt (src,tit)
+ (mbcapt, mblab) <- sCaption <$> getState
+ let (alt', tit') = case mbcapt of
+ Just ils -> (toList ils, "fig:" ++ tit)
+ Nothing -> (alt, tit)
+ attr' = case mblab of
+ Just lab -> (lab, cls, kvs)
+ Nothing -> attr
+ case attr' of
+ ("", _, _) -> return ()
+ (ident, _, _) -> do
+ st <- getState
+ let chapnum =
+ case (sHasChapters st, sLastHeaderNum st) of
+ (True, DottedNum (n:_)) -> Just n
+ _ -> Nothing
+ let num = case sLastFigureNum st of
+ DottedNum [m,n] ->
+ case chapnum of
+ Just m' | m' == m -> DottedNum [m, n+1]
+ | otherwise -> DottedNum [m', 1]
+ Nothing -> DottedNum [1]
+ -- shouldn't happen
+ DottedNum [n] ->
+ case chapnum of
+ Just m -> DottedNum [m, 1]
+ Nothing -> DottedNum [n + 1]
+ _ ->
+ case chapnum of
+ Just n -> DottedNum [n, 1]
+ Nothing -> DottedNum [1]
+ setState $
+ st{ sLastFigureNum = num
+ , sLabels = M.insert ident
+ [Str (renderDottedNum num)] (sLabels st) }
+ return $ Image attr' alt' (src, tit')
go x = return x
coloredBlock :: PandocMonad m => String -> LP m Blocks
@@ -2321,7 +1971,8 @@ coloredBlock stylename = try $ do
graphicsPath :: PandocMonad m => LP m Blocks
graphicsPath = do
- ps <- map toksToString <$> (bgroup *> manyTill braced egroup)
+ ps <- map toksToString <$>
+ (bgroup *> spaces *> manyTill (braced <* spaces) egroup)
getResourcePath >>= setResourcePath . (++ ps)
return mempty
@@ -2579,7 +2230,7 @@ simpTable envname hasWidthParameter = try $ do
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
addTableCaption = walkM go
where go (Table c als ws hs rs) = do
- mbcapt <- sCaption <$> getState
+ (mbcapt, _) <- sCaption <$> getState
return $ case mbcapt of
Just ils -> Table (toList ils) als ws hs rs
Nothing -> Table c als ws hs rs
@@ -2590,7 +2241,6 @@ block :: PandocMonad m => LP m Blocks
block = do
res <- (mempty <$ spaces1)
<|> environment
- <|> include
<|> macroDef
<|> blockCommand
<|> paragraph
@@ -2613,137 +2263,3 @@ setDefaultLanguage = do
setTranslations l
updateState $ setMeta "lang" $ str (renderLang l)
return mempty
-
-polyglossiaLangToBCP47 :: M.Map String (String -> Lang)
-polyglossiaLangToBCP47 = M.fromList
- [ ("arabic", \o -> case filter (/=' ') o of
- "locale=algeria" -> Lang "ar" "" "DZ" []
- "locale=mashriq" -> Lang "ar" "" "SY" []
- "locale=libya" -> Lang "ar" "" "LY" []
- "locale=morocco" -> Lang "ar" "" "MA" []
- "locale=mauritania" -> Lang "ar" "" "MR" []
- "locale=tunisia" -> Lang "ar" "" "TN" []
- _ -> Lang "ar" "" "" [])
- , ("german", \o -> case filter (/=' ') o of
- "spelling=old" -> Lang "de" "" "DE" ["1901"]
- "variant=austrian,spelling=old"
- -> Lang "de" "" "AT" ["1901"]
- "variant=austrian" -> Lang "de" "" "AT" []
- "variant=swiss,spelling=old"
- -> Lang "de" "" "CH" ["1901"]
- "variant=swiss" -> Lang "de" "" "CH" []
- _ -> Lang "de" "" "" [])
- , ("lsorbian", \_ -> Lang "dsb" "" "" [])
- , ("greek", \o -> case filter (/=' ') o of
- "variant=poly" -> Lang "el" "" "polyton" []
- "variant=ancient" -> Lang "grc" "" "" []
- _ -> Lang "el" "" "" [])
- , ("english", \o -> case filter (/=' ') o of
- "variant=australian" -> Lang "en" "" "AU" []
- "variant=canadian" -> Lang "en" "" "CA" []
- "variant=british" -> Lang "en" "" "GB" []
- "variant=newzealand" -> Lang "en" "" "NZ" []
- "variant=american" -> Lang "en" "" "US" []
- _ -> Lang "en" "" "" [])
- , ("usorbian", \_ -> Lang "hsb" "" "" [])
- , ("latin", \o -> case filter (/=' ') o of
- "variant=classic" -> Lang "la" "" "" ["x-classic"]
- _ -> Lang "la" "" "" [])
- , ("slovenian", \_ -> Lang "sl" "" "" [])
- , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
- , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
- , ("afrikaans", \_ -> Lang "af" "" "" [])
- , ("amharic", \_ -> Lang "am" "" "" [])
- , ("assamese", \_ -> Lang "as" "" "" [])
- , ("asturian", \_ -> Lang "ast" "" "" [])
- , ("bulgarian", \_ -> Lang "bg" "" "" [])
- , ("bengali", \_ -> Lang "bn" "" "" [])
- , ("tibetan", \_ -> Lang "bo" "" "" [])
- , ("breton", \_ -> Lang "br" "" "" [])
- , ("catalan", \_ -> Lang "ca" "" "" [])
- , ("welsh", \_ -> Lang "cy" "" "" [])
- , ("czech", \_ -> Lang "cs" "" "" [])
- , ("coptic", \_ -> Lang "cop" "" "" [])
- , ("danish", \_ -> Lang "da" "" "" [])
- , ("divehi", \_ -> Lang "dv" "" "" [])
- , ("esperanto", \_ -> Lang "eo" "" "" [])
- , ("spanish", \_ -> Lang "es" "" "" [])
- , ("estonian", \_ -> Lang "et" "" "" [])
- , ("basque", \_ -> Lang "eu" "" "" [])
- , ("farsi", \_ -> Lang "fa" "" "" [])
- , ("finnish", \_ -> Lang "fi" "" "" [])
- , ("french", \_ -> Lang "fr" "" "" [])
- , ("friulan", \_ -> Lang "fur" "" "" [])
- , ("irish", \_ -> Lang "ga" "" "" [])
- , ("scottish", \_ -> Lang "gd" "" "" [])
- , ("ethiopic", \_ -> Lang "gez" "" "" [])
- , ("galician", \_ -> Lang "gl" "" "" [])
- , ("hebrew", \_ -> Lang "he" "" "" [])
- , ("hindi", \_ -> Lang "hi" "" "" [])
- , ("croatian", \_ -> Lang "hr" "" "" [])
- , ("magyar", \_ -> Lang "hu" "" "" [])
- , ("armenian", \_ -> Lang "hy" "" "" [])
- , ("interlingua", \_ -> Lang "ia" "" "" [])
- , ("indonesian", \_ -> Lang "id" "" "" [])
- , ("icelandic", \_ -> Lang "is" "" "" [])
- , ("italian", \_ -> Lang "it" "" "" [])
- , ("japanese", \_ -> Lang "jp" "" "" [])
- , ("khmer", \_ -> Lang "km" "" "" [])
- , ("kurmanji", \_ -> Lang "kmr" "" "" [])
- , ("kannada", \_ -> Lang "kn" "" "" [])
- , ("korean", \_ -> Lang "ko" "" "" [])
- , ("lao", \_ -> Lang "lo" "" "" [])
- , ("lithuanian", \_ -> Lang "lt" "" "" [])
- , ("latvian", \_ -> Lang "lv" "" "" [])
- , ("malayalam", \_ -> Lang "ml" "" "" [])
- , ("mongolian", \_ -> Lang "mn" "" "" [])
- , ("marathi", \_ -> Lang "mr" "" "" [])
- , ("dutch", \_ -> Lang "nl" "" "" [])
- , ("nynorsk", \_ -> Lang "nn" "" "" [])
- , ("norsk", \_ -> Lang "no" "" "" [])
- , ("nko", \_ -> Lang "nqo" "" "" [])
- , ("occitan", \_ -> Lang "oc" "" "" [])
- , ("panjabi", \_ -> Lang "pa" "" "" [])
- , ("polish", \_ -> Lang "pl" "" "" [])
- , ("piedmontese", \_ -> Lang "pms" "" "" [])
- , ("portuguese", \_ -> Lang "pt" "" "" [])
- , ("romansh", \_ -> Lang "rm" "" "" [])
- , ("romanian", \_ -> Lang "ro" "" "" [])
- , ("russian", \_ -> Lang "ru" "" "" [])
- , ("sanskrit", \_ -> Lang "sa" "" "" [])
- , ("samin", \_ -> Lang "se" "" "" [])
- , ("slovak", \_ -> Lang "sk" "" "" [])
- , ("albanian", \_ -> Lang "sq" "" "" [])
- , ("serbian", \_ -> Lang "sr" "" "" [])
- , ("swedish", \_ -> Lang "sv" "" "" [])
- , ("syriac", \_ -> Lang "syr" "" "" [])
- , ("tamil", \_ -> Lang "ta" "" "" [])
- , ("telugu", \_ -> Lang "te" "" "" [])
- , ("thai", \_ -> Lang "th" "" "" [])
- , ("turkmen", \_ -> Lang "tk" "" "" [])
- , ("turkish", \_ -> Lang "tr" "" "" [])
- , ("ukrainian", \_ -> Lang "uk" "" "" [])
- , ("urdu", \_ -> Lang "ur" "" "" [])
- , ("vietnamese", \_ -> Lang "vi" "" "" [])
- ]
-
-babelLangToBCP47 :: String -> Maybe Lang
-babelLangToBCP47 s =
- case s of
- "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
- "naustrian" -> Just $ Lang "de" "" "AT" []
- "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
- "nswissgerman" -> Just $ Lang "de" "" "CH" []
- "german" -> Just $ Lang "de" "" "DE" ["1901"]
- "ngerman" -> Just $ Lang "de" "" "DE" []
- "lowersorbian" -> Just $ Lang "dsb" "" "" []
- "uppersorbian" -> Just $ Lang "hsb" "" "" []
- "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
- "slovene" -> Just $ Lang "sl" "" "" []
- "australian" -> Just $ Lang "en" "" "AU" []
- "canadian" -> Just $ Lang "en" "" "CA" []
- "british" -> Just $ Lang "en" "" "GB" []
- "newzealand" -> Just $ Lang "en" "" "NZ" []
- "american" -> Just $ Lang "en" "" "US" []
- "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
- _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
new file mode 100644
index 000000000..9b57c98fd
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -0,0 +1,173 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-
+Copyright (C) 2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Lang
+ Copyright : Copyright (C) 2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for parsing polyglossia and babel language specifiers to
+BCP47 'Lang'.
+-}
+module Text.Pandoc.Readers.LaTeX.Lang
+ ( polyglossiaLangToBCP47
+ , babelLangToBCP47
+ )
+where
+import Prelude
+import qualified Data.Map as M
+import Text.Pandoc.BCP47 (Lang(..))
+
+polyglossiaLangToBCP47 :: M.Map String (String -> Lang)
+polyglossiaLangToBCP47 = M.fromList
+ [ ("arabic", \o -> case filter (/=' ') o of
+ "locale=algeria" -> Lang "ar" "" "DZ" []
+ "locale=mashriq" -> Lang "ar" "" "SY" []
+ "locale=libya" -> Lang "ar" "" "LY" []
+ "locale=morocco" -> Lang "ar" "" "MA" []
+ "locale=mauritania" -> Lang "ar" "" "MR" []
+ "locale=tunisia" -> Lang "ar" "" "TN" []
+ _ -> Lang "ar" "" "" [])
+ , ("german", \o -> case filter (/=' ') o of
+ "spelling=old" -> Lang "de" "" "DE" ["1901"]
+ "variant=austrian,spelling=old"
+ -> Lang "de" "" "AT" ["1901"]
+ "variant=austrian" -> Lang "de" "" "AT" []
+ "variant=swiss,spelling=old"
+ -> Lang "de" "" "CH" ["1901"]
+ "variant=swiss" -> Lang "de" "" "CH" []
+ _ -> Lang "de" "" "" [])
+ , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+ , ("greek", \o -> case filter (/=' ') o of
+ "variant=poly" -> Lang "el" "" "polyton" []
+ "variant=ancient" -> Lang "grc" "" "" []
+ _ -> Lang "el" "" "" [])
+ , ("english", \o -> case filter (/=' ') o of
+ "variant=australian" -> Lang "en" "" "AU" []
+ "variant=canadian" -> Lang "en" "" "CA" []
+ "variant=british" -> Lang "en" "" "GB" []
+ "variant=newzealand" -> Lang "en" "" "NZ" []
+ "variant=american" -> Lang "en" "" "US" []
+ _ -> Lang "en" "" "" [])
+ , ("usorbian", \_ -> Lang "hsb" "" "" [])
+ , ("latin", \o -> case filter (/=' ') o of
+ "variant=classic" -> Lang "la" "" "" ["x-classic"]
+ _ -> Lang "la" "" "" [])
+ , ("slovenian", \_ -> Lang "sl" "" "" [])
+ , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
+ , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
+ , ("afrikaans", \_ -> Lang "af" "" "" [])
+ , ("amharic", \_ -> Lang "am" "" "" [])
+ , ("assamese", \_ -> Lang "as" "" "" [])
+ , ("asturian", \_ -> Lang "ast" "" "" [])
+ , ("bulgarian", \_ -> Lang "bg" "" "" [])
+ , ("bengali", \_ -> Lang "bn" "" "" [])
+ , ("tibetan", \_ -> Lang "bo" "" "" [])
+ , ("breton", \_ -> Lang "br" "" "" [])
+ , ("catalan", \_ -> Lang "ca" "" "" [])
+ , ("welsh", \_ -> Lang "cy" "" "" [])
+ , ("czech", \_ -> Lang "cs" "" "" [])
+ , ("coptic", \_ -> Lang "cop" "" "" [])
+ , ("danish", \_ -> Lang "da" "" "" [])
+ , ("divehi", \_ -> Lang "dv" "" "" [])
+ , ("esperanto", \_ -> Lang "eo" "" "" [])
+ , ("spanish", \_ -> Lang "es" "" "" [])
+ , ("estonian", \_ -> Lang "et" "" "" [])
+ , ("basque", \_ -> Lang "eu" "" "" [])
+ , ("farsi", \_ -> Lang "fa" "" "" [])
+ , ("finnish", \_ -> Lang "fi" "" "" [])
+ , ("french", \_ -> Lang "fr" "" "" [])
+ , ("friulan", \_ -> Lang "fur" "" "" [])
+ , ("irish", \_ -> Lang "ga" "" "" [])
+ , ("scottish", \_ -> Lang "gd" "" "" [])
+ , ("ethiopic", \_ -> Lang "gez" "" "" [])
+ , ("galician", \_ -> Lang "gl" "" "" [])
+ , ("hebrew", \_ -> Lang "he" "" "" [])
+ , ("hindi", \_ -> Lang "hi" "" "" [])
+ , ("croatian", \_ -> Lang "hr" "" "" [])
+ , ("magyar", \_ -> Lang "hu" "" "" [])
+ , ("armenian", \_ -> Lang "hy" "" "" [])
+ , ("interlingua", \_ -> Lang "ia" "" "" [])
+ , ("indonesian", \_ -> Lang "id" "" "" [])
+ , ("icelandic", \_ -> Lang "is" "" "" [])
+ , ("italian", \_ -> Lang "it" "" "" [])
+ , ("japanese", \_ -> Lang "jp" "" "" [])
+ , ("khmer", \_ -> Lang "km" "" "" [])
+ , ("kurmanji", \_ -> Lang "kmr" "" "" [])
+ , ("kannada", \_ -> Lang "kn" "" "" [])
+ , ("korean", \_ -> Lang "ko" "" "" [])
+ , ("lao", \_ -> Lang "lo" "" "" [])
+ , ("lithuanian", \_ -> Lang "lt" "" "" [])
+ , ("latvian", \_ -> Lang "lv" "" "" [])
+ , ("malayalam", \_ -> Lang "ml" "" "" [])
+ , ("mongolian", \_ -> Lang "mn" "" "" [])
+ , ("marathi", \_ -> Lang "mr" "" "" [])
+ , ("dutch", \_ -> Lang "nl" "" "" [])
+ , ("nynorsk", \_ -> Lang "nn" "" "" [])
+ , ("norsk", \_ -> Lang "no" "" "" [])
+ , ("nko", \_ -> Lang "nqo" "" "" [])
+ , ("occitan", \_ -> Lang "oc" "" "" [])
+ , ("panjabi", \_ -> Lang "pa" "" "" [])
+ , ("polish", \_ -> Lang "pl" "" "" [])
+ , ("piedmontese", \_ -> Lang "pms" "" "" [])
+ , ("portuguese", \_ -> Lang "pt" "" "" [])
+ , ("romansh", \_ -> Lang "rm" "" "" [])
+ , ("romanian", \_ -> Lang "ro" "" "" [])
+ , ("russian", \_ -> Lang "ru" "" "" [])
+ , ("sanskrit", \_ -> Lang "sa" "" "" [])
+ , ("samin", \_ -> Lang "se" "" "" [])
+ , ("slovak", \_ -> Lang "sk" "" "" [])
+ , ("albanian", \_ -> Lang "sq" "" "" [])
+ , ("serbian", \_ -> Lang "sr" "" "" [])
+ , ("swedish", \_ -> Lang "sv" "" "" [])
+ , ("syriac", \_ -> Lang "syr" "" "" [])
+ , ("tamil", \_ -> Lang "ta" "" "" [])
+ , ("telugu", \_ -> Lang "te" "" "" [])
+ , ("thai", \_ -> Lang "th" "" "" [])
+ , ("turkmen", \_ -> Lang "tk" "" "" [])
+ , ("turkish", \_ -> Lang "tr" "" "" [])
+ , ("ukrainian", \_ -> Lang "uk" "" "" [])
+ , ("urdu", \_ -> Lang "ur" "" "" [])
+ , ("vietnamese", \_ -> Lang "vi" "" "" [])
+ ]
+
+babelLangToBCP47 :: String -> Maybe Lang
+babelLangToBCP47 s =
+ case s of
+ "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
+ "naustrian" -> Just $ Lang "de" "" "AT" []
+ "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
+ "nswissgerman" -> Just $ Lang "de" "" "CH" []
+ "german" -> Just $ Lang "de" "" "DE" ["1901"]
+ "ngerman" -> Just $ Lang "de" "" "DE" []
+ "lowersorbian" -> Just $ Lang "dsb" "" "" []
+ "uppersorbian" -> Just $ Lang "hsb" "" "" []
+ "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
+ "slovene" -> Just $ Lang "sl" "" "" []
+ "australian" -> Just $ Lang "en" "" "AU" []
+ "canadian" -> Just $ Lang "en" "" "CA" []
+ "british" -> Just $ Lang "en" "" "GB" []
+ "newzealand" -> Just $ Lang "en" "" "NZ" []
+ "american" -> Just $ Lang "en" "" "US" []
+ "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+ _ -> fmap ($ "") $ M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
new file mode 100644
index 000000000..9256217fe
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -0,0 +1,668 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX.Parsing
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+General parsing types and functions for LaTeX.
+-}
+module Text.Pandoc.Readers.LaTeX.Parsing
+ ( DottedNum(..)
+ , renderDottedNum
+ , incrementDottedNum
+ , LaTeXState(..)
+ , defaultLaTeXState
+ , LP
+ , withVerbatimMode
+ , rawLaTeXParser
+ , applyMacros
+ , tokenize
+ , untokenize
+ , untoken
+ , totoks
+ , toksToString
+ , satisfyTok
+ , doMacros
+ , setpos
+ , anyControlSeq
+ , anySymbol
+ , isNewlineTok
+ , isWordTok
+ , isArgTok
+ , spaces
+ , spaces1
+ , tokTypeIn
+ , controlSeq
+ , symbol
+ , symbolIn
+ , sp
+ , whitespace
+ , newlineTok
+ , comment
+ , anyTok
+ , singleChar
+ , specialChars
+ , endline
+ , blankline
+ , primEscape
+ , bgroup
+ , egroup
+ , grouped
+ , braced
+ , braced'
+ , bracedUrl
+ , bracedOrToken
+ , bracketed
+ , bracketedToks
+ , parenWrapped
+ , dimenarg
+ , ignore
+ , withRaw
+ ) where
+
+import Prelude
+import Control.Applicative (many, (<|>))
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Control.Monad.Trans (lift)
+import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
+import Data.Default
+import Data.List (intercalate)
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
+ ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Shared
+import Text.Parsec.Pos
+
+newtype DottedNum = DottedNum [Int]
+ deriving (Show)
+
+renderDottedNum :: DottedNum -> String
+renderDottedNum (DottedNum xs) =
+ intercalate "." (map show xs)
+
+incrementDottedNum :: Int -> DottedNum -> DottedNum
+incrementDottedNum level (DottedNum ns) = DottedNum $
+ case reverse (take level (ns ++ repeat 0)) of
+ (x:xs) -> reverse (x+1 : xs)
+ [] -> [] -- shouldn't happen
+
+data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
+ , sMeta :: Meta
+ , sQuoteContext :: QuoteContext
+ , sMacros :: M.Map Text Macro
+ , sContainers :: [String]
+ , sHeaders :: M.Map Inlines String
+ , sLogMessages :: [LogMessage]
+ , sIdentifiers :: Set.Set String
+ , sVerbatimMode :: Bool
+ , sCaption :: (Maybe Inlines, Maybe String)
+ , sInListItem :: Bool
+ , sInTableCell :: Bool
+ , sLastHeaderNum :: DottedNum
+ , sLastFigureNum :: DottedNum
+ , sLabels :: M.Map String [Inline]
+ , sHasChapters :: Bool
+ , sToggles :: M.Map String Bool
+ }
+ deriving Show
+
+defaultLaTeXState :: LaTeXState
+defaultLaTeXState = LaTeXState{ sOptions = def
+ , sMeta = nullMeta
+ , sQuoteContext = NoQuote
+ , sMacros = M.empty
+ , sContainers = []
+ , sHeaders = M.empty
+ , sLogMessages = []
+ , sIdentifiers = Set.empty
+ , sVerbatimMode = False
+ , sCaption = (Nothing, Nothing)
+ , sInListItem = False
+ , sInTableCell = False
+ , sLastHeaderNum = DottedNum []
+ , sLastFigureNum = DottedNum []
+ , sLabels = M.empty
+ , sHasChapters = False
+ , sToggles = M.empty
+ }
+
+instance PandocMonad m => HasQuoteContext LaTeXState m where
+ getQuoteContext = sQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = sQuoteContext oldState
+ setState oldState { sQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { sQuoteContext = oldQuoteContext }
+ return result
+
+instance HasLogMessages LaTeXState where
+ addLogMessage msg st = st{ sLogMessages = msg : sLogMessages st }
+ getLogMessages st = reverse $ sLogMessages st
+
+instance HasIdentifierList LaTeXState where
+ extractIdentifierList = sIdentifiers
+ updateIdentifierList f st = st{ sIdentifiers = f $ sIdentifiers st }
+
+instance HasIncludeFiles LaTeXState where
+ getIncludeFiles = sContainers
+ addIncludeFile f s = s{ sContainers = f : sContainers s }
+ dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
+
+instance HasHeaderMap LaTeXState where
+ extractHeaderMap = sHeaders
+ updateHeaderMap f st = st{ sHeaders = f $ sHeaders st }
+
+instance HasMacros LaTeXState where
+ extractMacros st = sMacros st
+ updateMacros f st = st{ sMacros = f (sMacros st) }
+
+instance HasReaderOptions LaTeXState where
+ extractReaderOptions = sOptions
+
+instance HasMeta LaTeXState where
+ setMeta field val st =
+ st{ sMeta = setMeta field val $ sMeta st }
+ deleteMeta field st =
+ st{ sMeta = deleteMeta field $ sMeta st }
+
+instance Default LaTeXState where
+ def = defaultLaTeXState
+
+type LP m = ParserT [Tok] LaTeXState m
+
+withVerbatimMode :: PandocMonad m => LP m a -> LP m a
+withVerbatimMode parser = do
+ updateState $ \st -> st{ sVerbatimMode = True }
+ result <- parser
+ updateState $ \st -> st{ sVerbatimMode = False }
+ return result
+
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => Bool -> LP m a -> LP m a -> ParserT String s m (a, String)
+rawLaTeXParser retokenize parser valParser = do
+ inp <- getInput
+ let toks = tokenize "source" $ T.pack inp
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate }
+ let lstate' = lstate { sMacros = extractMacros pstate }
+ let rawparser = (,) <$> withRaw valParser <*> getState
+ res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+ case res' of
+ Left _ -> mzero
+ Right toks' -> do
+ res <- lift $ runParserT (do when retokenize $ do
+ -- retokenize, applying macros
+ doMacros 0
+ ts <- many (satisfyTok (const True))
+ setInput ts
+ rawparser)
+ lstate' "chunk" toks'
+ case res of
+ Left _ -> mzero
+ Right ((val, raw), st) -> do
+ updateState (updateMacros (sMacros st <>))
+ _ <- takeP (T.length (untokenize toks'))
+ return (val, T.unpack (untokenize raw))
+
+applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+ => String -> ParserT String s m String
+applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
+ do let retokenize = doMacros 0 *>
+ (toksToString <$> many (satisfyTok (const True)))
+ pstate <- getState
+ let lstate = def{ sOptions = extractReaderOptions pstate
+ , sMacros = extractMacros pstate }
+ res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
+ case res of
+ Left e -> fail (show e)
+ Right s' -> return s'
+tokenize :: SourceName -> Text -> [Tok]
+tokenize sourcename = totoks (initialPos sourcename)
+
+totoks :: SourcePos -> Text -> [Tok]
+totoks pos t =
+ case T.uncons t of
+ Nothing -> []
+ Just (c, rest)
+ | c == '\n' ->
+ Tok pos Newline "\n"
+ : totoks (setSourceColumn (incSourceLine pos 1) 1) rest
+ | isSpaceOrTab c ->
+ let (sps, rest') = T.span isSpaceOrTab t
+ in Tok pos Spaces sps
+ : totoks (incSourceColumn pos (T.length sps))
+ rest'
+ | isAlphaNum c ->
+ let (ws, rest') = T.span isAlphaNum t
+ in Tok pos Word ws
+ : totoks (incSourceColumn pos (T.length ws)) rest'
+ | c == '%' ->
+ let (cs, rest') = T.break (== '\n') rest
+ in Tok pos Comment ("%" <> cs)
+ : totoks (incSourceColumn pos (1 + T.length cs)) rest'
+ | c == '\\' ->
+ case T.uncons rest of
+ Nothing -> [Tok pos (CtrlSeq " ") "\\"]
+ Just (d, rest')
+ | isLetterOrAt d ->
+ -- \makeatletter is common in macro defs;
+ -- ideally we should make tokenization sensitive
+ -- to \makeatletter and \makeatother, but this is
+ -- probably best for now
+ let (ws, rest'') = T.span isLetterOrAt rest
+ (ss, rest''') = T.span isSpaceOrTab rest''
+ in Tok pos (CtrlSeq ws) ("\\" <> ws <> ss)
+ : totoks (incSourceColumn pos
+ (1 + T.length ws + T.length ss)) rest'''
+ | isSpaceOrTab d || d == '\n' ->
+ let (w1, r1) = T.span isSpaceOrTab rest
+ (w2, (w3, r3)) = case T.uncons r1 of
+ Just ('\n', r2)
+ -> (T.pack "\n",
+ T.span isSpaceOrTab r2)
+ _ -> (mempty, (mempty, r1))
+ ws = "\\" <> w1 <> w2 <> w3
+ in case T.uncons r3 of
+ Just ('\n', _) ->
+ Tok pos (CtrlSeq " ") ("\\" <> w1)
+ : totoks (incSourceColumn pos (T.length ws))
+ r1
+ _ ->
+ Tok pos (CtrlSeq " ") ws
+ : totoks (incSourceColumn pos (T.length ws))
+ r3
+ | otherwise ->
+ Tok pos (CtrlSeq (T.singleton d)) (T.pack [c,d])
+ : totoks (incSourceColumn pos 2) rest'
+ | c == '#' ->
+ let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
+ in case safeRead (T.unpack t1) of
+ Just i ->
+ Tok pos (Arg i) ("#" <> t1)
+ : totoks (incSourceColumn pos (1 + T.length t1)) t2
+ Nothing ->
+ Tok pos Symbol "#"
+ : totoks (incSourceColumn pos 1) t2
+ | c == '^' ->
+ case T.uncons rest of
+ Just ('^', rest') ->
+ case T.uncons rest' of
+ Just (d, rest'')
+ | isLowerHex d ->
+ case T.uncons rest'' of
+ Just (e, rest''') | isLowerHex e ->
+ Tok pos Esc2 (T.pack ['^','^',d,e])
+ : totoks (incSourceColumn pos 4) rest'''
+ _ ->
+ Tok pos Esc1 (T.pack ['^','^',d])
+ : totoks (incSourceColumn pos 3) rest''
+ | d < '\128' ->
+ Tok pos Esc1 (T.pack ['^','^',d])
+ : totoks (incSourceColumn pos 3) rest''
+ _ -> Tok pos Symbol "^" :
+ Tok (incSourceColumn pos 1) Symbol "^" :
+ totoks (incSourceColumn pos 2) rest'
+ _ -> Tok pos Symbol "^"
+ : totoks (incSourceColumn pos 1) rest
+ | otherwise ->
+ Tok pos Symbol (T.singleton c) : totoks (incSourceColumn pos 1) rest
+
+isSpaceOrTab :: Char -> Bool
+isSpaceOrTab ' ' = True
+isSpaceOrTab '\t' = True
+isSpaceOrTab _ = False
+
+isLetterOrAt :: Char -> Bool
+isLetterOrAt '@' = True
+isLetterOrAt c = isLetter c
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+untokenize :: [Tok] -> Text
+untokenize = mconcat . map untoken
+
+untoken :: Tok -> Text
+untoken (Tok _ _ t) = t
+
+toksToString :: [Tok] -> String
+toksToString = T.unpack . untokenize
+
+satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
+satisfyTok f =
+ try $ do
+ res <- tokenPrim (T.unpack . untoken) updatePos matcher
+ doMacros 0 -- apply macros on remaining input stream
+ return res
+ where matcher t | f t = Just t
+ | otherwise = Nothing
+ updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
+ updatePos _spos _ (Tok pos _ _ : _) = pos
+ updatePos spos _ [] = incSourceColumn spos 1
+
+doMacros :: PandocMonad m => Int -> LP m ()
+doMacros n = do
+ verbatimMode <- sVerbatimMode <$> getState
+ unless verbatimMode $ do
+ inp <- getInput
+ case inp of
+ Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos name ts
+ Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+ Tok _ Word name : Tok _ Symbol "}" : ts
+ -> handleMacros spos ("end" <> name) ts
+ Tok _ (CtrlSeq "expandafter") _ : t : ts
+ -> do setInput ts
+ doMacros n
+ getInput >>= setInput . combineTok t
+ Tok spos (CtrlSeq name) _ : ts
+ -> handleMacros spos name ts
+ _ -> return ()
+ where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
+ | T.all isLetterOrAt w =
+ Tok spos (CtrlSeq (name <> w)) (x1 <> w <> x2) : ts
+ where (x1, x2) = T.break isSpaceOrTab x
+ combineTok t ts = t:ts
+ handleMacros spos name ts = do
+ macros <- sMacros <$> getState
+ case M.lookup name macros of
+ Nothing -> return ()
+ Just (Macro expansionPoint argspecs optarg newtoks) -> do
+ setInput ts
+ let matchTok (Tok _ toktype txt) =
+ satisfyTok (\(Tok _ toktype' txt') ->
+ toktype == toktype' &&
+ txt == txt')
+ let matchPattern toks = try $ mapM_ matchTok toks
+ let getargs argmap [] = return argmap
+ getargs argmap (Pattern toks : rest) = try $ do
+ matchPattern toks
+ getargs argmap rest
+ getargs argmap (ArgNum i : Pattern toks : rest) =
+ try $ do
+ x <- mconcat <$> manyTill
+ (braced <|> ((:[]) <$> anyTok))
+ (matchPattern toks)
+ getargs (M.insert i x argmap) rest
+ getargs argmap (ArgNum i : rest) = do
+ x <- try $ spaces >> bracedOrToken
+ getargs (M.insert i x argmap) rest
+ args <- case optarg of
+ Nothing -> getargs M.empty argspecs
+ Just o -> do
+ x <- option o bracketedToks
+ getargs (M.singleton 1 x) argspecs
+ -- first boolean param is true if we're tokenizing
+ -- an argument (in which case we don't want to
+ -- expand #1 etc.)
+ let addTok False (Tok _ (Arg i) _) acc =
+ case M.lookup i args of
+ Nothing -> mzero
+ Just xs -> foldr (addTok True) acc xs
+ -- see #4007
+ addTok _ (Tok _ (CtrlSeq x) txt)
+ acc@(Tok _ Word _ : _)
+ | not (T.null txt) &&
+ isLetter (T.last txt) =
+ Tok spos (CtrlSeq x) (txt <> " ") : acc
+ addTok _ t acc = setpos spos t : acc
+ ts' <- getInput
+ setInput $ foldr (addTok False) ts' newtoks
+ case expansionPoint of
+ ExpandWhenUsed ->
+ if n > 20 -- detect macro expansion loops
+ then throwError $ PandocMacroLoop (T.unpack name)
+ else doMacros (n + 1)
+ ExpandWhenDefined -> return ()
+
+
+setpos :: SourcePos -> Tok -> Tok
+setpos spos (Tok _ tt txt) = Tok spos tt txt
+
+anyControlSeq :: PandocMonad m => LP m Tok
+anyControlSeq = satisfyTok isCtrlSeq
+
+isCtrlSeq :: Tok -> Bool
+isCtrlSeq (Tok _ (CtrlSeq _) _) = True
+isCtrlSeq _ = False
+
+anySymbol :: PandocMonad m => LP m Tok
+anySymbol = satisfyTok isSymbolTok
+
+isSymbolTok :: Tok -> Bool
+isSymbolTok (Tok _ Symbol _) = True
+isSymbolTok _ = False
+
+isWordTok :: Tok -> Bool
+isWordTok (Tok _ Word _) = True
+isWordTok _ = False
+
+isArgTok :: Tok -> Bool
+isArgTok (Tok _ (Arg _) _) = True
+isArgTok _ = False
+
+spaces :: PandocMonad m => LP m ()
+spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+spaces1 :: PandocMonad m => LP m ()
+spaces1 = skipMany1 (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
+
+tokTypeIn :: [TokType] -> Tok -> Bool
+tokTypeIn toktypes (Tok _ tt _) = tt `elem` toktypes
+
+controlSeq :: PandocMonad m => Text -> LP m Tok
+controlSeq name = satisfyTok isNamed
+ where isNamed (Tok _ (CtrlSeq n) _) = n == name
+ isNamed _ = False
+
+symbol :: PandocMonad m => Char -> LP m Tok
+symbol c = satisfyTok isc
+ where isc (Tok _ Symbol d) = case T.uncons d of
+ Just (c',_) -> c == c'
+ _ -> False
+ isc _ = False
+
+symbolIn :: PandocMonad m => [Char] -> LP m Tok
+symbolIn cs = satisfyTok isInCs
+ where isInCs (Tok _ Symbol d) = case T.uncons d of
+ Just (c,_) -> c `elem` cs
+ _ -> False
+ isInCs _ = False
+
+sp :: PandocMonad m => LP m ()
+sp = whitespace <|> endline
+
+whitespace :: PandocMonad m => LP m ()
+whitespace = () <$ satisfyTok isSpaceTok
+
+isSpaceTok :: Tok -> Bool
+isSpaceTok (Tok _ Spaces _) = True
+isSpaceTok _ = False
+
+newlineTok :: PandocMonad m => LP m ()
+newlineTok = () <$ satisfyTok isNewlineTok
+
+isNewlineTok :: Tok -> Bool
+isNewlineTok (Tok _ Newline _) = True
+isNewlineTok _ = False
+
+comment :: PandocMonad m => LP m ()
+comment = () <$ satisfyTok isCommentTok
+
+isCommentTok :: Tok -> Bool
+isCommentTok (Tok _ Comment _) = True
+isCommentTok _ = False
+
+anyTok :: PandocMonad m => LP m Tok
+anyTok = satisfyTok (const True)
+
+singleChar :: PandocMonad m => LP m Tok
+singleChar = try $ do
+ Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
+ guard $ not $ toktype == Symbol &&
+ T.any (`Set.member` specialChars) t
+ if T.length t > 1
+ then do
+ let (t1, t2) = (T.take 1 t, T.drop 1 t)
+ inp <- getInput
+ setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
+ return $ Tok pos toktype t1
+ else return $ Tok pos toktype t
+
+specialChars :: Set.Set Char
+specialChars = Set.fromList "#$%&~_^\\{}"
+
+endline :: PandocMonad m => LP m ()
+endline = try $ do
+ newlineTok
+ lookAhead anyTok
+ notFollowedBy blankline
+
+blankline :: PandocMonad m => LP m ()
+blankline = try $ skipMany whitespace *> newlineTok
+
+primEscape :: PandocMonad m => LP m Char
+primEscape = do
+ Tok _ toktype t <- satisfyTok (tokTypeIn [Esc1, Esc2])
+ case toktype of
+ Esc1 -> case T.uncons (T.drop 2 t) of
+ Just (c, _)
+ | c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
+ | otherwise -> return (chr (ord c + 64))
+ Nothing -> fail "Empty content of Esc1"
+ Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
+ Just x -> return (chr x)
+ Nothing -> fail $ "Could not read: " ++ T.unpack t
+ _ -> fail "Expected an Esc1 or Esc2 token" -- should not happen
+
+bgroup :: PandocMonad m => LP m Tok
+bgroup = try $ do
+ skipMany sp
+ symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+
+egroup :: PandocMonad m => LP m Tok
+egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+
+grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
+grouped parser = try $ do
+ bgroup
+ -- first we check for an inner 'grouped', because
+ -- {{a,b}} should be parsed the same as {a,b}
+ try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
+
+braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
+braced' getTok n =
+ handleEgroup <|> handleBgroup <|> handleOther
+ where handleEgroup = do
+ t <- egroup
+ if n == 1
+ then return []
+ else (t:) <$> braced' getTok (n - 1)
+ handleBgroup = do
+ t <- bgroup
+ (t:) <$> braced' getTok (n + 1)
+ handleOther = do
+ t <- getTok
+ (t:) <$> braced' getTok n
+
+braced :: PandocMonad m => LP m [Tok]
+braced = bgroup *> braced' anyTok 1
+
+-- URLs require special handling, because they can contain %
+-- characters. So we retonenize comments as we go...
+bracedUrl :: PandocMonad m => LP m [Tok]
+bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+
+-- For handling URLs, which allow literal % characters...
+retokenizeComment :: PandocMonad m => LP m ()
+retokenizeComment = (do
+ Tok pos Comment txt <- satisfyTok isCommentTok
+ let updPos (Tok pos' toktype' txt') =
+ Tok (incSourceColumn (incSourceLine pos' (sourceLine pos - 1))
+ (sourceColumn pos)) toktype' txt'
+ let newtoks = map updPos $ tokenize (sourceName pos) $ T.tail txt
+ getInput >>= setInput . ((Tok pos Symbol "%" : newtoks) ++))
+ <|> return ()
+
+bracedOrToken :: PandocMonad m => LP m [Tok]
+bracedOrToken = braced <|> ((:[]) <$> (anyControlSeq <|> singleChar))
+
+bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
+bracketed parser = try $ do
+ symbol '['
+ mconcat <$> manyTill parser (symbol ']')
+
+bracketedToks :: PandocMonad m => LP m [Tok]
+bracketedToks = do
+ symbol '['
+ mconcat <$> manyTill (braced <|> (:[]) <$> anyTok) (symbol ']')
+
+parenWrapped :: PandocMonad m => Monoid a => LP m a -> LP m a
+parenWrapped parser = try $ do
+ symbol '('
+ mconcat <$> manyTill parser (symbol ')')
+
+dimenarg :: PandocMonad m => LP m Text
+dimenarg = try $ do
+ ch <- option False $ True <$ symbol '='
+ Tok _ _ s <- satisfyTok isWordTok
+ guard $ T.take 2 (T.reverse s) `elem`
+ ["pt","pc","in","bp","cm","mm","dd","cc","sp"]
+ let num = T.take (T.length s - 2) s
+ guard $ T.length num > 0
+ guard $ T.all isDigit num
+ return $ T.pack ['=' | ch] <> s
+
+ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore raw = do
+ pos <- getPosition
+ report $ SkippedContent raw pos
+ return mempty
+
+withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
+withRaw parser = do
+ inp <- getInput
+ result <- parser
+ nxt <- option (Tok (initialPos "source") Word "") (lookAhead anyTok)
+ let raw = takeWhile (/= nxt) inp
+ return (result, raw)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index fa832114b..e3a302d49 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -31,6 +31,7 @@ Types for LaTeX tokens and macros.
module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
, TokType(..)
, Macro(..)
+ , ArgSpec(..)
, ExpansionPoint(..)
, SourcePos
)
@@ -49,5 +50,8 @@ data Tok = Tok SourcePos TokType Text
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
deriving (Eq, Ord, Show)
-data Macro = Macro ExpansionPoint Int (Maybe [Tok]) [Tok]
+data Macro = Macro ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok]
+ deriving Show
+
+data ArgSpec = ArgNum Int | Pattern [Tok]
deriving Show
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 156b2b622..d1ea7a1a5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
{-
Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
@@ -31,31 +32,28 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
+module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where
import Prelude
import Control.Monad
import Control.Monad.Except (throwError)
+import qualified Data.ByteString.Lazy as BS
import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
-import qualified Data.HashMap.Strict as H
import Data.List (intercalate, sortBy, transpose, elemIndex)
import qualified Data.Map as M
import Data.Maybe
import Data.Ord (comparing)
-import Data.Scientific (base10Exponent, coefficient)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
-import qualified Data.Vector as V
-import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..))
-import qualified Data.Yaml as Yaml
+import qualified Data.YAML as YAML
import System.FilePath (addExtension, takeExtension)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..), report)
import Text.Pandoc.Definition
-import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
@@ -234,11 +232,9 @@ pandocTitleBlock = try $ do
$ nullMeta
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-
yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
yamlMetaBlock = try $ do
guardEnabled Ext_yaml_metadata_block
- pos <- getPosition
string "---"
blankline
notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
@@ -246,52 +242,44 @@ yamlMetaBlock = try $ do
-- by including --- and ..., we allow yaml blocks with just comments:
let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
- Right (Yaml.Object hashmap) -> do
- let alist = H.toList hashmap
- mapM_ (\(k, v) ->
- if ignorable k
- then return ()
- else do
- v' <- yamlToMeta v
- let k' = T.unpack k
- updateState $ \st -> st{ stateMeta' =
- do m <- stateMeta' st
- -- if there's already a value, leave it unchanged
- case lookupMeta k' m of
- Just _ -> return m
- Nothing -> do
- v'' <- v'
- return $ B.setMeta (T.unpack k) v'' m}
- ) alist
- Right Yaml.Null -> return ()
+ newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml
+ -- Since `<>` is left-biased, existing values are not touched:
+ updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF }
+ return mempty
+
+-- | Read a YAML string and convert it to pandoc metadata.
+-- String scalars in the YAML are parsed as Markdown.
+yamlToMeta :: PandocMonad m => BS.ByteString -> m Meta
+yamlToMeta bstr = do
+ let parser = do
+ meta <- yamlBsToMeta bstr
+ return $ runF meta defaultParserState
+ parsed <- readWithM parser def ""
+ case parsed of
+ Right result -> return result
+ Left e -> throwError e
+
+yamlBsToMeta :: PandocMonad m => BS.ByteString -> MarkdownParser m (F Meta)
+yamlBsToMeta bstr = do
+ pos <- getPosition
+ case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
+ Right ((YAML.Doc (YAML.Mapping _ o)):_) -> (fmap Meta) <$> yamlMap o
+ Right [] -> return . return $ mempty
+ Right [YAML.Doc (YAML.Scalar YAML.SNull)] -> return . return $ mempty
Right _ -> do
- logMessage $
- CouldNotParseYamlMetadata "not an object"
- pos
- return ()
+ logMessage $
+ CouldNotParseYamlMetadata "not an object"
+ pos
+ return . return $ mempty
Left err' -> do
- case err' of
- InvalidYaml (Just YamlParseException{
- yamlProblem = problem
- , yamlContext = _ctxt
- , yamlProblemMark = Yaml.YamlMark {
- yamlLine = yline
- , yamlColumn = ycol
- }}) ->
- logMessage $ CouldNotParseYamlMetadata
- problem (setSourceLine
- (setSourceColumn pos
- (sourceColumn pos + ycol))
- (sourceLine pos + 1 + yline))
- _ -> logMessage $ CouldNotParseYamlMetadata
- (show err') pos
- return ()
- return mempty
+ logMessage $ CouldNotParseYamlMetadata
+ err' pos
+ return . return $ mempty
--- ignore fields ending with _
-ignorable :: Text -> Bool
-ignorable t = (T.pack "_") `T.isSuffixOf` t
+nodeToKey :: Monad m => YAML.Node -> m Text
+nodeToKey (YAML.Scalar (YAML.SStr t)) = return t
+nodeToKey (YAML.Scalar (YAML.SUnknown _ t)) = return t
+nodeToKey _ = fail "Non-string key in YAML mapping"
toMetaValue :: PandocMonad m
=> Text -> MarkdownParser m (F MetaValue)
@@ -312,34 +300,51 @@ toMetaValue x =
-- not end in a newline, but a "block" set off with
-- `|` or `>` will.
-yamlToMeta :: PandocMonad m
- => Yaml.Value -> MarkdownParser m (F MetaValue)
-yamlToMeta (Yaml.String t) = toMetaValue t
-yamlToMeta (Yaml.Number n)
- -- avoid decimal points for numbers that don't need them:
- | base10Exponent n >= 0 = return $ return $ MetaString $ show
- $ coefficient n * (10 ^ base10Exponent n)
- | otherwise = return $ return $ MetaString $ show n
-yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b
-yamlToMeta (Yaml.Array xs) = do
- xs' <- mapM yamlToMeta (V.toList xs)
+checkBoolean :: Text -> Maybe Bool
+checkBoolean t =
+ if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE"
+ then Just True
+ else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE"
+ then Just False
+ else Nothing
+
+yamlToMetaValue :: PandocMonad m
+ => YAML.Node -> MarkdownParser m (F MetaValue)
+yamlToMetaValue (YAML.Scalar x) =
+ case x of
+ YAML.SStr t -> toMetaValue t
+ YAML.SBool b -> return $ return $ MetaBool b
+ YAML.SFloat d -> return $ return $ MetaString (show d)
+ YAML.SInt i -> return $ return $ MetaString (show i)
+ YAML.SUnknown _ t ->
+ case checkBoolean t of
+ Just b -> return $ return $ MetaBool b
+ Nothing -> toMetaValue t
+ YAML.SNull -> return $ return $ MetaString ""
+yamlToMetaValue (YAML.Sequence _ xs) = do
+ xs' <- mapM yamlToMetaValue xs
return $ do
xs'' <- sequence xs'
return $ B.toMetaValue xs''
-yamlToMeta (Yaml.Object o) = do
- let alist = H.toList o
- foldM (\m (k,v) ->
- if ignorable k
- then return m
- else do
- v' <- yamlToMeta v
- return $ do
- MetaMap m' <- m
- v'' <- v'
- return (MetaMap $ M.insert (T.unpack k) v'' m'))
- (return $ MetaMap M.empty)
- alist
-yamlToMeta _ = return $ return $ MetaString ""
+yamlToMetaValue (YAML.Mapping _ o) = fmap B.toMetaValue <$> yamlMap o
+yamlToMetaValue _ = return $ return $ MetaString ""
+
+yamlMap :: PandocMonad m
+ => M.Map YAML.Node YAML.Node
+ -> MarkdownParser m (F (M.Map String MetaValue))
+yamlMap o = do
+ kvs <- forM (M.toList o) $ \(key, v) -> do
+ k <- nodeToKey key
+ return (k, v)
+ let kvs' = filter (not . ignorable . fst) kvs
+ (fmap M.fromList . sequence) <$> mapM toMeta kvs'
+ where
+ ignorable t = (T.pack "_") `T.isSuffixOf` t
+ toMeta (k, v) = do
+ fv <- yamlToMetaValue v
+ return $ do
+ v' <- fv
+ return (T.unpack k, v')
stopLine :: PandocMonad m => MarkdownParser m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
@@ -966,7 +971,9 @@ orderedList = try $ do
<|> return (style == Example)
items <- fmap sequence $ many1 $ listItem fourSpaceRule
(orderedListStart (Just (style, delim)))
- start' <- (start <$ guardEnabled Ext_startnum) <|> return 1
+ start' <- if style == Example
+ then return start
+ else (start <$ guardEnabled Ext_startnum) <|> return 1
return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
@@ -1142,10 +1149,9 @@ rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
lookAhead $ try $ char '\\' >> letter
- result <- (B.rawBlock "context" . trim . concat <$>
- many1 ((++) <$> (rawConTeXtEnvironment <|> conTeXtCommand)
- <*> spnl'))
- <|> (B.rawBlock "latex" . trim . concat <$>
+ result <- (B.rawBlock "tex" . trim . concat <$>
+ many1 ((++) <$> rawConTeXtEnvironment <*> spnl'))
+ <|> (B.rawBlock "tex" . trim . concat <$>
many1 ((++) <$> rawLaTeXBlock <*> spnl'))
return $ case B.toList result of
[RawBlock _ cs]
@@ -1153,9 +1159,6 @@ rawTeXBlock = do
-- don't create a raw block for suppressed macro defs
_ -> return result
-conTeXtCommand :: PandocMonad m => MarkdownParser m String
-conTeXtCommand = oneOfStrings ["\\placeformula"]
-
rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
@@ -1591,7 +1594,7 @@ code = try $ do
starts <- many1 (char '`')
skipSpaces
result <- (trim . concat) <$>
- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ manyTill (many1 (noneOf "`\n") <|> many1 (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " "))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
@@ -1877,23 +1880,24 @@ bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
- (orig, src) <- uri <|> emailAddress
+ (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
- return $ return $ B.link src "" (B.str orig)
+ return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
autoLink = try $ do
getState >>= guard . stateAllowLinks
char '<'
- (orig, src) <- uri <|> emailAddress
+ (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
-- in rare cases, something may remain after the uri parser
-- is finished, because the uri parser tries to avoid parsing
-- final punctuation. for example: in `<http://hi---there>`,
-- the URI parser will stop before the dashes.
extra <- fromEntities <$> manyTill nonspaceChar (char '>')
- attr <- option nullAttr $ try $
+ attr <- option ("", [cls], []) $ try $
guardEnabled Ext_link_attributes >> attributes
- return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
+ return $ return $ B.linkWith attr (src ++ escapeURI extra) ""
+ (B.str $ orig ++ extra)
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
@@ -2037,9 +2041,9 @@ emoji = try $ do
char ':'
emojikey <- many1 (oneOf emojiChars)
char ':'
- case M.lookup emojikey emojis of
- Just s -> return (return (B.str s))
- Nothing -> mzero
+ case emojiToInline emojikey of
+ Just i -> return (return $ B.singleton i)
+ Nothing -> mzero
-- Citations
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index fe6b3698c..134598c07 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -35,34 +35,32 @@ TODO:
- Page breaks (five "*")
- Org tables
- table.el tables
-- Images with attributes (floating and width)
- <cite> tag
-}
module Text.Pandoc.Readers.Muse (readMuse) where
import Prelude
import Control.Monad
+import Control.Monad.Reader
import Control.Monad.Except (throwError)
import Data.Bifunctor
-import Data.Char (isLetter)
+import Data.Char (isAlphaNum)
import Data.Default
-import Data.List (stripPrefix, intercalate)
+import Data.List (intercalate)
import Data.List.Split (splitOn)
import qualified Data.Map as M
import qualified Data.Set as Set
-import Data.Maybe (fromMaybe, isNothing)
+import Data.Maybe (fromMaybe, isNothing, maybeToList)
import Data.Text (Text, unpack)
-import System.FilePath (takeExtension)
-import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (F)
-import Text.Pandoc.Readers.HTML (htmlTag)
-import Text.Pandoc.Shared (crFilter, underlineSpan)
+import Text.Pandoc.Parsing hiding (F, enclosed)
+import Text.Pandoc.Shared (crFilter, underlineSpan, mapLeft)
-- | Read Muse from an input string and return a Pandoc document.
readMuse :: PandocMonad m
@@ -70,7 +68,8 @@ readMuse :: PandocMonad m
-> Text
-> m Pandoc
readMuse opts s = do
- res <- readWithM parseMuse def{ museOptions = opts } (unpack (crFilter s))
+ let input = crFilter s
+ res <- mapLeft (PandocParsecError $ unpack input) `liftM` runReaderT (runParserT parseMuse def{ museOptions = opts } "source" input) def
case res of
Left e -> throwError e
Right d -> return d
@@ -84,7 +83,6 @@ data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
, museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
, museLogMessages :: [LogMessage]
, museNotes :: M.Map String (SourcePos, F Blocks)
- , museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
, museInPara :: Bool -- ^ True when looking for a paragraph terminator
}
@@ -96,11 +94,17 @@ instance Default MuseState where
, museLastStrPos = Nothing
, museLogMessages = []
, museNotes = M.empty
- , museInLink = False
, museInPara = False
}
-type MuseParser = ParserT String MuseState
+data MuseEnv =
+ MuseEnv { museInLink :: Bool -- ^ True when parsing a link description to avoid nested links
+ }
+
+instance Default MuseEnv where
+ def = MuseEnv { museInLink = False }
+
+type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
instance HasReaderOptions MuseState where
extractReaderOptions = museOptions
@@ -125,11 +129,9 @@ instance HasLogMessages MuseState where
parseMuse :: PandocMonad m => MuseParser m Pandoc
parseMuse = do
many directive
- blocks <- parseBlocks
+ blocks <- (:) <$> parseBlocks <*> many parseSection
st <- getState
- let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
- meta <- museMeta st
- return $ Pandoc meta bs) st
+ let doc = runF (Pandoc <$> museMeta st <*> fmap B.toList (mconcat blocks)) st
reportLogMessages
return doc
@@ -144,9 +146,8 @@ commonPrefix (x:xs) (y:ys)
-- | Trim up to one newline from the beginning of the string.
lchop :: String -> String
-lchop s = case s of
- '\n':ss -> ss
- _ -> s
+lchop ('\n':xs) = xs
+lchop s = s
-- | Trim up to one newline from the end of the string.
rchop :: String -> String
@@ -165,12 +166,19 @@ atStart p = do
guard $ museLastStrPos st /= Just pos
p
+firstColumn :: PandocMonad m => MuseParser m ()
+firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
+
-- * Parsers
-- | Parse end-of-line, which can be either a newline or end-of-file.
eol :: Stream s m Char => ParserT s st m ()
eol = void newline <|> eof
+getIndent :: PandocMonad m
+ => MuseParser m Int
+getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition
+
someUntil :: (Stream s m t)
=> ParserT s u m a
-> ParserT s u m b
@@ -179,28 +187,21 @@ someUntil p end = first <$> ((:) <$> p) <*> manyUntil p end
-- ** HTML parsers
--- | Parse HTML tag, returning its attributes and literal contents.
-htmlElement :: PandocMonad m
- => String -- ^ Tag name
- -> MuseParser m (Attr, String)
-htmlElement tag = try $ do
- (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
- content <- manyTill anyChar endtag
- return (htmlAttrToPandoc attr, content)
+openTag :: PandocMonad m => String -> MuseParser m [(String, String)]
+openTag tag = try $
+ char '<' *> string tag *> manyTill attr (char '>')
where
- endtag = void $ htmlTag (~== TagClose tag)
+ attr = try $ (,)
+ <$ many1 spaceChar
+ <*> many1 (noneOf "=\n")
+ <* string "=\""
+ <*> manyTill (noneOf "\"") (char '"')
-htmlBlock :: PandocMonad m
- => String -- ^ Tag name
- -> MuseParser m (Attr, String)
-htmlBlock tag = try $ do
- many spaceChar
- res <- htmlElement tag
- manyTill spaceChar eol
- return res
+closeTag :: PandocMonad m => String -> MuseParser m ()
+closeTag tag = try $ string "</" *> string tag *> void (char '>')
-- | Convert HTML attributes to Pandoc 'Attr'
-htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc :: [(String, String)] -> Attr
htmlAttrToPandoc attrs = (ident, classes, keyvals)
where
ident = fromMaybe "" $ lookup "id" attrs
@@ -211,15 +212,12 @@ parseHtmlContent :: PandocMonad m
=> String -- ^ Tag name
-> MuseParser m (Attr, F Blocks)
parseHtmlContent tag = try $ do
- many spaceChar
- pos <- getPosition
- (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ indent <- getIndent
+ attr <- openTag tag
manyTill spaceChar eol
- content <- parseBlocksTill $ try $ count (sourceColumn pos - 1) spaceChar >> endtag
+ content <- parseBlocksTill $ try $ count indent spaceChar *> closeTag tag
manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline
return (htmlAttrToPandoc attr, content)
- where
- endtag = void $ htmlTag (~== TagClose tag)
-- ** Directive parsers
@@ -228,21 +226,19 @@ parseDirectiveKey :: PandocMonad m => MuseParser m String
parseDirectiveKey = char '#' *> many (letter <|> char '-')
parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines)
-parseEmacsDirective = do
- key <- parseDirectiveKey
- spaceChar
- value <- trimInlinesF . mconcat <$> manyTill (choice inlineList) eol
- return (key, value)
+parseEmacsDirective = (,)
+ <$> parseDirectiveKey
+ <* spaceChar
+ <*> (trimInlinesF . mconcat <$> manyTill inline' eol)
parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
-parseAmuseDirective = do
- key <- parseDirectiveKey
- many1 spaceChar
- value <- trimInlinesF . mconcat <$> many1Till inline endOfDirective
- many blankline
- return (key, value)
+parseAmuseDirective = (,)
+ <$> parseDirectiveKey
+ <* many1 spaceChar
+ <*> (trimInlinesF . mconcat <$> many1Till inline endOfDirective)
+ <* many blankline
where
- endOfDirective = lookAhead $ eof <|> try (newline >> (void blankline <|> void parseDirectiveKey))
+ endOfDirective = lookAhead $ eof <|> try (newline *> (void blankline <|> void parseDirectiveKey))
directive :: PandocMonad m => MuseParser m ()
directive = do
@@ -254,17 +250,20 @@ directive = do
-- ** Block parsers
+-- | Parse section contents until EOF or next header
parseBlocks :: PandocMonad m
=> MuseParser m (F Blocks)
parseBlocks =
try (parseEnd <|>
+ nextSection <|>
blockStart <|>
listStart <|>
paraStart)
where
+ nextSection = mempty <$ lookAhead headingStart
parseEnd = mempty <$ eof
- blockStart = ((B.<>) <$> (emacsHeading <|> blockElements <|> emacsNoteBlock)
- <*> parseBlocks) <|> (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks)
+ blockStart = (B.<>) <$> (blockElements <|> emacsNoteBlock)
+ <*> parseBlocks
listStart = do
updateState (\st -> st { museInPara = False })
uncurry (B.<>) <$> (anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks)
@@ -273,6 +272,13 @@ parseBlocks =
uncurry (B.<>) . first (p indent) <$> paraUntil parseBlocks
where p indent = if indent >= 2 && indent < 6 then fmap B.blockQuote else id
+-- | Parse section that starts with a header
+parseSection :: PandocMonad m
+ => MuseParser m (F Blocks)
+parseSection =
+ ((B.<>) <$> emacsHeading <*> parseBlocks) <|>
+ (uncurry (B.<>) <$> amuseHeadingUntil parseBlocks)
+
parseBlocksTill :: PandocMonad m
=> MuseParser m a
-> MuseParser m (F Blocks)
@@ -347,31 +353,32 @@ blockElements = do
-- | Parse a line comment, starting with @;@ in the first column.
comment :: PandocMonad m => MuseParser m (F Blocks)
-comment = try $ do
- getPosition >>= \pos -> guard (sourceColumn pos == 1)
- char ';'
- optional (spaceChar >> many (noneOf "\n"))
- eol
- return mempty
+comment = try $ mempty
+ <$ firstColumn
+ <* char ';'
+ <* optional (spaceChar *> many (noneOf "\n"))
+ <* eol
-- | Parse a horizontal rule, consisting of 4 or more @\'-\'@ characters.
separator :: PandocMonad m => MuseParser m (F Blocks)
-separator = try $ do
- string "----"
- many $ char '-'
- many spaceChar
- eol
- return $ return B.horizontalRule
+separator = try $ pure B.horizontalRule
+ <$ string "----"
+ <* many (char '-')
+ <* many spaceChar
+ <* eol
+
+headingStart :: PandocMonad m => MuseParser m (String, Int)
+headingStart = try $ (,)
+ <$> option "" (try (parseAnchor <* manyTill spaceChar eol))
+ <* firstColumn
+ <*> fmap length (many1 $ char '*')
+ <* spaceChar
-- | Parse a single-line heading.
emacsHeading :: PandocMonad m => MuseParser m (F Blocks)
emacsHeading = try $ do
guardDisabled Ext_amuse
- anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol)
- getPosition >>= \pos -> guard (sourceColumn pos == 1)
- level <- fmap length $ many1 $ char '*'
- guard $ level <= 5
- spaceChar
+ (anchorId, level) <- headingStart
content <- trimInlinesF . mconcat <$> manyTill inline eol
attr <- registerHeader (anchorId, [], []) (runF content def)
return $ B.headerWith attr level <$> content
@@ -383,11 +390,7 @@ amuseHeadingUntil :: PandocMonad m
-> MuseParser m (F Blocks, a)
amuseHeadingUntil end = try $ do
guardEnabled Ext_amuse
- anchorId <- option "" $ try (parseAnchor <* manyTill spaceChar eol)
- getPosition >>= \pos -> guard (sourceColumn pos == 1)
- level <- fmap length $ many1 $ char '*'
- guard $ level <= 5
- spaceChar
+ (anchorId, level) <- headingStart
(content, e) <- paraContentsUntil end
attr <- registerHeader (anchorId, [], []) (runF content def)
return (B.headerWith attr level <$> content, e)
@@ -395,33 +398,28 @@ amuseHeadingUntil end = try $ do
-- | Parse an example between @{{{@ and @}}}@.
-- It is an Amusewiki extension influenced by Creole wiki, as described in @Text::Amuse@ documentation.
example :: PandocMonad m => MuseParser m (F Blocks)
-example = try $ do
- string "{{{"
- optional blankline
- contents <- manyTill anyChar $ try (optional blankline >> string "}}}")
- return $ return $ B.codeBlock contents
+example = try $ pure . B.codeBlock
+ <$ string "{{{"
+ <* optional blankline
+ <*> manyTill anyChar (try (optional blankline *> string "}}}"))
-- | Parse an @\<example>@ tag.
exampleTag :: PandocMonad m => MuseParser m (F Blocks)
-exampleTag = try $ do
- (attr, contents) <- htmlBlock "example"
- return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents
+exampleTag = try $ fmap pure $ B.codeBlockWith
+ <$ many spaceChar
+ <*> (htmlAttrToPandoc <$> openTag "example")
+ <*> (rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop <$> manyTill anyChar (closeTag "example"))
+ <* manyTill spaceChar eol
-- | Parse a @\<literal>@ tag as a raw block.
-- For 'RawInline' @\<literal>@ parser, see 'inlineLiteralTag'.
literalTag :: PandocMonad m => MuseParser m (F Blocks)
-literalTag = try $ do
- many spaceChar
- (TagOpen _ attr, _) <- htmlTag (~== TagOpen "literal" [])
- manyTill spaceChar eol
- content <- manyTill anyChar endtag
- manyTill spaceChar eol
- return $ return $ rawBlock (htmlAttrToPandoc attr, content)
- where
- endtag = void $ htmlTag (~== TagClose "literal")
- -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
- format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs
- rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content
+literalTag = try $ fmap pure $ B.rawBlock
+ <$ many spaceChar
+ <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
+ <* manyTill spaceChar eol
+ <*> (rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop <$> manyTill anyChar (closeTag "literal"))
+ <* manyTill spaceChar eol
-- | Parse @\<center>@ tag.
-- Currently it is ignored as Pandoc cannot represent centered blocks.
@@ -459,25 +457,27 @@ playTag = do
fmap (B.divWith ("", ["play"], [])) . snd <$> parseHtmlContent "play"
verseLine :: PandocMonad m => MuseParser m (F Inlines)
-verseLine = do
- indent <- (B.str <$> many1 (char ' ' >> pure '\160')) <|> pure mempty
- rest <- manyTill (choice inlineList) newline
- return $ trimInlinesF $ mconcat (pure indent : rest)
-
-verseLines :: PandocMonad m => MuseParser m (F Blocks)
-verseLines = do
- lns <- many verseLine
- return $ B.lineBlock <$> sequence lns
+verseLine = (<>)
+ <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' ')))
+ <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol)
-- | Parse @\<verse>@ tag.
verseTag :: PandocMonad m => MuseParser m (F Blocks)
-verseTag = do
- (_, content) <- htmlBlock "verse"
- parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content)
+verseTag = try $ do
+ indent <- getIndent
+ openTag "verse"
+ manyTill spaceChar eol
+ content <- sequence <$> manyTill (count indent spaceChar *> verseLine) (try $ count indent spaceChar *> closeTag "verse")
+ manyTill spaceChar eol
+ return $ B.lineBlock <$> content
-- | Parse @\<comment>@ tag.
commentTag :: PandocMonad m => MuseParser m (F Blocks)
-commentTag = htmlBlock "comment" >> return mempty
+commentTag = try $ mempty
+ <$ many spaceChar
+ <* openTag "comment"
+ <* manyTill anyChar (closeTag "comment")
+ <* manyTill spaceChar eol
-- | Parse paragraph contents.
paraContentsUntil :: PandocMonad m
@@ -485,7 +485,7 @@ paraContentsUntil :: PandocMonad m
-> MuseParser m (F Inlines, a)
paraContentsUntil end = do
updateState (\st -> st { museInPara = True })
- (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end)
+ (l, e) <- someUntil inline $ try (manyTill spaceChar eol *> end)
updateState (\st -> st { museInPara = False })
return (trimInlinesF $ mconcat l, e)
@@ -499,9 +499,10 @@ paraUntil end = do
first (fmap B.para) <$> paraContentsUntil end
noteMarker :: PandocMonad m => MuseParser m String
-noteMarker = try $ do
- char '['
- (:) <$> oneOf "123456789" <*> manyTill digit (char ']')
+noteMarker = try $ (:)
+ <$ char '['
+ <*> oneOf "123456789"
+ <*> manyTill digit (char ']')
-- Amusewiki version of note
-- Parsing is similar to list item, except that note marker is used instead of list marker
@@ -541,27 +542,15 @@ emacsNoteBlock = try $ do
-- Verse markup
--
-lineVerseLine :: PandocMonad m => MuseParser m (F Inlines)
-lineVerseLine = try $ do
- string "> "
- indent <- many (char ' ' >> pure '\160')
- let indentEl = if null indent then mempty else B.str indent
- rest <- manyTill (choice inlineList) eol
- return $ trimInlinesF $ mconcat (pure indentEl : rest)
-
-blanklineVerseLine :: PandocMonad m => MuseParser m (F Inlines)
-blanklineVerseLine = try $ do
- char '>'
- blankline
- pure mempty
-
-- | Parse a line block indicated by @\'>\'@ characters.
lineBlock :: PandocMonad m => MuseParser m (F Blocks)
lineBlock = try $ do
- many spaceChar
- col <- sourceColumn <$> getPosition
- lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1))
+ indent <- getIndent
+ lns <- (blankVerseLine <|> nonblankVerseLine) `sepBy1'` try (indentWith indent)
return $ B.lineBlock <$> sequence lns
+ where
+ blankVerseLine = try $ mempty <$ char '>' <* blankline
+ nonblankVerseLine = try (string "> ") *> verseLine
-- *** List parsers
@@ -573,7 +562,7 @@ bulletListItemsUntil indent end = try $ do
char '-'
void spaceChar <|> lookAhead eol
updateState (\st -> st { museInPara = False })
- (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (([],) <$> end)
+ (x, (xs, e)) <- listItemContentsUntil (indent + 2) (try (optional blankline *> indentWith indent *> bulletListItemsUntil indent end)) (([],) <$> end)
return (x:xs, e)
-- | Parse a bullet list.
@@ -581,19 +570,9 @@ bulletListUntil :: PandocMonad m
=> MuseParser m a
-> MuseParser m (F Blocks, a)
bulletListUntil end = try $ do
- many spaceChar
- pos <- getPosition
- let indent = sourceColumn pos - 1
+ indent <- getIndent
guard $ indent /= 0
- (items, e) <- bulletListItemsUntil indent end
- return (B.bulletList <$> sequence items, e)
-
--- | Parses an ordered list marker and returns list attributes.
-anyMuseOrderedListMarker :: PandocMonad m => MuseParser m ListAttributes
-anyMuseOrderedListMarker = do
- (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha
- char '.'
- return (start, style, Period)
+ first (fmap B.bulletList . sequence) <$> bulletListItemsUntil indent end
museOrderedListMarker :: PandocMonad m
=> ListNumberStyle
@@ -620,7 +599,7 @@ orderedListItemsUntil indent style end =
pos <- getPosition
void spaceChar <|> lookAhead eol
updateState (\st -> st { museInPara = False })
- (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (([],) <$> end)
+ (x, (xs, e)) <- listItemContentsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> museOrderedListMarker style *> continuation)) (([],) <$> end)
return (x:xs, e)
-- | Parse an ordered list.
@@ -628,14 +607,12 @@ orderedListUntil :: PandocMonad m
=> MuseParser m a
-> MuseParser m (F Blocks, a)
orderedListUntil end = try $ do
- many spaceChar
- pos <- getPosition
- let indent = sourceColumn pos - 1
+ indent <- getIndent
guard $ indent /= 0
- p@(_, style, _) <- anyMuseOrderedListMarker
- guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman]
- (items, e) <- orderedListItemsUntil indent style end
- return (B.orderedListWith p <$> sequence items, e)
+ (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha
+ char '.'
+ first (fmap (B.orderedListWith (start, style, Period)) . sequence)
+ <$> orderedListItemsUntil indent style end
descriptionsUntil :: PandocMonad m
=> Int
@@ -644,7 +621,7 @@ descriptionsUntil :: PandocMonad m
descriptionsUntil indent end = do
void spaceChar <|> lookAhead eol
updateState (\st -> st { museInPara = False })
- (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (([],) <$> end)
+ (x, (xs, e)) <- listItemContentsUntil indent (try (optional blankline *> indentWith indent *> manyTill spaceChar (string "::") *> descriptionsUntil indent end)) (([],) <$> end)
return (x:xs, e)
definitionListItemsUntil :: PandocMonad m
@@ -656,8 +633,8 @@ definitionListItemsUntil indent end =
where
continuation = try $ do
pos <- getPosition
- term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (try $ string "::")
- (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline >> indentWith indent >> continuation) <|> (([],) <$> end))
+ term <- trimInlinesF . mconcat <$> manyTill inline' (try $ string "::")
+ (x, (xs, e)) <- descriptionsUntil (sourceColumn pos) (try (optional blankline *> indentWith indent *> continuation) <|> (([],) <$> end))
let xx = (,) <$> term <*> sequence x
return (xx:xs, e)
@@ -666,9 +643,7 @@ definitionListUntil :: PandocMonad m
=> MuseParser m a -- ^ Terminator parser
-> MuseParser m (F Blocks, a)
definitionListUntil end = try $ do
- many spaceChar
- pos <- getPosition
- let indent = sourceColumn pos - 1
+ indent <- getIndent
guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse
first (fmap B.definitionList . sequence) <$> definitionListItemsUntil indent end
@@ -713,7 +688,7 @@ museAppendElement element tbl =
tableCell :: PandocMonad m => MuseParser m (F Blocks)
tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd)
- where cellEnd = try $ void (many1 spaceChar >> char '|') <|> eol
+ where cellEnd = try $ void (many1 spaceChar *> char '|') <|> eol
tableElements :: PandocMonad m => MuseParser m (F [MuseTableElement])
tableElements = sequence <$> (tableParseElement `sepEndBy1` eol)
@@ -735,11 +710,10 @@ tableParseElement = tableParseHeader
tableParseRow :: PandocMonad m
=> Int -- ^ Number of separator characters
-> MuseParser m (F [Blocks])
-tableParseRow n = try $ do
- fields <- tableCell `sepBy2` fieldSep
- return $ sequence fields
- where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p)
- fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline))
+tableParseRow n = try $
+ sequence <$> (tableCell `sepBy2` fieldSep)
+ where p `sepBy2` sep = (:) <$> p <*> many1 (sep *> p)
+ fieldSep = many1 spaceChar *> count n (char '|') *> (void (many1 spaceChar) <|> void (lookAhead newline))
-- | Parse a table header row.
tableParseHeader :: PandocMonad m => MuseParser m (F MuseTableElement)
@@ -755,53 +729,51 @@ tableParseFooter = fmap MuseFooterRow <$> tableParseRow 3
-- | Parse table caption.
tableParseCaption :: PandocMonad m => MuseParser m (F MuseTableElement)
-tableParseCaption = try $ do
- many spaceChar
- string "|+"
- fmap MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|"))
+tableParseCaption = try $ fmap MuseCaption . trimInlinesF . mconcat
+ <$ many spaceChar
+ <* string "|+"
+ <*> many1Till inline (try $ string "+|")
-- ** Inline parsers
-inlineList :: PandocMonad m => [MuseParser m (F Inlines)]
-inlineList = [ whitespace
- , br
- , anchor
- , footnote
- , strong
- , strongTag
- , emph
- , emphTag
- , underlined
- , superscriptTag
- , subscriptTag
- , strikeoutTag
- , verbatimTag
- , classTag
- , nbsp
- , link
- , code
- , codeTag
- , mathTag
- , inlineLiteralTag
- , str
- , symbol
- ]
+inline' :: PandocMonad m => MuseParser m (F Inlines)
+inline' = whitespace
+ <|> br
+ <|> anchor
+ <|> footnote
+ <|> strong
+ <|> strongTag
+ <|> emph
+ <|> emphTag
+ <|> underlined
+ <|> superscriptTag
+ <|> subscriptTag
+ <|> strikeoutTag
+ <|> verbatimTag
+ <|> classTag
+ <|> nbsp
+ <|> linkOrImage
+ <|> code
+ <|> codeTag
+ <|> mathTag
+ <|> inlineLiteralTag
+ <|> str
+ <|> symbol
+ <?> "inline"
inline :: PandocMonad m => MuseParser m (F Inlines)
-inline = endline <|> choice inlineList <?> "inline"
+inline = endline <|> inline'
-- | Parse a soft break.
endline :: PandocMonad m => MuseParser m (F Inlines)
-endline = try $ do
- newline
- notFollowedBy blankline
- return $ return B.softbreak
+endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline
parseAnchor :: PandocMonad m => MuseParser m String
-parseAnchor = try $ do
- getPosition >>= \pos -> guard (sourceColumn pos == 1)
- char '#'
- (:) <$> letter <*> many (letter <|> digit <|> char '-')
+parseAnchor = try $ (:)
+ <$ firstColumn
+ <* char '#'
+ <*> letter
+ <*> many (letter <|> digit <|> char '-')
anchor :: PandocMonad m => MuseParser m (F Inlines)
anchor = try $ do
@@ -812,7 +784,7 @@ anchor = try $ do
-- | Parse a footnote reference.
footnote :: PandocMonad m => MuseParser m (F Inlines)
footnote = try $ do
- inLink <- museInLink <$> getState
+ inLink <- asks museInLink
guard $ not inLink
ref <- noteMarker
return $ do
@@ -825,33 +797,38 @@ footnote = try $ do
return $ B.note contents'
whitespace :: PandocMonad m => MuseParser m (F Inlines)
-whitespace = try $ do
- skipMany1 spaceChar
- return $ return B.space
+whitespace = try $ pure B.space <$ skipMany1 spaceChar
-- | Parse @\<br>@ tag.
br :: PandocMonad m => MuseParser m (F Inlines)
-br = try $ do
- string "<br>"
- return $ return B.linebreak
+br = try $ pure B.linebreak <$ string "<br>"
emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines)
emphasisBetween c = try $ enclosedInlines c c
+-- | Parses material enclosed between start and end parsers.
+enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser
+ -> ParserT s st m end -- ^ end parser
+ -> ParserT s st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT s st m [a]
+enclosed start end parser = try $
+ start *> notFollowedBy spaceChar *> many1Till parser end
+
enclosedInlines :: (PandocMonad m, Show a, Show b)
=> MuseParser m a
-> MuseParser m b
-> MuseParser m (F Inlines)
-enclosedInlines start end = try $
- trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter))
+enclosedInlines start end = try $ trimInlinesF . mconcat
+ <$> enclosed (atStart start) end inline
+ <* notFollowedBy (satisfy isAlphaNum)
-- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
inlineTag :: PandocMonad m
=> String -- ^ Tag name
-> MuseParser m (F Inlines)
-inlineTag tag = try $ do
- htmlTag (~== TagOpen tag [])
- mconcat <$> manyTill inline (void $ htmlTag (~== TagClose tag))
+inlineTag tag = try $ mconcat
+ <$ openTag tag
+ <*> manyTill inline (closeTag tag)
-- | Parse strong inline markup, indicated by @**@.
strong :: PandocMonad m => MuseParser m (F Inlines)
@@ -864,9 +841,9 @@ emph = fmap B.emph <$> emphasisBetween (char '*')
-- | Parse underline inline markup, indicated by @_@.
-- Supported only in Emacs Muse mode, not Text::Amuse.
underlined :: PandocMonad m => MuseParser m (F Inlines)
-underlined = do
- guardDisabled Ext_amuse -- Supported only by Emacs Muse
- fmap underlineSpan <$> emphasisBetween (char '_')
+underlined = fmap underlineSpan
+ <$ guardDisabled Ext_amuse -- Supported only by Emacs Muse
+ <*> emphasisBetween (char '_')
-- | Parse @\<strong>@ tag.
strongTag :: PandocMonad m => MuseParser m (F Inlines)
@@ -890,21 +867,20 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del"
-- | Parse @\<verbatim>@ tag.
verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
-verbatimTag = return . B.text . snd <$> htmlElement "verbatim"
+verbatimTag = return . B.text
+ <$ openTag "verbatim"
+ <*> manyTill anyChar (closeTag "verbatim")
-- | Parse @\<class>@ tag.
classTag :: PandocMonad m => MuseParser m (F Inlines)
classTag = do
- (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "class" [])
- res <- manyTill inline (void $ htmlTag (~== TagClose "class"))
- let classes = maybe [] words $ lookup "name" attrs
+ classes <- maybe [] words . lookup "name" <$> openTag "class"
+ res <- manyTill inline $ closeTag "class"
return $ B.spanWith ("", classes, []) <$> mconcat res
-- | Parse "~~" as nonbreaking space.
nbsp :: PandocMonad m => MuseParser m (F Inlines)
-nbsp = try $ do
- string "~~"
- return $ return $ B.str "\160"
+nbsp = try $ pure (B.str "\160") <$ string "~~"
-- | Parse code markup, indicated by @\'=\'@ characters.
code :: PandocMonad m => MuseParser m (F Inlines)
@@ -914,26 +890,27 @@ code = try $ do
guard $ not $ null contents
guard $ head contents `notElem` " \t\n"
guard $ last contents `notElem` " \t\n"
- notFollowedBy $ satisfy isLetter
+ notFollowedBy $ satisfy isAlphaNum
return $ return $ B.code contents
-- | Parse @\<code>@ tag.
codeTag :: PandocMonad m => MuseParser m (F Inlines)
-codeTag = return . uncurry B.codeWith <$> htmlElement "code"
+codeTag = fmap pure $ B.codeWith
+ <$> (htmlAttrToPandoc <$> openTag "code")
+ <*> manyTill anyChar (closeTag "code")
-- | Parse @\<math>@ tag.
-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@
mathTag :: PandocMonad m => MuseParser m (F Inlines)
-mathTag = return . B.math . snd <$> htmlElement "math"
+mathTag = return . B.math
+ <$ openTag "math"
+ <*> manyTill anyChar (closeTag "math")
-- | Parse inline @\<literal>@ tag as a raw inline.
inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
-inlineLiteralTag =
- (return . rawInline) <$> htmlElement "literal"
- where
- -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
- format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs
- rawInline (attrs, content) = B.rawInline (format attrs) content
+inlineLiteralTag = try $ fmap pure $ B.rawInline
+ <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
+ <*> manyTill anyChar (closeTag "literal")
str :: PandocMonad m => MuseParser m (F Inlines)
str = return . B.str <$> many1 alphaNum <* updateLastStrPos
@@ -942,29 +919,58 @@ symbol :: PandocMonad m => MuseParser m (F Inlines)
symbol = return . B.str <$> count 1 nonspaceChar
-- | Parse a link or image.
-link :: PandocMonad m => MuseParser m (F Inlines)
-link = try $ do
- st <- getState
- guard $ not $ museInLink st
- setState $ st{ museInLink = True }
- (url, content) <- linkText
- updateState (\state -> state { museInLink = False })
- return $ case stripPrefix "URL:" url of
- Nothing -> if isImageUrl url
- then B.image url "" <$> fromMaybe (return mempty) content
- else B.link url "" <$> fromMaybe (return $ B.str url) content
- Just url' -> B.link url' "" <$> fromMaybe (return $ B.str url') content
- where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
- imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
- isImageUrl = (`elem` imageExtensions) . takeExtension
+linkOrImage :: PandocMonad m => MuseParser m (F Inlines)
+linkOrImage = try $ do
+ inLink <- asks museInLink
+ guard $ not inLink
+ local (\s -> s { museInLink = True }) (explicitLink <|> image <|> link)
linkContent :: PandocMonad m => MuseParser m (F Inlines)
-linkContent = char '[' >> trimInlinesF . mconcat <$> manyTill inline (string "]")
+linkContent = trimInlinesF . mconcat
+ <$ char '['
+ <*> manyTill inline (char ']')
+
+-- | Parse a link starting with @URL:@
+explicitLink :: PandocMonad m => MuseParser m (F Inlines)
+explicitLink = try $ do
+ string "[[URL:"
+ url <- manyTill anyChar $ char ']'
+ content <- option (pure $ B.str url) linkContent
+ char ']'
+ return $ B.link url "" <$> content
-linkText :: PandocMonad m => MuseParser m (String, Maybe (F Inlines))
-linkText = do
+image :: PandocMonad m => MuseParser m (F Inlines)
+image = try $ do
+ string "[["
+ (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']')
+ content <- option mempty linkContent
+ char ']'
+ let widthAttr = case align of
+ Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")]
+ _ -> maybeToList (("width",) . (++ "%") <$> width)
+ let alignClass = case align of
+ Just 'r' -> ["align-right"]
+ Just 'l' -> ["align-left"]
+ Just 'f' -> []
+ _ -> []
+ return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content
+ where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
+ imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
+ imageExtension = choice (try . string <$> imageExtensions)
+ imageExtensionAndOptions = do
+ ext <- imageExtension
+ (width, align) <- option (Nothing, Nothing) imageAttrs
+ return (ext, width, align)
+ imageAttrs = (,)
+ <$ many1 spaceChar
+ <*> optionMaybe (many1 digit)
+ <* many spaceChar
+ <*> optionMaybe (oneOf "rlf")
+
+link :: PandocMonad m => MuseParser m (F Inlines)
+link = try $ do
string "[["
url <- manyTill anyChar $ char ']'
content <- optionMaybe linkContent
char ']'
- return (url, content)
+ return $ B.link url "" <$> fromMaybe (return $ B.str url) content
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
index d3db3a9e2..9e8221248 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs
@@ -100,7 +100,7 @@ liftA fun a = a >>^ fun
-- | Duplicate a value to subsequently feed it into different arrows.
-- Can almost always be replaced with '(&&&)', 'keepingTheValue',
-- or even '(|||)'.
--- Aequivalent to
+-- Equivalent to
-- > returnA &&& returnA
duplicate :: (Arrow a) => a b (b,b)
duplicate = arr $ join (,)
@@ -114,7 +114,7 @@ infixr 2 >>%
-- | Duplicate a value and apply an arrow to the second instance.
--- Aequivalent to
+-- Equivalent to
-- > \a -> duplicate >>> second a
-- or
-- > \a -> returnA &&& a
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 6d96897aa..e76bbf5cf 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -50,7 +50,7 @@ class (Eq nsID, Ord nsID) => NameSpaceID nsID where
getNamespaceID :: NameSpaceIRI
-> NameSpaceIRIs nsID
-> Maybe (NameSpaceIRIs nsID, nsID)
- -- | Given a namespace id, lookup its IRI. May be overriden for performance.
+ -- | Given a namespace id, lookup its IRI. May be overridden for performance.
getIRI :: nsID
-> NameSpaceIRIs nsID
-> Maybe NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 616d9290b..45c6cd58c 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -61,7 +61,7 @@ import qualified Data.Foldable as F (Foldable, foldr)
import Data.Maybe
--- | Aequivalent to
+-- | Equivalent to
-- > foldr (.) id
-- where '(.)' are 'id' are the ones from "Control.Category"
-- and 'foldr' is the one from "Data.Foldable".
@@ -72,7 +72,7 @@ import Data.Maybe
composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a
composition = F.foldr (<<<) Cat.id
--- | Aequivalent to
+-- | Equivalent to
-- > foldr (flip (.)) id
-- where '(.)' are 'id' are the ones from "Control.Category"
-- and 'foldr' is the one from "Data.Foldable".
@@ -133,9 +133,7 @@ class Lookupable a where
-- can be used directly in almost any case.
readLookupables :: (Lookupable a) => String -> [(a,String)]
readLookupables s = [ (a,rest) | (word,rest) <- lex s,
- let result = lookup word lookupTable,
- isJust result,
- let Just a = result
+ a <- maybeToList (lookup word lookupTable)
]
-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 81392e16b..2327ea908 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -261,7 +261,7 @@ convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA
-- The resulting converter even behaves like an identity converter on the
-- value level.
--
--- Aequivalent to
+-- Equivalent to
--
-- > \v x a -> convertingExtraState v (returnV x >>> a)
--
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index e0444559b..6a1682829 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -138,7 +138,7 @@ fontPitchReader = executeIn NsOffice "font-face-decls" (
lookupDefaultingAttr NsStyle "font-pitch"
))
>>?^ ( M.fromList . foldl accumLegalPitches [] )
- )
+ ) `ifFailedDo` (returnV (Right M.empty))
where accumLegalPitches ls (Nothing,_) = ls
accumLegalPitches ls (Just n,p) = (n,p):ls
@@ -342,7 +342,7 @@ instance Read XslUnit where
readsPrec _ _ = []
-- | Rough conversion of measures into millimetres.
--- Pixels and em's are actually implementation dependant/relative measures,
+-- Pixels and em's are actually implementation dependent/relative measures,
-- so I could not really easily calculate anything exact here even if I wanted.
-- But I do not care about exactness right now, as I only use measures
-- to determine if a paragraph is "indented" or not.
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 888cd9307..1c52c3477 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -40,7 +40,7 @@ import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
- originalLang, translateLang)
+ originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Blocks, Inlines)
import Text.Pandoc.Class (PandocMonad)
@@ -314,9 +314,6 @@ codeBlock blockAttrs blockType = do
labelledBlock :: F Inlines -> F Blocks
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
- exportsCode :: [(String, String)] -> Bool
- exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
-
exportsResults :: [(String, String)] -> Bool
exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
@@ -743,7 +740,7 @@ latexEnd envName = try $
--
--- Footnote defintions
+-- Footnote definitions
--
noteBlock :: PandocMonad m => OrgParser m (F Blocks)
noteBlock = try $ do
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index c9465581a..7d55892fe 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
{-
Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -17,8 +16,7 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{- |
Module : Text.Pandoc.Readers.Org.DocumentTree
Copyright : Copyright (C) 2014-2018 Albert Krewinkel
@@ -45,7 +43,7 @@ import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import qualified Data.Map as Map
+import qualified Data.Set as Set
import qualified Text.Pandoc.Builder as B
--
@@ -60,7 +58,7 @@ documentTree :: PandocMonad m
documentTree blocks inline = do
initialBlocks <- blocks
headlines <- sequence <$> manyTill (headline blocks inline 1) eof
- title <- fmap (getTitle . unMeta) . orgStateMeta <$> getState
+ title <- fmap docTitle . orgStateMeta <$> getState
return $ do
headlines' <- headlines
initialBlocks' <- initialBlocks
@@ -70,19 +68,11 @@ documentTree blocks inline = do
, headlineTodoMarker = Nothing
, headlineText = B.fromList title'
, headlineTags = mempty
+ , headlinePlanning = emptyPlanning
, headlineProperties = mempty
, headlineContents = initialBlocks'
, headlineChildren = headlines'
}
- where
- getTitle :: Map.Map String MetaValue -> [Inline]
- getTitle metamap =
- case Map.lookup "title" metamap of
- Just (MetaInlines inlns) -> inlns
- _ -> []
-
-newtype Tag = Tag { fromTag :: String }
- deriving (Show, Eq)
-- | Create a tag containing the given string.
toTag :: String -> Tag
@@ -117,6 +107,7 @@ data Headline = Headline
, headlineTodoMarker :: Maybe TodoMarker
, headlineText :: Inlines
, headlineTags :: [Tag]
+ , headlinePlanning :: PlanningInfo -- ^ subtree planning information
, headlineProperties :: Properties
, headlineContents :: Blocks
, headlineChildren :: [Headline]
@@ -136,6 +127,7 @@ headline blocks inline lvl = try $ do
title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
tags <- option [] headerTags
newline
+ planning <- option emptyPlanning planningInfo
properties <- option mempty propertiesDrawer
contents <- blocks
children <- many (headline blocks inline (level + 1))
@@ -148,6 +140,7 @@ headline blocks inline lvl = try $ do
, headlineTodoMarker = todoKw
, headlineText = title'
, headlineTags = tags
+ , headlinePlanning = planning
, headlineProperties = properties
, headlineContents = contents'
, headlineChildren = children'
@@ -158,22 +151,27 @@ headline blocks inline lvl = try $ do
headerTags :: Monad m => OrgParser m [Tag]
headerTags = try $
- let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
+ let tag = orgTagWord <* char ':'
in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
-headlineToBlocks hdln@Headline {..} = do
- maxHeadlineLevels <- getExportSetting exportHeadlineLevels
+headlineToBlocks hdln = do
+ maxLevel <- getExportSetting exportHeadlineLevels
+ let tags = headlineTags hdln
+ let text = headlineText hdln
+ let level = headlineLevel hdln
+ shouldNotExport <- hasDoNotExportTag tags
case () of
- _ | any isNoExportTag headlineTags -> return mempty
- _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln
- _ | isCommentTitle headlineText -> return mempty
- _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln
- _ | otherwise -> headlineToHeaderWithContents hdln
+ _ | shouldNotExport -> return mempty
+ _ | any isArchiveTag tags -> archivedHeadlineToBlocks hdln
+ _ | isCommentTitle text -> return mempty
+ _ | maxLevel <= level -> headlineToHeaderWithList hdln
+ _ | otherwise -> headlineToHeaderWithContents hdln
-isNoExportTag :: Tag -> Bool
-isNoExportTag = (== toTag "noexport")
+hasDoNotExportTag :: Monad m => [Tag] -> OrgParser m Bool
+hasDoNotExportTag tags = containsExcludedTag . orgStateExcludedTags <$> getState
+ where containsExcludedTag s = any (`Set.member` s) tags
isArchiveTag :: Tag -> Bool
isArchiveTag = (== toTag "ARCHIVE")
@@ -182,8 +180,9 @@ isArchiveTag = (== toTag "ARCHIVE")
-- FIXME: This accesses builder internals not intended for use in situations
-- like these. Replace once keyword parsing is supported.
isCommentTitle :: Inlines -> Bool
-isCommentTitle (B.toList -> (Str "COMMENT":_)) = True
-isCommentTitle _ = False
+isCommentTitle inlns = case B.toList inlns of
+ (Str "COMMENT":_) -> True
+ _ -> False
archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
archivedHeadlineToBlocks hdln = do
@@ -194,17 +193,23 @@ archivedHeadlineToBlocks hdln = do
ArchivedTreesHeadlineOnly -> headlineToHeader hdln
headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithList hdln@Headline {..} = do
+headlineToHeaderWithList hdln = do
maxHeadlineLevels <- getExportSetting exportHeadlineLevels
header <- headlineToHeader hdln
- listElements <- mapM headlineToBlocks headlineChildren
+ listElements <- mapM headlineToBlocks (headlineChildren hdln)
+ planningBlock <- planningToBlock (headlinePlanning hdln)
let listBlock = if null listElements
then mempty
else B.orderedList listElements
- let headerText = if maxHeadlineLevels == headlineLevel
+ let headerText = if maxHeadlineLevels == headlineLevel hdln
then header
else flattenHeader header
- return $ headerText <> headlineContents <> listBlock
+ return . mconcat $
+ [ headerText
+ , headlineContents hdln
+ , planningBlock
+ , listBlock
+ ]
where
flattenHeader :: Blocks -> Blocks
flattenHeader blks =
@@ -213,27 +218,28 @@ headlineToHeaderWithList hdln@Headline {..} = do
_ -> mempty
headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithContents hdln@Headline {..} = do
+headlineToHeaderWithContents hdln = do
header <- headlineToHeader hdln
- childrenBlocks <- mconcat <$> mapM headlineToBlocks headlineChildren
- return $ header <> headlineContents <> childrenBlocks
+ planningBlock <- planningToBlock (headlinePlanning hdln)
+ childrenBlocks <- mconcat <$> mapM headlineToBlocks (headlineChildren hdln)
+ return $ header <> planningBlock <> headlineContents hdln <> childrenBlocks
headlineToHeader :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeader Headline {..} = do
+headlineToHeader hdln = do
exportTodoKeyword <- getExportSetting exportWithTodoKeywords
exportTags <- getExportSetting exportWithTags
let todoText = if exportTodoKeyword
- then case headlineTodoMarker of
+ then case headlineTodoMarker hdln of
Just kw -> todoKeywordToInlines kw <> B.space
Nothing -> mempty
else mempty
- let text = todoText <> headlineText <>
+ let text = todoText <> headlineText hdln <>
if exportTags
- then tagsToInlines headlineTags
+ then tagsToInlines (headlineTags hdln)
else mempty
- let propAttr = propertiesToAttr headlineProperties
- attr <- registerHeader propAttr headlineText
- return $ B.headerWith attr headlineLevel text
+ let propAttr = propertiesToAttr (headlineProperties hdln)
+ attr <- registerHeader propAttr (headlineText hdln)
+ return $ B.headerWith attr (headlineLevel hdln) text
todoKeyword :: Monad m => OrgParser m TodoMarker
todoKeyword = try $ do
@@ -277,9 +283,60 @@ tagsToInlines tags =
tagSpan :: Tag -> Inlines -> Inlines
tagSpan t = B.spanWith ("", ["tag"], [("tag-name", fromTag t)])
+-- | Render planning info as a block iff the respective export setting is
+-- enabled.
+planningToBlock :: Monad m => PlanningInfo -> OrgParser m Blocks
+planningToBlock planning = do
+ includePlanning <- getExportSetting exportWithPlanning
+ return $
+ if includePlanning
+ then B.plain . mconcat . intersperse B.space . filter (/= mempty) $
+ [ datumInlines planningClosed "CLOSED"
+ , datumInlines planningDeadline "DEADLINE"
+ , datumInlines planningScheduled "SCHEDULED"
+ ]
+ else mempty
+ where
+ datumInlines field name =
+ case field planning of
+ Nothing -> mempty
+ Just time -> B.strong (B.str name <> B.str ":")
+ <> B.space
+ <> B.emph (B.str time)
+
+-- | An Org timestamp, including repetition marks. TODO: improve
+type Timestamp = String
+
+timestamp :: Monad m => OrgParser m Timestamp
+timestamp = try $ do
+ openChar <- oneOf "<["
+ let isActive = openChar == '<'
+ let closeChar = if isActive then '>' else ']'
+ content <- many1Till anyChar (char closeChar)
+ return (openChar : content ++ [closeChar])
+
+-- | Planning information for a subtree/headline.
+data PlanningInfo = PlanningInfo
+ { planningClosed :: Maybe Timestamp
+ , planningDeadline :: Maybe Timestamp
+ , planningScheduled :: Maybe Timestamp
+ }
+emptyPlanning :: PlanningInfo
+emptyPlanning = PlanningInfo Nothing Nothing Nothing
-
+-- | Read a single planning-related and timestamped line.
+planningInfo :: Monad m => OrgParser m PlanningInfo
+planningInfo = try $ do
+ updaters <- many1 planningDatum <* skipSpaces <* newline
+ return $ foldr ($) emptyPlanning updaters
+ where
+ planningDatum = skipSpaces *> choice
+ [ updateWith (\s p -> p { planningScheduled = Just s}) "SCHEDULED"
+ , updateWith (\d p -> p { planningDeadline = Just d}) "DEADLINE"
+ , updateWith (\c p -> p { planningClosed = Just c}) "CLOSED"
+ ]
+ updateWith fn cs = fn <$> (string cs *> char ':' *> skipSpaces *> timestamp)
-- | Read a :PROPERTIES: drawer and return the key/value pairs contained
-- within.
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index d02eb37c5..f79ee0d64 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -69,7 +69,7 @@ exportSetting = choice
, integerSetting "H" (\val es -> es { exportHeadlineLevels = val })
, ignoredSetting "inline"
, ignoredSetting "num"
- , ignoredSetting "p"
+ , booleanSetting "p" (\val es -> es { exportWithPlanning = val })
, ignoredSetting "pri"
, ignoredSetting "prop"
, ignoredSetting "stat"
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 7d1568b80..a5335ca57 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
- originalLang, translateLang)
+ originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Inlines)
import qualified Text.Pandoc.Builder as B
@@ -510,7 +510,7 @@ anchor = try $ do
<* string ">>"
<* skipSpaces
--- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors
-- the org function @org-export-solidify-link-text@.
solidify :: String -> String
@@ -525,11 +525,13 @@ inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
inlineCodeBlock = try $ do
string "src_"
lang <- many1 orgArgWordChar
- opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
+ opts <- option [] $ try (enclosedByPair '[' ']' inlineBlockOption)
+ <|> (mempty <$ string "[]")
inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
let attrClasses = [translateLang lang]
let attrKeyVal = originalLang lang <> opts
- returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ returnF $ (if exportsCode opts then codeInlineBlck else mempty)
where
inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
inlineBlockOption = try $ do
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 965e33d94..cad1d7123 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -52,6 +52,7 @@ import Data.Char (toLower)
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
+import qualified Data.Set as Set
import Network.HTTP (urlEncode)
-- | Returns the current meta, respecting export options.
@@ -158,6 +159,7 @@ optionLine = try $ do
"seq_todo" -> todoSequence >>= updateState . registerTodoSequence
"typ_todo" -> todoSequence >>= updateState . registerTodoSequence
"macro" -> macroDefinition >>= updateState . registerMacro
+ "exclude_tags" -> excludedTagList >>= updateState . setExcludedTags
"pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar
"pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
_ -> mzero
@@ -190,6 +192,18 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+excludedTagList :: Monad m => OrgParser m [Tag]
+excludedTagList = do
+ skipSpaces
+ map Tag <$> many (orgTagWord <* skipSpaces) <* newline
+
+setExcludedTags :: [Tag] -> OrgParserState -> OrgParserState
+setExcludedTags tagList st =
+ let finalSet = if orgStateExcludedTagsChanged st
+ then foldr Set.insert (orgStateExcludedTags st) tagList
+ else Set.fromList tagList
+ in st { orgStateExcludedTags = finalSet, orgStateExcludedTagsChanged = True}
+
setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState
setEmphasisPreChar csMb st =
let preChars = fromMaybe (orgStateEmphasisPostChars defaultOrgParserState) csMb
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 4cb5bb626..59478256f 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Org.ParserState
, defaultOrgParserState
, OrgParserLocal (..)
, OrgNoteRecord
+ , Tag(..)
, HasReaderOptions (..)
, HasQuoteContext (..)
, HasMacros (..)
@@ -88,6 +89,9 @@ type OrgNoteTable = [OrgNoteRecord]
type OrgLinkFormatters = M.Map String (String -> String)
-- | Macro expander function
type MacroExpander = [String] -> String
+-- | Tag
+newtype Tag = Tag { fromTag :: String }
+ deriving (Show, Eq, Ord)
-- | The states in which a todo item can be
data TodoState = Todo | Done
@@ -113,6 +117,8 @@ data OrgParserState = OrgParserState
-- specified here.
, orgStateEmphasisPostChars :: [Char] -- ^ Chars allowed at after emphasis
, orgStateEmphasisNewlines :: Maybe Int
+ , orgStateExcludedTags :: Set.Set Tag
+ , orgStateExcludedTagsChanged :: Bool
, orgStateExportSettings :: ExportSettings
, orgStateHeaderMap :: M.Map Inlines String
, orgStateIdentifiers :: Set.Set String
@@ -183,6 +189,8 @@ defaultOrgParserState = OrgParserState
, orgStateEmphasisCharStack = []
, orgStateEmphasisNewlines = Nothing
, orgStateExportSettings = def
+ , orgStateExcludedTags = Set.singleton $ Tag "noexport"
+ , orgStateExcludedTagsChanged = False
, orgStateHeaderMap = M.empty
, orgStateIdentifiers = Set.empty
, orgStateIncludeFiles = []
@@ -260,6 +268,7 @@ data ExportSettings = ExportSettings
, exportWithAuthor :: Bool -- ^ Include author in final meta-data
, exportWithCreator :: Bool -- ^ Include creator in final meta-data
, exportWithEmail :: Bool -- ^ Include email in final meta-data
+ , exportWithPlanning :: Bool -- ^ Keep planning info after headlines
, exportWithTags :: Bool -- ^ Keep tags as part of headlines
, exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers
}
@@ -280,6 +289,7 @@ defaultExportSettings = ExportSettings
, exportWithAuthor = True
, exportWithCreator = True
, exportWithEmail = True
+ , exportWithPlanning = False
, exportWithTags = True
, exportWithTodoKeywords = True
}
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index e014de65e..52a346e36 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -46,6 +46,8 @@ module Text.Pandoc.Readers.Org.Parsing
, orgArgKey
, orgArgWord
, orgArgWordChar
+ , orgTagWord
+ , orgTagWordChar
-- * Re-exports from Text.Pandoc.Parser
, ParserContext (..)
, many1Till
@@ -137,14 +139,13 @@ anyLine =
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
--- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
--- of the state saved and restored.
+-- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character
+-- allowed before emphasised text.
parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
parseFromString parser str' = do
- oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
result <- P.parseFromString parser str'
- updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+ updateState $ \s -> s { orgStateLastPreCharPos = Nothing }
return result
-- | Skip one or more tab or space characters.
@@ -221,3 +222,9 @@ orgArgWord = many1 orgArgWordChar
-- | Chars treated as part of a word in plists.
orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_"
+
+orgTagWord :: Monad m => OrgParser m String
+orgTagWord = many1 orgTagWordChar
+
+orgTagWordChar :: Monad m => OrgParser m Char
+orgTagWordChar = alphaNum <|> oneOf "@%#_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 17fe34738..71d1dd517 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Readers.Org.Shared
, isImageFilename
, originalLang
, translateLang
+ , exportsCode
) where
import Prelude
@@ -96,3 +97,6 @@ translateLang cs =
"sh" -> "bash"
"sqlite" -> "sql"
_ -> cs
+
+exportsCode :: [(String, String)] -> Bool
+exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 71a38cf82..28fa7b83e 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -37,7 +37,7 @@ import Control.Arrow (second)
import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
import Control.Monad.Except (throwError)
import Control.Monad.Identity (Identity (..))
-import Data.Char (isHexDigit, isSpace, toLower, toUpper)
+import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum)
import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
nub, sort, transpose, union)
import qualified Data.Map as M
@@ -172,6 +172,7 @@ parseRST = do
docMinusKeys <- concat <$>
manyTill (referenceKey <|> anchorDef <|>
noteBlock <|> citationBlock <|>
+ (snd <$> withRaw comment) <|>
headerBlock <|> lineClump) eof
setInput docMinusKeys
setPosition startPos
@@ -1089,7 +1090,7 @@ referenceKey = do
targetURI :: Monad m => ParserT [Char] st m [Char]
targetURI = do
skipSpaces
- optional newline
+ optional $ try $ newline >> notFollowedBy blankline
contents <- trim <$>
many1 (satisfy (/='\n')
<|> try (newline >> many1 spaceChar >> noneOf " \t\n"))
@@ -1313,19 +1314,24 @@ table = gridTable False <|> simpleTable False <|>
inline :: PandocMonad m => RSTParser m Inlines
inline = choice [ note -- can start with whitespace, so try before ws
- , whitespace
, link
- , str
- , endline
, strong
, emph
, code
, subst
, interpretedRole
- , smart
- , hyphens
- , escapedChar
- , symbol ] <?> "inline"
+ , inlineContent ] <?> "inline"
+
+-- strings, spaces and other characters that can appear either by
+-- themselves or within inline markup
+inlineContent :: PandocMonad m => RSTParser m Inlines
+inlineContent = choice [ whitespace
+ , str
+ , endline
+ , smart
+ , hyphens
+ , escapedChar
+ , symbol ] <?> "inline content"
parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline)
@@ -1368,11 +1374,11 @@ atStart p = do
emph :: PandocMonad m => RSTParser m Inlines
emph = B.emph . trimInlines . mconcat <$>
- enclosed (atStart $ char '*') (char '*') inline
+ enclosed (atStart $ char '*') (char '*') inlineContent
strong :: PandocMonad m => RSTParser m Inlines
strong = B.strong . trimInlines . mconcat <$>
- enclosed (atStart $ string "**") (try $ string "**") inline
+ enclosed (atStart $ string "**") (try $ string "**") inlineContent
-- Note, this doesn't precisely implement the complex rule in
-- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
@@ -1380,7 +1386,6 @@ strong = B.strong . trimInlines . mconcat <$>
--
-- TODO:
-- - Classes are silently discarded in addNewRole
--- - Lacks sensible implementation for title-reference (which is the default)
-- - Allows direct use of the :raw: role, rST only allows inherited use.
interpretedRole :: PandocMonad m => RSTParser m Inlines
interpretedRole = try $ do
@@ -1390,12 +1395,12 @@ interpretedRole = try $ do
renderRole :: PandocMonad m
=> String -> Maybe String -> String -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
- "sup" -> return $ B.superscript $ B.str contents
- "superscript" -> return $ B.superscript $ B.str contents
- "sub" -> return $ B.subscript $ B.str contents
- "subscript" -> return $ B.subscript $ B.str contents
- "emphasis" -> return $ B.emph $ B.str contents
- "strong" -> return $ B.strong $ B.str contents
+ "sup" -> return $ B.superscript $ treatAsText contents
+ "superscript" -> return $ B.superscript $ treatAsText contents
+ "sub" -> return $ B.subscript $ treatAsText contents
+ "subscript" -> return $ B.subscript $ treatAsText contents
+ "emphasis" -> return $ B.emph $ treatAsText contents
+ "strong" -> return $ B.strong $ treatAsText contents
"rfc-reference" -> return $ rfcLink contents
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
@@ -1406,7 +1411,7 @@ renderRole contents fmt role attr = case role of
"title" -> titleRef contents
"t" -> titleRef contents
"code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
- "span" -> return $ B.spanWith attr $ B.str contents
+ "span" -> return $ B.spanWith attr $ treatAsText contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
customRoles <- stateRstCustomRoles <$> getState
@@ -1414,14 +1419,20 @@ renderRole contents fmt role attr = case role of
Just (newRole, newFmt, newAttr) ->
renderRole contents newFmt newRole newAttr
Nothing -> -- undefined role
- return $ B.spanWith ("",[],[("role",role)]) (B.str contents)
+ return $ B.codeWith ("",["interpreted-text"],[("role",role)])
+ contents
where
- titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
+ titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+ treatAsText = B.text . handleEscapes
+ handleEscapes [] = []
+ handleEscapes ('\\':' ':cs) = handleEscapes cs
+ handleEscapes ('\\':c:cs) = c : handleEscapes cs
+ handleEscapes (c:cs) = c : handleEscapes cs
addClass :: String -> Attr -> Attr
addClass c (ident, classes, keyValues) = (ident, classes `union` [c], keyValues)
@@ -1445,7 +1456,18 @@ roleAfter = try $ do
return (role,contents)
unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
-unmarkedInterpretedText = enclosed (atStart $ char '`') (char '`') anyChar
+unmarkedInterpretedText = try $ do
+ atStart (char '`')
+ contents <- mconcat <$> (many1
+ ( many1 (noneOf "`\\\n")
+ <|> (char '\\' >> ((\c -> ['\\',c]) <$> noneOf "\n"))
+ <|> (string "\n" <* notFollowedBy blankline)
+ <|> try (string "`" <*
+ notFollowedBy (() <$ roleMarker) <*
+ lookAhead (satisfy isAlphaNum))
+ ))
+ char '`'
+ return contents
whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
@@ -1480,7 +1502,7 @@ explicitLink = try $ do
char '`'
notFollowedBy (char '`') -- `` marks start of inline code
label' <- trimInlines . mconcat <$>
- manyTill (notFollowedBy (char '`') >> inline) (char '<')
+ manyTill (notFollowedBy (char '`') >> inlineContent) (char '<')
src <- trim <$> manyTill (noneOf ">\n") (char '>')
skipSpaces
string "`_"
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 1f230ae7e..c3cfedcfb 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -74,9 +74,6 @@ type TWParser = ParserT [Char] ParserState
tryMsg :: String -> TWParser m a -> TWParser m a
tryMsg msg p = try p <?> msg
-skip :: TWParser m a -> TWParser m ()
-skip parser = parser >> return ()
-
nested :: PandocMonad m => TWParser m a -> TWParser m a
nested p = do
nestlevel <- stateMaxNestingLevel <$> getState
@@ -92,7 +89,7 @@ htmlElement tag = tryMsg tag $ do
content <- manyTill anyChar (endtag <|> endofinput)
return (htmlAttrToPandoc attr, trim content)
where
- endtag = skip $ htmlTag (~== TagClose tag)
+ endtag = void $ htmlTag (~== TagClose tag)
endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
@@ -114,18 +111,15 @@ parseHtmlContentWithAttrs tag parser = do
endOfContent = try $ skipMany blankline >> skipSpaces >> eof
parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a]
-parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
+parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p
--
-- main parser
--
parseTWiki :: PandocMonad m => TWParser m Pandoc
-parseTWiki = do
- bs <- mconcat <$> many block
- spaces
- eof
- return $ B.doc bs
+parseTWiki =
+ B.doc . mconcat <$> many block <* spaces <* eof
--
@@ -158,7 +152,7 @@ separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalR
header :: PandocMonad m => TWParser m B.Blocks
header = tryMsg "header" $ do
string "---"
- level <- many1 (char '+') >>= return . length
+ level <- length <$> many1 (char '+')
guard $ level <= 6
classes <- option [] $ string "!!" >> return ["unnumbered"]
skipSpaces
@@ -167,11 +161,10 @@ header = tryMsg "header" $ do
return $ B.headerWith attr level content
verbatim :: PandocMonad m => TWParser m B.Blocks
-verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
- >>= return . (uncurry B.codeBlockWith)
+verbatim = uncurry B.codeBlockWith <$> (htmlElement "verbatim" <|> htmlElement "pre")
literal :: PandocMonad m => TWParser m B.Blocks
-literal = htmlElement "literal" >>= return . rawBlock
+literal = rawBlock <$> htmlElement "literal"
where
format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
rawBlock (attrs, content) = B.rawBlock (format attrs) content
@@ -183,7 +176,7 @@ list prefix = choice [ bulletList prefix
definitionList :: PandocMonad m => String -> TWParser m B.Blocks
definitionList prefix = tryMsg "definitionList" $ do
- indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ "
+ indent <- lookAhead $ string prefix *> many1 (string " ") <* string "$ "
elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
return $ B.definitionList elements
where
@@ -193,7 +186,7 @@ definitionList prefix = tryMsg "definitionList" $ do
string (indent ++ "$ ") >> skipSpaces
term <- many1Till inline $ string ": "
line <- listItemLine indent $ string "$ "
- return $ (mconcat term, [line])
+ return (mconcat term, [line])
bulletList :: PandocMonad m => String -> TWParser m B.Blocks
bulletList prefix = tryMsg "bulletList" $
@@ -227,25 +220,24 @@ parseListItem prefix marker = string prefix >> marker >> listItemLine prefix mar
listItemLine :: (PandocMonad m, Show a)
=> String -> TWParser m a -> TWParser m B.Blocks
-listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
+listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent)
where
lineContent = do
content <- anyLine
continuation <- optionMaybe listContinuation
- return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation)
+ return $ filterSpaces content ++ "\n" ++ maybe "" (" " ++) continuation
filterSpaces = reverse . dropWhile (== ' ') . reverse
listContinuation = notFollowedBy (string prefix >> marker) >>
string " " >> lineContent
parseContent = parseFromString' $ many1 $ nestedList <|> parseInline
- parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
- return . B.plain . mconcat
+ parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList)
nestedList = list prefix
lastNewline = try $ char '\n' <* eof
newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
table :: PandocMonad m => TWParser m B.Blocks
table = try $ do
- tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
+ tableHead <- optionMaybe (unzip <$> many1Till tableParseHeader newline)
rows <- many1 tableParseRow
return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
where
@@ -258,11 +250,11 @@ table = try $ do
tableParseHeader :: PandocMonad m => TWParser m ((Alignment, Double), B.Blocks)
tableParseHeader = try $ do
char '|'
- leftSpaces <- many spaceChar >>= return . length
+ leftSpaces <- length <$> many spaceChar
char '*'
content <- tableColumnContent (char '*' >> skipSpaces >> char '|')
char '*'
- rightSpaces <- many spaceChar >>= return . length
+ rightSpaces <- length <$> many spaceChar
optional tableEndOfRow
return (tableAlign leftSpaces rightSpaces, content)
where
@@ -283,13 +275,13 @@ tableEndOfRow :: PandocMonad m => TWParser m Char
tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
tableColumnContent :: PandocMonad m => TWParser m a -> TWParser m B.Blocks
-tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
+tableColumnContent end = (B.plain . mconcat) <$> manyTill content (lookAhead $ try end)
where
content = continuation <|> inline
continuation = try $ char '\\' >> newline >> return mempty
blockQuote :: PandocMonad m => TWParser m B.Blocks
-blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
+blockQuote = (B.blockQuote . mconcat) <$> parseHtmlContent "blockquote" block
noautolink :: PandocMonad m => TWParser m B.Blocks
noautolink = do
@@ -300,15 +292,15 @@ noautolink = do
setState $ st{ stateAllowLinks = True }
return $ mconcat blocks
where
- parseContent = parseFromString' $ many $ block
+ parseContent = parseFromString' $ many block
para :: PandocMonad m => TWParser m B.Blocks
-para = many1Till inline endOfParaElement >>= return . result . mconcat
+para = (result . mconcat) <$> many1Till inline endOfParaElement
where
endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
endOfInput = try $ skipMany blankline >> skipSpaces >> eof
endOfPara = try $ blankline >> skipMany1 blankline
- newBlockElement = try $ blankline >> skip blockElements
+ newBlockElement = try $ blankline >> void blockElements
result content = if F.all (==Space) content
then mempty
else B.para $ B.trimInlines content
@@ -340,7 +332,7 @@ inline = choice [ whitespace
] <?> "inline"
whitespace :: PandocMonad m => TWParser m B.Inlines
-whitespace = (lb <|> regsp) >>= return
+whitespace = lb <|> regsp
where lb = try $ skipMany spaceChar >> linebreak >> return B.space
regsp = try $ skipMany1 spaceChar >> return B.space
@@ -362,13 +354,13 @@ enclosed :: (Monoid b, PandocMonad m, Show a)
=> TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b
enclosed sep p = between sep (try $ sep <* endMarker) p
where
- endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
+ endMarker = lookAhead $ void endSpace <|> void (oneOf ".,!?:)|") <|> eof
endSpace = (spaceChar <|> newline) >> return B.space
macro :: PandocMonad m => TWParser m B.Inlines
macro = macroWithParameters <|> withoutParameters
where
- withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
+ withoutParameters = emptySpan <$> enclosed (char '%') (const macroName)
emptySpan name = buildSpan name [] mempty
macroWithParameters :: PandocMonad m => TWParser m B.Inlines
@@ -393,13 +385,13 @@ macroName = do
return (first:rest)
attributes :: PandocMonad m => TWParser m (String, [(String, String)])
-attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
- return . foldr (either mkContent mkKvs) ([], [])
+attributes = foldr (either mkContent mkKvs) ([], [])
+ <$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}')
where
spnl = skipMany (spaceChar <|> newline)
mkContent c ([], kvs) = (c, kvs)
mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
- mkKvs kv (cont, rest) = (cont, (kv : rest))
+ mkKvs kv (cont, rest) = (cont, kv : rest)
attribute :: PandocMonad m => TWParser m (Either String (String, String))
attribute = withKey <|> withoutKey
@@ -407,52 +399,50 @@ attribute = withKey <|> withoutKey
withKey = try $ do
key <- macroName
char '='
- parseValue False >>= return . (curry Right key)
- withoutKey = try $ parseValue True >>= return . Left
- parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities
+ curry Right key <$> parseValue False
+ withoutKey = try $ Left <$> parseValue True
+ parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces)
withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
withoutQuotes allowSpaces
- | allowSpaces == True = many1 $ noneOf "}"
- | otherwise = many1 $ noneOf " }"
+ | allowSpaces = many1 $ noneOf "}"
+ | otherwise = many1 $ noneOf " }"
nestedInlines :: (Show a, PandocMonad m)
=> TWParser m a -> TWParser m B.Inlines
nestedInlines end = innerSpace <|> nestedInline
where
- innerSpace = try $ whitespace <* (notFollowedBy end)
+ innerSpace = try $ whitespace <* notFollowedBy end
nestedInline = notFollowedBy whitespace >> nested inline
strong :: PandocMonad m => TWParser m B.Inlines
-strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
+strong = try $ B.strong <$> enclosed (char '*') nestedInlines
strongHtml :: PandocMonad m => TWParser m B.Inlines
-strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
- >>= return . B.strong . mconcat
+strongHtml = B.strong . mconcat <$> (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
strongAndEmph :: PandocMonad m => TWParser m B.Inlines
-strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
+strongAndEmph = try $ B.emph . B.strong <$> enclosed (string "__") nestedInlines
emph :: PandocMonad m => TWParser m B.Inlines
-emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
+emph = try $ B.emph <$> enclosed (char '_') nestedInlines
emphHtml :: PandocMonad m => TWParser m B.Inlines
-emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
- >>= return . B.emph . mconcat
+emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
nestedString :: (Show a, PandocMonad m)
=> TWParser m a -> TWParser m String
-nestedString end = innerSpace <|> (count 1 nonspaceChar)
+nestedString end = innerSpace <|> count 1 nonspaceChar
where
innerSpace = try $ many1 spaceChar <* notFollowedBy end
boldCode :: PandocMonad m => TWParser m B.Inlines
-boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
+boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString
htmlComment :: PandocMonad m => TWParser m B.Inlines
htmlComment = htmlTag isCommentTag >> return mempty
code :: PandocMonad m => TWParser m B.Inlines
-code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
+code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString
codeHtml :: PandocMonad m => TWParser m B.Inlines
codeHtml = do
@@ -464,7 +454,7 @@ autoLink = try $ do
state <- getState
guard $ stateAllowLinks state
(text, url) <- parseLink
- guard $ checkLink (head $ reverse url)
+ guard $ checkLink (last url)
return $ makeLink (text, url)
where
parseLink = notFollowedBy nop >> (uri <|> emailAddress)
@@ -474,17 +464,17 @@ autoLink = try $ do
| otherwise = isAlphaNum c
str :: PandocMonad m => TWParser m B.Inlines
-str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
+str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
nop :: PandocMonad m => TWParser m B.Inlines
-nop = try $ (skip exclamation <|> skip nopTag) >> followContent
+nop = try $ (void exclamation <|> void nopTag) >> followContent
where
exclamation = char '!'
nopTag = stringAnyCase "<nop>"
- followContent = many1 nonspaceChar >>= return . B.str . fromEntities
+ followContent = B.str . fromEntities <$> many1 nonspaceChar
symbol :: PandocMonad m => TWParser m B.Inlines
-symbol = count 1 nonspaceChar >>= return . B.str
+symbol = B.str <$> count 1 nonspaceChar
smart :: PandocMonad m => TWParser m B.Inlines
smart = do
@@ -498,17 +488,16 @@ smart = do
singleQuoted :: PandocMonad m => TWParser m B.Inlines
singleQuoted = try $ do
singleQuoteStart
- withQuoteContext InSingleQuote $
- many1Till inline singleQuoteEnd >>=
- (return . B.singleQuoted . B.trimInlines . mconcat)
+ withQuoteContext InSingleQuote
+ (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd)
doubleQuoted :: PandocMonad m => TWParser m B.Inlines
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ doubleQuoteEnd >>
+ withQuoteContext InDoubleQuote (doubleQuoteEnd >>
return (B.doubleQuoted $ B.trimInlines contents))
- <|> (return $ (B.str "\8220") B.<> contents)
+ <|> return (B.str "\8220" B.<> contents)
link :: PandocMonad m => TWParser m B.Inlines
link = try $ do
@@ -527,5 +516,5 @@ linkText = do
char ']'
return (url, "", content)
where
- linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
+ linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent
parseLinkContent = parseFromString' $ many1 inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index bc3bcaf26..4b65be347 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -540,7 +540,7 @@ wordChunk = try $ do
str :: PandocMonad m => ParserT [Char] ParserState m Inlines
str = do
baseStr <- hyphenedWords
- -- RedCloth compliance : if parsed word is uppercase and immediatly
+ -- RedCloth compliance : if parsed word is uppercase and immediately
-- followed by parens, parens content is unconditionally word acronym
fullStr <- option baseStr $ try $ do
guard $ all isUpper baseStr
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index 5c7507248..8458b05e5 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -22,6 +22,7 @@ import Prelude
import Control.Monad
import Control.Monad.Except (throwError)
import qualified Data.Foldable as F
+import Data.List (dropWhileEnd)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
@@ -166,7 +167,7 @@ table = try $ do
-- return $ B.simpleTable (headers rows) $ trace ("rows: " ++ (show rows)) rows
return $B.simpleTable (headers rows) rows
where
- -- The headers are as many empty srings as the number of columns
+ -- The headers are as many empty strings as the number of columns
-- in the first row
headers rows = map (B.plain . B.str) $replicate (length $ head rows) ""
@@ -319,7 +320,7 @@ listItem = choice [
bulletItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
bulletItem = try $ do
prefix <- many1 $ char '*'
- many1 $ char ' '
+ many $ char ' '
content <- listItemLine (length prefix)
return (LN Bullet (length prefix), B.plain content)
@@ -331,7 +332,7 @@ bulletItem = try $ do
numberedItem :: PandocMonad m => TikiWikiParser m (ListNesting, B.Blocks)
numberedItem = try $ do
prefix <- many1 $ char '#'
- many1 $ char ' '
+ many $ char ' '
content <- listItemLine (length prefix)
return (LN Numbered (length prefix), B.plain content)
@@ -346,7 +347,7 @@ listItemLine nest = lineContent >>= parseContent
listContinuation = string (replicate nest '+') >> lineContent
parseContent x = do
parsed <- parseFromString (many1 inline) x
- return $ mconcat parsed
+ return $ mconcat $ dropWhileEnd (== B.space) parsed
-- Turn the CODE macro attributes into Pandoc code block attributes.
mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)])
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index bed49fd46..26dc934a9 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -198,7 +198,7 @@ para = try $ do
commentBlock :: T2T Blocks
commentBlock = try (blockMarkupArea anyLine (const mempty) "%%%") <|> comment
--- Seperator and Strong line treated the same
+-- Separator and Strong line treated the same
hrule :: T2T Blocks
hrule = try $ do
spaces
@@ -575,8 +575,10 @@ symbol = B.str . (:[]) <$> oneOf specialChars
getTarget :: T2T String
getTarget = do
mv <- lookupMeta "target" . stateMeta <$> getState
- let MetaString target = fromMaybe (MetaString "html") mv
- return target
+ return $ case mv of
+ Just (MetaString target) -> target
+ Just (MetaInlines [Str target]) -> target
+ _ -> "html"
atStart :: T2T ()
atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 824a912c3..15f0d991f 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -429,9 +429,7 @@ tableRow = try $ do
s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar
>> newline))
guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|")
- tr <- many tableCell
- many spaceChar >> char '\n'
- return tr
+ many tableCell <* many spaceChar <* char '\n'
tableCell :: PandocMonad m => VwParser m Blocks
tableCell = try $
@@ -451,13 +449,13 @@ ph s = try $ do
noHtmlPh :: PandocMonad m => VwParser m ()
noHtmlPh = try $
- () <$ (many spaceChar >> string "%nohtml" >> many spaceChar
- >> lookAhead newline)
+ () <$ many spaceChar <* string "%nohtml" <* many spaceChar
+ <* lookAhead newline
templatePh :: PandocMonad m => VwParser m ()
templatePh = try $
- () <$ (many spaceChar >> string "%template" >>many (noneOf "\n")
- >> lookAhead newline)
+ () <$ many spaceChar <* string "%template" <* many (noneOf "\n")
+ <* lookAhead newline
-- inline parser
@@ -617,10 +615,8 @@ procImgurl :: String -> String
procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s
inlineMath :: PandocMonad m => VwParser m Inlines
-inlineMath = try $ do
- char '$'
- contents <- many1Till (noneOf "\n") (char '$')
- return $ B.math contents
+inlineMath = try $
+ B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$')
tag :: PandocMonad m => VwParser m Inlines
tag = try $ do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 26b01bc90..9f48080b8 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -79,6 +79,7 @@ module Text.Pandoc.Shared (
makeMeta,
eastAsianLineBreakFilter,
underlineSpan,
+ splitSentences,
-- * TagSoup HTML handling
renderTags',
-- * File handling
@@ -94,6 +95,8 @@ module Text.Pandoc.Shared (
-- * for squashing blocks
blocksToInlines,
blocksToInlines',
+ blocksToInlinesWithSep,
+ defaultBlocksSeparator,
-- * Safe read
safeRead,
-- * Temp directory
@@ -580,6 +583,31 @@ eastAsianLineBreakFilter = bottomUp go
underlineSpan :: Inlines -> Inlines
underlineSpan = B.spanWith ("", ["underline"], [])
+-- | Returns the first sentence in a list of inlines, and the rest.
+breakSentence :: [Inline] -> ([Inline], [Inline])
+breakSentence [] = ([],[])
+breakSentence xs =
+ let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
+ isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
+ isSentenceEndInline LineBreak = True
+ isSentenceEndInline _ = False
+ (as, bs) = break isSentenceEndInline xs
+ in case bs of
+ [] -> (as, [])
+ [c] -> (as ++ [c], [])
+ (c:Space:cs) -> (as ++ [c], cs)
+ (c:SoftBreak:cs) -> (as ++ [c], cs)
+ (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs)
+ (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs)
+ (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs)
+ (c:cs) -> (as ++ [c] ++ ds, es)
+ where (ds, es) = breakSentence cs
+
+-- | Split a list of inlines into sentences.
+splitSentences :: [Inline] -> [[Inline]]
+splitSentences xs =
+ let (sent, rest) = breakSentence xs
+ in if null rest then [sent] else sent : splitSentences rest
--
-- TagSoup HTML handling
@@ -712,7 +740,7 @@ schemes = Set.fromList
, "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire"
, "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r"
, "z39.50s"
- -- Inofficial schemes
+ -- Unofficial schemes
, "doi", "isbn", "javascript", "pmid"
]
@@ -757,12 +785,19 @@ blocksToInlinesWithSep sep =
mconcat . intersperse sep . map blockToInlines
blocksToInlines' :: [Block] -> Inlines
-blocksToInlines' = blocksToInlinesWithSep parSep
- where parSep = B.space <> B.str "¶" <> B.space
+blocksToInlines' = blocksToInlinesWithSep defaultBlocksSeparator
blocksToInlines :: [Block] -> [Inline]
blocksToInlines = B.toList . blocksToInlines'
+-- | Inline elements used to separate blocks when squashing blocks into
+-- inlines.
+defaultBlocksSeparator :: Inlines
+defaultBlocksSeparator =
+ -- This is used in the pandoc.utils.blocks_to_inlines function. Docs
+ -- there should be updated if this is changed.
+ B.space <> B.str "¶" <> B.space
+
--
-- Safe read
diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs
index 4a216af92..13dcb3b61 100644
--- a/src/Text/Pandoc/Translations.hs
+++ b/src/Text/Pandoc/Translations.hs
@@ -48,11 +48,12 @@ module Text.Pandoc.Translations (
)
where
import Prelude
-import Data.Aeson.Types (typeMismatch)
+import Data.Aeson.Types (Value(..), FromJSON(..))
+import qualified Data.Aeson.Types as Aeson
import qualified Data.HashMap.Strict as HM
import qualified Data.Map as M
import Data.Text as T
-import Data.Yaml as Yaml
+import qualified Data.YAML as YAML
import GHC.Generics (Generic)
import Text.Pandoc.Shared (safeRead)
import qualified Text.Pandoc.UTF8 as UTF8
@@ -90,7 +91,15 @@ instance FromJSON Term where
Just t' -> pure t'
Nothing -> fail $ "Invalid Term name " ++
show t
- parseJSON invalid = typeMismatch "Term" invalid
+ parseJSON invalid = Aeson.typeMismatch "Term" invalid
+
+instance YAML.FromYAML Term where
+ parseYAML (YAML.Scalar (YAML.SStr t)) =
+ case safeRead (T.unpack t) of
+ Just t' -> pure t'
+ Nothing -> fail $ "Invalid Term name " ++
+ show t
+ parseYAML invalid = YAML.typeMismatch "Term" invalid
instance FromJSON Translations where
parseJSON (Object hm) = do
@@ -102,14 +111,28 @@ instance FromJSON Translations where
Just t ->
case v of
(String s) -> return (t, T.unpack $ T.strip s)
- inv -> typeMismatch "String" inv
- parseJSON invalid = typeMismatch "Translations" invalid
+ inv -> Aeson.typeMismatch "String" inv
+ parseJSON invalid = Aeson.typeMismatch "Translations" invalid
+
+instance YAML.FromYAML Translations where
+ parseYAML = YAML.withMap "Translations" $
+ \tr -> Translations .M.fromList <$> mapM addItem (M.toList tr)
+ where addItem (n@(YAML.Scalar (YAML.SStr k)), v) =
+ case safeRead (T.unpack k) of
+ Nothing -> YAML.typeMismatch "Term" n
+ Just t ->
+ case v of
+ (YAML.Scalar (YAML.SStr s)) ->
+ return (t, T.unpack (T.strip s))
+ n' -> YAML.typeMismatch "String" n'
+ addItem (n, _) = YAML.typeMismatch "String" n
lookupTerm :: Term -> Translations -> Maybe String
lookupTerm t (Translations tm) = M.lookup t tm
readTranslations :: String -> Either String Translations
readTranslations s =
- case Yaml.decodeEither' $ UTF8.fromString s of
- Left err' -> Left $ prettyPrintParseException err'
- Right t -> Right t
+ case YAML.decodeStrict $ UTF8.fromString s of
+ Left err' -> Left err'
+ Right (t:_) -> Right t
+ Right [] -> Left "empty YAML document"
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
index c1bae7038..60ff269da 100644
--- a/src/Text/Pandoc/UUID.hs
+++ b/src/Text/Pandoc/UUID.hs
@@ -67,13 +67,14 @@ instance Show UUID where
getUUID :: RandomGen g => g -> UUID
getUUID gen =
- let [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] = take 16 $ randoms gen :: [Word8]
- -- set variant
- i' = i `setBit` 7 `clearBit` 6
- -- set version (0100 for random)
- g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4
- in
- UUID a b c d e f g' h i' j k l m n o p
+ case take 16 (randoms gen :: [Word8]) of
+ [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] ->
+ -- set variant
+ let i' = i `setBit` 7 `clearBit` 6
+ -- set version (0100 for random)
+ g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4
+ in UUID a b c d e f g' h i' j k l m n o p
+ _ -> error "not enough random numbers for UUID" -- should not happen
getRandomUUID :: IO UUID
getRandomUUID = getUUID <$> getStdGen
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 036185282..ffe5b7473 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -44,7 +44,7 @@ import Data.Aeson (Result (..), Value (String), fromJSON, toJSON)
import Data.Char (isPunctuation, isSpace)
import Data.List (intercalate, intersperse, stripPrefix)
import qualified Data.Map as M
-import Data.Maybe (fromMaybe, isJust)
+import Data.Maybe (fromMaybe, isJust, listToMaybe)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
@@ -126,11 +126,16 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker
else spaceChar
-- | True if string begins with an ordered list marker
-beginsWithOrderedListMarker :: String -> Bool
-beginsWithOrderedListMarker str =
- case runParser olMarker defaultParserState "para start" (take 10 str) of
- Left _ -> False
- Right _ -> True
+-- or would be interpreted as an AsciiDoc option command
+needsEscaping :: String -> Bool
+needsEscaping s = beginsWithOrderedListMarker s || isBracketed s
+ where
+ beginsWithOrderedListMarker str =
+ case runParser olMarker defaultParserState "para start" (take 10 str) of
+ Left _ -> False
+ Right _ -> True
+ isBracketed ('[':cs) = listToMaybe (reverse cs) == Just ']'
+ isBracketed _ = False
-- | Convert Pandoc block element to asciidoc.
blockToAsciiDoc :: PandocMonad m
@@ -146,8 +151,8 @@ blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) =
blockToAsciiDoc opts (Para inlines) = do
contents <- inlineListToAsciiDoc opts inlines
-- escape if para starts with ordered list marker
- let esc = if beginsWithOrderedListMarker (render Nothing contents)
- then text "\\"
+ let esc = if needsEscaping (render Nothing contents)
+ then text "{empty}"
else empty
return $ esc <> contents <> blankline
blockToAsciiDoc opts (LineBlock lns) = do
@@ -280,7 +285,7 @@ blockToAsciiDoc opts (DefinitionList items) = do
contents <- mapM (definitionListItemToAsciiDoc opts) items
return $ cat contents <> blankline
blockToAsciiDoc opts (Div (ident,_,_) bs) = do
- let identifier = if null ident then empty else ("[[" <> text ident <> "]]")
+ let identifier = if null ident then empty else "[[" <> text ident <> "]]"
contents <- blockListToAsciiDoc opts bs
return $ identifier $$ contents
@@ -487,6 +492,6 @@ inlineToAsciiDoc opts (Note [Plain inlines]) = do
-- asciidoc can't handle blank lines in notes
inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]"
inlineToAsciiDoc opts (Span (ident,_,_) ils) = do
- let identifier = if null ident then empty else ("[[" <> text ident <> "]]")
+ let identifier = if null ident then empty else "[[" <> text ident <> "]]"
contents <- inlineListToAsciiDoc opts ils
return $ identifier <> contents
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index 98c1101fa..84ea37f38 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -45,7 +45,7 @@ import Network.HTTP (urlEncode)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
-import Text.Pandoc.Shared (isTightList, linesToPara, substitute)
+import Text.Pandoc.Shared (isTightList, linesToPara, substitute, capitalize)
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk (query, walk, walkM)
import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes)
@@ -253,18 +253,34 @@ inlineToNodes opts (Strong xs) = (node STRONG (inlinesToNodes opts xs) :)
inlineToNodes opts (Strikeout xs) =
if isEnabled Ext_strikeout opts
then (node (CUSTOM_INLINE "~~" "~~") (inlinesToNodes opts xs) :)
- else ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++
- [node (HTML_INLINE (T.pack "</s>")) []]) ++ )
+ else if isEnabled Ext_raw_html opts
+ then ((node (HTML_INLINE (T.pack "<s>")) [] : inlinesToNodes opts xs ++
+ [node (HTML_INLINE (T.pack "</s>")) []]) ++ )
+ else (inlinesToNodes opts xs ++)
inlineToNodes opts (Superscript xs) =
- ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++
- [node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
+ if isEnabled Ext_raw_html opts
+ then ((node (HTML_INLINE (T.pack "<sup>")) [] : inlinesToNodes opts xs ++
+ [node (HTML_INLINE (T.pack "</sup>")) []]) ++ )
+ else case traverse toSuperscriptInline xs of
+ Nothing ->
+ ((node (TEXT (T.pack "^(")) [] : inlinesToNodes opts xs ++
+ [node (TEXT (T.pack ")")) []]) ++ )
+ Just xs' -> (inlinesToNodes opts xs' ++)
inlineToNodes opts (Subscript xs) =
- ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++
- [node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
+ if isEnabled Ext_raw_html opts
+ then ((node (HTML_INLINE (T.pack "<sub>")) [] : inlinesToNodes opts xs ++
+ [node (HTML_INLINE (T.pack "</sub>")) []]) ++ )
+ else case traverse toSubscriptInline xs of
+ Nothing ->
+ ((node (TEXT (T.pack "_(")) [] : inlinesToNodes opts xs ++
+ [node (TEXT (T.pack ")")) []]) ++ )
+ Just xs' -> (inlinesToNodes opts xs' ++)
inlineToNodes opts (SmallCaps xs) =
- ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) []
- : inlinesToNodes opts xs ++
- [node (HTML_INLINE (T.pack "</span>")) []]) ++ )
+ if isEnabled Ext_raw_html opts
+ then ((node (HTML_INLINE (T.pack "<span class=\"smallcaps\">")) []
+ : inlinesToNodes opts xs ++
+ [node (HTML_INLINE (T.pack "</span>")) []]) ++ )
+ else (inlinesToNodes opts (capitalize xs) ++)
inlineToNodes opts (Link _ ils (url,tit)) =
(node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :)
-- title beginning with fig: indicates implicit figure
@@ -304,6 +320,11 @@ inlineToNodes opts (Math mt str) =
(node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :)
DisplayMath ->
(node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :)
+inlineToNodes opts (Span ("",["emoji"],kvs) [Str s]) = do
+ case lookup "data-emoji" kvs of
+ Just emojiname | isEnabled Ext_emoji opts ->
+ (node (TEXT (":" <> T.pack emojiname <> ":")) [] :)
+ _ -> (node (TEXT (T.pack s)) [] :)
inlineToNodes opts (Span attr ils) =
let nodes = inlinesToNodes opts ils
op = tagWithAttributes opts True False "span" attr
@@ -314,3 +335,19 @@ inlineToNodes opts (Span attr ils) =
inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++)
inlineToNodes _ (Note _) = id -- should not occur
-- we remove Note elements in preprocessing
+
+toSubscriptInline :: Inline -> Maybe Inline
+toSubscriptInline Space = Just Space
+toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
+toSubscriptInline (Str s) = Str <$> traverse toSubscript s
+toSubscriptInline LineBreak = Just LineBreak
+toSubscriptInline SoftBreak = Just SoftBreak
+toSubscriptInline _ = Nothing
+
+toSuperscriptInline :: Inline -> Maybe Inline
+toSuperscriptInline Space = Just Space
+toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils
+toSuperscriptInline (Str s) = Str <$> traverse toSuperscript s
+toSuperscriptInline LineBreak = Just LineBreak
+toSuperscriptInline SoftBreak = Just SoftBreak
+toSuperscriptInline _ = Nothing
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 10e996bdb..1f9760442 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -190,10 +190,9 @@ blockToConTeXt (BlockQuote lst) = do
blockToConTeXt (CodeBlock _ str) =
return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline
-- blankline because \stoptyping can't have anything after it, inc. '}'
-blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
-blockToConTeXt b@(RawBlock _ _ ) = do
- report $ BlockNotRendered b
- return empty
+blockToConTeXt b@(RawBlock f str)
+ | f == Format "context" || f == Format "tex" = return $ text str <> blankline
+ | otherwise = empty <$ report (BlockNotRendered b)
blockToConTeXt (Div (ident,_,kvs) bs) = do
let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment"
mblang <- fromBCP47 (lookup "lang" kvs)
@@ -330,8 +329,7 @@ alignToConTeXt align = case align of
AlignDefault -> empty
listItemToConTeXt :: PandocMonad m => [Block] -> WM m Doc
-listItemToConTeXt list = blockListToConTeXt list >>=
- return . ("\\item" $$) . nest 2
+listItemToConTeXt list = (("\\item" $$) . nest 2) <$> blockListToConTeXt list
defListItemToConTeXt :: PandocMonad m => ([Inline], [[Block]]) -> WM m Doc
defListItemToConTeXt (term, defs) = do
@@ -401,11 +399,9 @@ inlineToConTeXt (Math InlineMath str) =
return $ char '$' <> text str <> char '$'
inlineToConTeXt (Math DisplayMath str) =
return $ text "\\startformula " <> text str <> text " \\stopformula" <> space
-inlineToConTeXt (RawInline "context" str) = return $ text str
-inlineToConTeXt (RawInline "tex" str) = return $ text str
-inlineToConTeXt il@(RawInline _ _) = do
- report $ InlineNotRendered il
- return empty
+inlineToConTeXt il@(RawInline f str)
+ | f == Format "tex" || f == Format "context" = return $ text str
+ | otherwise = empty <$ report (InlineNotRendered il)
inlineToConTeXt LineBreak = return $ text "\\crlf" <> cr
inlineToConTeXt SoftBreak = do
wrapText <- gets (writerWrapText . stOptions)
@@ -457,7 +453,12 @@ inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do
clas = if null cls
then empty
else brackets $ text $ toLabel $ head cls
- src' = if isURI src
+ -- Use / for path separators on Windows; see #4918
+ fixPathSeparators = map $ \c -> case c of
+ '\\' -> '/'
+ _ -> c
+ src' = fixPathSeparators $
+ if isURI src
then src
else unEscapeString src
return $ braces $ "\\externalfigure" <> brackets (text src') <> dims <> clas
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 53b321c7c..37fec9f0f 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -1,6 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE DeriveDataTypeable #-}
-{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{- Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
@@ -35,25 +35,26 @@ import Prelude
import Control.Arrow ((***))
import Control.Exception
import Control.Monad (when)
-import Control.Monad.Trans (MonadIO (liftIO))
import Data.Char (toLower)
import Data.List (intersperse)
import qualified Data.Map as M
import Data.Text (Text, pack)
import Data.Typeable
-import Foreign.Lua (Lua, ToLuaStack (..), callFunc)
-import Foreign.Lua.Api
+import Foreign.Lua (Lua, Pushable)
import Text.Pandoc.Class (PandocIO)
import Text.Pandoc.Definition
import Text.Pandoc.Error
-import Text.Pandoc.Lua.Init (runPandocLua, registerScriptPath)
+import Text.Pandoc.Lua.Init (LuaException (LuaException), runPandocLua,
+ registerScriptPath)
import Text.Pandoc.Lua.StackInstances ()
-import Text.Pandoc.Lua.Util (addValue, dostring')
+import Text.Pandoc.Lua.Util (addField, dofileWithTraceback)
import Text.Pandoc.Options
import Text.Pandoc.Templates
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Writers.Shared
+import qualified Foreign.Lua as Lua
+
attrToMap :: Attr -> M.Map String String
attrToMap (id',classes,keyvals) = M.fromList
$ ("id", id')
@@ -62,41 +63,43 @@ attrToMap (id',classes,keyvals) = M.fromList
newtype Stringify a = Stringify a
-instance ToLuaStack (Stringify Format) where
- push (Stringify (Format f)) = push (map toLower f)
+instance Pushable (Stringify Format) where
+ push (Stringify (Format f)) = Lua.push (map toLower f)
-instance ToLuaStack (Stringify [Inline]) where
- push (Stringify ils) = push =<< inlineListToCustom ils
+instance Pushable (Stringify [Inline]) where
+ push (Stringify ils) = Lua.push =<< inlineListToCustom ils
-instance ToLuaStack (Stringify [Block]) where
- push (Stringify blks) = push =<< blockListToCustom blks
+instance Pushable (Stringify [Block]) where
+ push (Stringify blks) = Lua.push =<< blockListToCustom blks
-instance ToLuaStack (Stringify MetaValue) where
- push (Stringify (MetaMap m)) = push (fmap Stringify m)
- push (Stringify (MetaList xs)) = push (map Stringify xs)
- push (Stringify (MetaBool x)) = push x
- push (Stringify (MetaString s)) = push s
- push (Stringify (MetaInlines ils)) = push (Stringify ils)
- push (Stringify (MetaBlocks bs)) = push (Stringify bs)
+instance Pushable (Stringify MetaValue) where
+ push (Stringify (MetaMap m)) = Lua.push (fmap Stringify m)
+ push (Stringify (MetaList xs)) = Lua.push (map Stringify xs)
+ push (Stringify (MetaBool x)) = Lua.push x
+ push (Stringify (MetaString s)) = Lua.push s
+ push (Stringify (MetaInlines ils)) = Lua.push (Stringify ils)
+ push (Stringify (MetaBlocks bs)) = Lua.push (Stringify bs)
-instance ToLuaStack (Stringify Citation) where
+instance Pushable (Stringify Citation) where
push (Stringify cit) = do
- createtable 6 0
- addValue "citationId" $ citationId cit
- addValue "citationPrefix" . Stringify $ citationPrefix cit
- addValue "citationSuffix" . Stringify $ citationSuffix cit
- addValue "citationMode" $ show (citationMode cit)
- addValue "citationNoteNum" $ citationNoteNum cit
- addValue "citationHash" $ citationHash cit
+ Lua.createtable 6 0
+ addField "citationId" $ citationId cit
+ addField "citationPrefix" . Stringify $ citationPrefix cit
+ addField "citationSuffix" . Stringify $ citationSuffix cit
+ addField "citationMode" $ show (citationMode cit)
+ addField "citationNoteNum" $ citationNoteNum cit
+ addField "citationHash" $ citationHash cit
-- | Key-value pair, pushed as a table with @a@ as the only key and @v@ as the
-- associated value.
newtype KeyValue a b = KeyValue (a, b)
-instance (ToLuaStack a, ToLuaStack b) => ToLuaStack (KeyValue a b) where
+instance (Pushable a, Pushable b) => Pushable (KeyValue a b) where
push (KeyValue (k, v)) = do
- newtable
- addValue k v
+ Lua.newtable
+ Lua.push k
+ Lua.push v
+ Lua.rawset (Lua.nthFromTop 3)
data PandocLuaException = PandocLuaException String
deriving (Show, Typeable)
@@ -106,14 +109,13 @@ instance Exception PandocLuaException
-- | Convert Pandoc to custom markup.
writeCustom :: FilePath -> WriterOptions -> Pandoc -> PandocIO Text
writeCustom luaFile opts doc@(Pandoc meta _) = do
- luaScript <- liftIO $ UTF8.readFile luaFile
res <- runPandocLua $ do
registerScriptPath luaFile
- stat <- dostring' luaScript
+ stat <- dofileWithTraceback luaFile
-- check for error in lua script (later we'll change the return type
-- to handle this more gracefully):
- when (stat /= OK) $
- tostring 1 >>= throw . PandocLuaException . UTF8.toString
+ when (stat /= Lua.OK) $
+ Lua.tostring' (-1) >>= throw . PandocLuaException . UTF8.toString
-- TODO - call hierarchicalize, so we have that info
rendered <- docToCustom opts doc
context <- metaToJSON opts
@@ -122,7 +124,7 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do
meta
return (rendered, context)
let (body, context) = case res of
- Left e -> throw (PandocLuaException (show e))
+ Left (LuaException msg) -> throw (PandocLuaException msg)
Right x -> x
case writerTemplate opts of
Nothing -> return $ pack body
@@ -134,7 +136,7 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do
docToCustom :: WriterOptions -> Pandoc -> Lua String
docToCustom opts (Pandoc (Meta metamap) blocks) = do
body <- blockListToCustom blocks
- callFunc "Doc" body (fmap Stringify metamap) (writerVariables opts)
+ Lua.callFunc "Doc" body (fmap Stringify metamap) (writerVariables opts)
-- | Convert Pandoc block element to Custom.
blockToCustom :: Block -- ^ Block element
@@ -142,52 +144,55 @@ blockToCustom :: Block -- ^ Block element
blockToCustom Null = return ""
-blockToCustom (Plain inlines) = callFunc "Plain" (Stringify inlines)
+blockToCustom (Plain inlines) = Lua.callFunc "Plain" (Stringify inlines)
blockToCustom (Para [Image attr txt (src,tit)]) =
- callFunc "CaptionedImage" src tit (Stringify txt) (attrToMap attr)
+ Lua.callFunc "CaptionedImage" src tit (Stringify txt) (attrToMap attr)
-blockToCustom (Para inlines) = callFunc "Para" (Stringify inlines)
+blockToCustom (Para inlines) = Lua.callFunc "Para" (Stringify inlines)
-blockToCustom (LineBlock linesList) = callFunc "LineBlock" (map Stringify linesList)
+blockToCustom (LineBlock linesList) =
+ Lua.callFunc "LineBlock" (map Stringify linesList)
blockToCustom (RawBlock format str) =
- callFunc "RawBlock" (Stringify format) str
+ Lua.callFunc "RawBlock" (Stringify format) str
-blockToCustom HorizontalRule = callFunc "HorizontalRule"
+blockToCustom HorizontalRule = Lua.callFunc "HorizontalRule"
blockToCustom (Header level attr inlines) =
- callFunc "Header" level (Stringify inlines) (attrToMap attr)
+ Lua.callFunc "Header" level (Stringify inlines) (attrToMap attr)
blockToCustom (CodeBlock attr str) =
- callFunc "CodeBlock" str (attrToMap attr)
+ Lua.callFunc "CodeBlock" str (attrToMap attr)
-blockToCustom (BlockQuote blocks) = callFunc "BlockQuote" (Stringify blocks)
+blockToCustom (BlockQuote blocks) =
+ Lua.callFunc "BlockQuote" (Stringify blocks)
blockToCustom (Table capt aligns widths headers rows) =
let aligns' = map show aligns
capt' = Stringify capt
headers' = map Stringify headers
rows' = map (map Stringify) rows
- in callFunc "Table" capt' aligns' widths headers' rows'
+ in Lua.callFunc "Table" capt' aligns' widths headers' rows'
-blockToCustom (BulletList items) = callFunc "BulletList" (map Stringify items)
+blockToCustom (BulletList items) =
+ Lua.callFunc "BulletList" (map Stringify items)
blockToCustom (OrderedList (num,sty,delim) items) =
- callFunc "OrderedList" (map Stringify items) num (show sty) (show delim)
+ Lua.callFunc "OrderedList" (map Stringify items) num (show sty) (show delim)
blockToCustom (DefinitionList items) =
- callFunc "DefinitionList"
- (map (KeyValue . (Stringify *** map Stringify)) items)
+ Lua.callFunc "DefinitionList"
+ (map (KeyValue . (Stringify *** map Stringify)) items)
blockToCustom (Div attr items) =
- callFunc "Div" (Stringify items) (attrToMap attr)
+ Lua.callFunc "Div" (Stringify items) (attrToMap attr)
-- | Convert list of Pandoc block elements to Custom.
blockListToCustom :: [Block] -- ^ List of block elements
-> Lua String
blockListToCustom xs = do
- blocksep <- callFunc "Blocksep"
+ blocksep <- Lua.callFunc "Blocksep"
bs <- mapM blockToCustom xs
return $ mconcat $ intersperse blocksep bs
@@ -200,51 +205,51 @@ inlineListToCustom lst = do
-- | Convert Pandoc inline element to Custom.
inlineToCustom :: Inline -> Lua String
-inlineToCustom (Str str) = callFunc "Str" str
+inlineToCustom (Str str) = Lua.callFunc "Str" str
-inlineToCustom Space = callFunc "Space"
+inlineToCustom Space = Lua.callFunc "Space"
-inlineToCustom SoftBreak = callFunc "SoftBreak"
+inlineToCustom SoftBreak = Lua.callFunc "SoftBreak"
-inlineToCustom (Emph lst) = callFunc "Emph" (Stringify lst)
+inlineToCustom (Emph lst) = Lua.callFunc "Emph" (Stringify lst)
-inlineToCustom (Strong lst) = callFunc "Strong" (Stringify lst)
+inlineToCustom (Strong lst) = Lua.callFunc "Strong" (Stringify lst)
-inlineToCustom (Strikeout lst) = callFunc "Strikeout" (Stringify lst)
+inlineToCustom (Strikeout lst) = Lua.callFunc "Strikeout" (Stringify lst)
-inlineToCustom (Superscript lst) = callFunc "Superscript" (Stringify lst)
+inlineToCustom (Superscript lst) = Lua.callFunc "Superscript" (Stringify lst)
-inlineToCustom (Subscript lst) = callFunc "Subscript" (Stringify lst)
+inlineToCustom (Subscript lst) = Lua.callFunc "Subscript" (Stringify lst)
-inlineToCustom (SmallCaps lst) = callFunc "SmallCaps" (Stringify lst)
+inlineToCustom (SmallCaps lst) = Lua.callFunc "SmallCaps" (Stringify lst)
-inlineToCustom (Quoted SingleQuote lst) = callFunc "SingleQuoted" (Stringify lst)
+inlineToCustom (Quoted SingleQuote lst) = Lua.callFunc "SingleQuoted" (Stringify lst)
-inlineToCustom (Quoted DoubleQuote lst) = callFunc "DoubleQuoted" (Stringify lst)
+inlineToCustom (Quoted DoubleQuote lst) = Lua.callFunc "DoubleQuoted" (Stringify lst)
-inlineToCustom (Cite cs lst) = callFunc "Cite" (Stringify lst) (map Stringify cs)
+inlineToCustom (Cite cs lst) = Lua.callFunc "Cite" (Stringify lst) (map Stringify cs)
inlineToCustom (Code attr str) =
- callFunc "Code" str (attrToMap attr)
+ Lua.callFunc "Code" str (attrToMap attr)
inlineToCustom (Math DisplayMath str) =
- callFunc "DisplayMath" str
+ Lua.callFunc "DisplayMath" str
inlineToCustom (Math InlineMath str) =
- callFunc "InlineMath" str
+ Lua.callFunc "InlineMath" str
inlineToCustom (RawInline format str) =
- callFunc "RawInline" (Stringify format) str
+ Lua.callFunc "RawInline" (Stringify format) str
-inlineToCustom LineBreak = callFunc "LineBreak"
+inlineToCustom LineBreak = Lua.callFunc "LineBreak"
inlineToCustom (Link attr txt (src,tit)) =
- callFunc "Link" (Stringify txt) src tit (attrToMap attr)
+ Lua.callFunc "Link" (Stringify txt) src tit (attrToMap attr)
inlineToCustom (Image attr alt (src,tit)) =
- callFunc "Image" (Stringify alt) src tit (attrToMap attr)
+ Lua.callFunc "Image" (Stringify alt) src tit (attrToMap attr)
-inlineToCustom (Note contents) = callFunc "Note" (Stringify contents)
+inlineToCustom (Note contents) = Lua.callFunc "Note" (Stringify contents)
inlineToCustom (Span attr items) =
- callFunc "Span" (Stringify items) (attrToMap attr)
+ Lua.callFunc "Span" (Stringify items) (attrToMap attr)
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index f6e814095..3306e4f31 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -126,9 +126,10 @@ writeDocbook opts (Pandoc meta blocks) = do
defField "mathml" (case writerHTMLMathMethod opts of
MathML -> True
_ -> False) metadata
- case writerTemplate opts of
- Nothing -> return main
- Just tpl -> renderTemplate' tpl context
+ (if writerPreferAscii opts then toEntities else id) <$>
+ case writerTemplate opts of
+ Nothing -> return main
+ Just tpl -> renderTemplate' tpl context
-- | Convert an Element to Docbook.
elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 1666c0562..524d20fd1 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -66,7 +66,7 @@ import Text.Pandoc.Readers.Docx.StyleMap
import Text.Pandoc.Shared hiding (Element)
import Text.Pandoc.Walk
import Text.Pandoc.Writers.Math
-import Text.Pandoc.Writers.Shared (fixDisplayMath, metaValueToInlines)
+import Text.Pandoc.Writers.Shared
import Text.Printf (printf)
import Text.TeXMath
import Text.XML.Light as XML
@@ -230,7 +230,7 @@ writeDocx opts doc@(Pandoc meta _) = do
let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="left") . qName)
let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="right") . qName)
- -- Get the avaible area (converting the size and the margins to int and
+ -- Get the available area (converting the size and the margins to int and
-- doing the difference
let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer)
<*> (
@@ -266,8 +266,9 @@ writeDocx opts doc@(Pandoc meta _) = do
-- parse styledoc for heading styles
let styleMaps = getStyleMaps styledoc
- let tocTitle = fromMaybe (stTocTitle defaultWriterState) $
- metaValueToInlines <$> lookupMeta "toc-title" meta
+ let tocTitle = case lookupMetaInlines "toc-title" meta of
+ [] -> stTocTitle defaultWriterState
+ ls -> ls
let initialSt = defaultWriterState {
stStyleMaps = styleMaps
@@ -727,7 +728,7 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element]
-makeTOC opts | writerTableOfContents opts = do
+makeTOC opts = do
let depth = "1-"++show (writerTOCDepth opts)
let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
tocTitle <- gets stTocTitle
@@ -751,8 +752,6 @@ makeTOC opts | writerTableOfContents opts = do
) -- w:p
])
])] -- w:sdt
-makeTOC _ = return []
-
-- | Convert Pandoc document to two lists of
-- OpenXML elements (the main document and footnotes).
@@ -761,15 +760,9 @@ writeOpenXML opts (Pandoc meta blocks) = do
let tit = docTitle meta
let auths = docAuthors meta
let dat = docDate meta
- let abstract' = case lookupMeta "abstract" meta of
- Just (MetaBlocks bs) -> bs
- Just (MetaInlines ils) -> [Plain ils]
- _ -> []
- let subtitle' = case lookupMeta "subtitle" meta of
- Just (MetaBlocks [Plain xs]) -> xs
- Just (MetaBlocks [Para xs]) -> xs
- Just (MetaInlines xs) -> xs
- _ -> []
+ let abstract' = lookupMetaBlocks "abstract" meta
+ let subtitle' = lookupMetaInlines "subtitle" meta
+ let includeTOC = writerTableOfContents opts || lookupMetaBool "toc" meta
title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $
@@ -801,7 +794,9 @@ writeOpenXML opts (Pandoc meta blocks) = do
] ++ annotation
]
comments' <- mapM toComment comments
- toc <- makeTOC opts
+ toc <- if includeTOC
+ then makeTOC opts
+ else return []
let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
return (meta' ++ doc', notes', comments')
@@ -908,9 +903,10 @@ blockToOpenXML' opts (Para lst)
| null lst && not (isEnabled Ext_empty_paragraphs opts) = return []
| otherwise = do
isFirstPara <- gets stFirstPara
- paraProps <- getParaProps $ case lst of
- [Math DisplayMath _] -> True
- _ -> False
+ let displayMathPara = case lst of
+ [x] -> isDisplayMath x
+ _ -> False
+ paraProps <- getParaProps displayMathPara
bodyTextStyle <- pStyleM "Body Text"
let paraProps' = case paraProps of
[] | isFirstPara -> [mknode "w:pPr" []
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index f1ff8b482..6099f0223 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -74,6 +74,7 @@ import Text.Printf (printf)
import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
add_attrs, lookupAttr, node, onlyElems, parseXML,
ppElement, showElement, strContent, unode, unqual)
+import Text.Pandoc.XML (escapeStringForXML)
-- A Chapter includes a list of blocks and maybe a section
-- number offset. Note, some chapters are unnumbered. The section
@@ -446,7 +447,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
cpContent <- lift $ writeHtml
opts'{ writerVariables =
("coverpage","true"):
- ("pagetitle",plainTitle):
+ ("pagetitle",
+ escapeStringForXML plainTitle):
cssvars True ++ vars }
(Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"../media/" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
imgContent <- lift $ P.readFileLazy img
@@ -459,7 +461,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
-- title page
tpContent <- lift $ writeHtml opts'{
writerVariables = ("titlepage","true"):
- ("pagetitle",plainTitle):
+ ("body-type", "frontmatter"):
+ ("pagetitle", escapeStringForXML plainTitle):
cssvars True ++ vars }
(Pandoc meta [])
tpEntry <- mkEntry "text/title_page.xhtml" tpContent
@@ -563,13 +566,28 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
let chapToEntry num (Chapter mbnum bs) =
mkEntry ("text/" ++ showChapter num) =<<
writeHtml opts'{ writerNumberOffset = fromMaybe [] mbnum
- , writerVariables = cssvars True ++ vars }
- (case bs of
- (Header _ _ xs : _) ->
+ , writerVariables = ("body-type", bodyType) :
+ cssvars True ++ vars } pdoc
+ where (pdoc, bodyType) =
+ case bs of
+ (Header _ (_,_,kvs) xs : _) ->
-- remove notes or we get doubled footnotes
- Pandoc (setMeta "title" (walk removeNote $ fromList xs)
- nullMeta) bs
- _ -> Pandoc nullMeta bs)
+ (Pandoc (setMeta "title"
+ (walk removeNote $ fromList xs) nullMeta) bs,
+ case lookup "epub:type" kvs of
+ Nothing -> "bodymatter"
+ Just x
+ | x `elem` frontMatterTypes -> "frontmatter"
+ | x `elem` backMatterTypes -> "backmatter"
+ | otherwise -> "bodymatter")
+ _ -> (Pandoc nullMeta bs, "bodymatter")
+ frontMatterTypes = ["prologue", "abstract", "acknowledgments",
+ "copyright-page", "dedication",
+ "foreword", "halftitle",
+ "introduction", "preface",
+ "seriespage", "titlepage"]
+ backMatterTypes = ["afterword", "appendix", "colophon",
+ "conclusion", "epigraph"]
chapterEntries <- zipWithM chapToEntry [1..] chapters
@@ -754,7 +772,8 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
(writeHtmlStringForEPUB version
opts{ writerTemplate = Nothing
, writerVariables =
- ("pagetitle",plainTitle):
+ ("pagetitle",
+ escapeStringForXML plainTitle):
writerVariables opts}
(Pandoc nullMeta
[Plain $ walk clean tit])) of
@@ -782,7 +801,7 @@ pandocToEPUB version opts doc@(Pandoc meta _) = do
[ unode "a" ! [("href", "text/cover.xhtml")
,("epub:type", "cover")] $
"Cover"] |
- epubCoverImage metadata /= Nothing
+ isJust (epubCoverImage metadata)
] ++
[ unode "li"
[ unode "a" ! [("href", "#toc")
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index a46011a8f..a139de5cd 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -119,7 +119,7 @@ description meta' = do
let as = authors meta'
dd <- docdate meta'
annotation <- case lookupMeta "abstract" meta' of
- Just (MetaBlocks bs) -> (list . el "annotation") <$> cMapM blockToXml bs
+ Just (MetaBlocks bs) -> (list . el "annotation") <$> cMapM blockToXml (map unPlain bs)
_ -> pure mempty
let lang = case lookupMeta "lang" meta' of
Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s]
@@ -135,8 +135,9 @@ description meta' = do
Just (MetaString s) -> coverimage s
_ -> return []
return $ el "description"
- [ el "title-info" (genre : (bt ++ annotation ++ as ++ dd ++ lang))
- , el "document-info" (el "program-used" "pandoc" : coverpage)
+ [ el "title-info" (genre :
+ (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang))
+ , el "document-info" [el "program-used" "pandoc"]
]
booktitle :: PandocMonad m => Meta -> FBM m [Content]
@@ -398,6 +399,11 @@ plainToPara (Para inlines : rest) =
Para inlines : HorizontalRule : plainToPara rest -- HorizontalRule will be converted to <empty-line />
plainToPara (p:rest) = p : plainToPara rest
+-- Replace plain text with paragraphs
+unPlain :: Block -> Block
+unPlain (Plain inlines) = Para inlines
+unPlain x = x
+
-- Simulate increased indentation level. Will not really work
-- for multi-line paragraphs.
indentPrefix :: String -> Block -> Block
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index a09ad2fda..46f754226 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -50,13 +50,13 @@ import Prelude
import Control.Monad.State.Strict
import Data.Char (ord, toLower)
import Data.List (intercalate, intersperse, isPrefixOf, partition)
-import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing)
+import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing, mapMaybe)
import qualified Data.Set as Set
import Data.String (fromString)
import Data.Text (Text)
import qualified Data.Text.Lazy as TL
import Network.HTTP (urlEncode)
-import Network.URI (URI (..), parseURIReference, unEscapeString)
+import Network.URI (URI (..), parseURIReference)
import Numeric (showHex)
import Text.Blaze.Internal (customLeaf, customParent, MarkupM(Empty))
#if MIN_VERSION_blaze_markup(0,6,3)
@@ -75,7 +75,7 @@ import Text.Pandoc.Templates
import Text.Pandoc.Walk
import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared
-import Text.Pandoc.XML (escapeStringForXML, fromEntities)
+import Text.Pandoc.XML (escapeStringForXML, fromEntities, toEntities)
#if MIN_VERSION_blaze_markup(0,6,3)
#else
import Text.Blaze.Internal (preEscapedString, preEscapedText)
@@ -206,7 +206,8 @@ writeHtmlString' :: PandocMonad m
=> WriterState -> WriterOptions -> Pandoc -> m Text
writeHtmlString' st opts d = do
(body, context) <- evalStateT (pandocToHtml opts d) st
- case writerTemplate opts of
+ (if writerPreferAscii opts then toEntities else id) <$>
+ case writerTemplate opts of
Nothing -> return $ renderHtml' body
Just tpl -> do
-- warn if empty lang
@@ -221,16 +222,19 @@ writeHtmlString' st opts d = do
lookup "sourcefile" (writerVariables opts)
report $ NoTitleElement fallback
return $ resetField "pagetitle" fallback context
- renderTemplate' tpl $
- defField "body" (renderHtml' body) context'
+ renderTemplate' tpl
+ (defField "body" (renderHtml' body) context')
writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html
writeHtml' st opts d =
case writerTemplate opts of
Just _ -> preEscapedText <$> writeHtmlString' st opts d
- Nothing -> do
- (body, _) <- evalStateT (pandocToHtml opts d) st
- return body
+ Nothing
+ | writerPreferAscii opts
+ -> preEscapedText <$> writeHtmlString' st opts d
+ | otherwise -> do
+ (body, _) <- evalStateT (pandocToHtml opts d) st
+ return body
-- result is (title, authors, date, toc, body, new variables)
pandocToHtml :: PandocMonad m
@@ -259,7 +263,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
st <- get
notes <- footnoteSection opts (reverse (stNotes st))
let thebody = blocks' >> notes
- let math = case writerHTMLMathMethod opts of
+ let math = case writerHTMLMathMethod opts of
MathJax url
| slideVariant /= RevealJsSlides ->
-- mathjax is handled via a special plugin in revealjs
@@ -273,10 +277,10 @@ pandocToHtml opts (Pandoc meta blocks) = do
KaTeX url -> do
H.script !
A.src (toValue $ url ++ "katex.min.js") $ mempty
- H.script !
- A.src (toValue $ url ++ "contrib/auto-render.min.js") $ mempty
+ nl opts
H.script
- "document.addEventListener(\"DOMContentLoaded\", function() {\n renderMathInElement(document.body);\n});"
+ "document.addEventListener(\"DOMContentLoaded\", function () {\n var mathElements = document.getElementsByClassName(\"math\");\n for (var i = 0; i < mathElements.length; i++) {\n var texText = mathElements[i].firstChild;\n if (mathElements[i].tagName == \"SPAN\") { katex.render(texText.data, mathElements[i], { displayMode: mathElements[i].classList.contains(\"display\"), throwOnError: false } );\n }}});"
+ nl opts
H.link ! A.rel "stylesheet" !
A.href (toValue $ url ++ "katex.min.css")
@@ -296,10 +300,11 @@ pandocToHtml opts (Pandoc meta blocks) = do
(if stMath st
then defField "math" (renderHtml' math)
else id) $
- defField "mathjax"
- (case writerHTMLMathMethod opts of
- MathJax _ -> True
- _ -> False) $
+ (case writerHTMLMathMethod opts of
+ MathJax u -> defField "mathjax" True .
+ defField "mathjaxurl"
+ (takeWhile (/='?') u)
+ _ -> defField "mathjax" False) $
defField "quotes" (stQuotes st) $
-- for backwards compatibility we populate toc
-- with the contents of the toc, rather than a
@@ -460,7 +465,7 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen
t <- addAttrs opts attr $
secttag header'
return $
- (if slideVariant == RevealJsSlides
+ (if slideVariant == RevealJsSlides && not (null innerContents)
then H5.section
else id) $ mconcat $ t : innerContents
else if writerSectionDivs opts || slide
@@ -576,12 +581,23 @@ toAttrs :: PandocMonad m
=> [(String, String)] -> StateT WriterState m [Attribute]
toAttrs kvs = do
html5 <- gets stHtml5
- return $ map (\(x,y) ->
- customAttribute
- (fromString (if not html5 || x `Set.member` html5Attributes
- || "data-" `isPrefixOf` x
- then x
- else "data-" ++ x)) (toValue y)) kvs
+ mbEpubVersion <- gets stEPUBVersion
+ return $ mapMaybe (\(x,y) ->
+ if html5
+ then
+ if x `Set.member` html5Attributes
+ || ':' `elem` x -- e.g. epub: namespace
+ || "data-" `isPrefixOf` x
+ then Just $ customAttribute (fromString x) (toValue y)
+ else Just $ customAttribute (fromString ("data-" ++ x))
+ (toValue y)
+ else
+ if mbEpubVersion == Just EPUB2 &&
+ not (x `Set.member` html4Attributes ||
+ "xml:" `isPrefixOf` x)
+ then Nothing
+ else Just $ customAttribute (fromString x) (toValue y))
+ kvs
attrsToHtml :: PandocMonad m
=> WriterOptions -> Attr -> StateT WriterState m [Attribute]
@@ -828,9 +844,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
return $ foldl (!) l attribs
blockToHtml opts (DefinitionList lst) = do
contents <- mapM (\(term, defs) ->
- do term' <- if null term
- then return mempty
- else liftM H.dt $ inlineListToHtml opts term
+ do term' <- liftM H.dt $ inlineListToHtml opts term
defs' <- mapM (liftM (\x -> H.dd (x >> nl opts)) .
blockListToHtml opts) defs
return $ mconcat $ nl opts : term' : nl opts :
@@ -1051,8 +1065,8 @@ inlineToHtml opts inline = do
DisplayMath -> "\\[" ++ str ++ "\\]"
KaTeX _ -> return $ H.span ! A.class_ mathClass $ toHtml $
case t of
- InlineMath -> "\\(" ++ str ++ "\\)"
- DisplayMath -> "\\[" ++ str ++ "\\]"
+ InlineMath -> str
+ DisplayMath -> str
PlainMath -> do
x <- lift (texMathToInlines t str) >>= inlineListToHtml opts
let m = H.span ! A.class_ mathClass $ x
@@ -1084,10 +1098,7 @@ inlineToHtml opts inline = do
in '#' : prefix ++ xs
_ -> s
let link = H.a ! A.href (toValue s') $ linkText
- let attr = if txt == [Str (unEscapeString s)]
- then (ident, "uri" : classes, kvs)
- else (ident, classes, kvs)
- link' <- addAttrs opts attr link
+ link' <- addAttrs opts (ident, classes, kvs) link
return $ if null tit
then link'
else link' ! A.title (toValue tit)
@@ -1422,3 +1433,125 @@ html5Attributes = Set.fromList
, "workertype"
, "wrap"
]
+
+html4Attributes :: Set.Set String
+html4Attributes = Set.fromList
+ [ "abbr"
+ , "accept"
+ , "accept-charset"
+ , "accesskey"
+ , "action"
+ , "align"
+ , "alink"
+ , "alt"
+ , "archive"
+ , "axis"
+ , "background"
+ , "bgcolor"
+ , "border"
+ , "cellpadding"
+ , "cellspacing"
+ , "char"
+ , "charoff"
+ , "charset"
+ , "checked"
+ , "cite"
+ , "class"
+ , "classid"
+ , "clear"
+ , "code"
+ , "codebase"
+ , "codetype"
+ , "color"
+ , "cols"
+ , "colspan"
+ , "compact"
+ , "content"
+ , "coords"
+ , "data"
+ , "datetime"
+ , "declare"
+ , "defer"
+ , "dir"
+ , "disabled"
+ , "enctype"
+ , "face"
+ , "for"
+ , "frame"
+ , "frameborder"
+ , "headers"
+ , "height"
+ , "href"
+ , "hreflang"
+ , "hspace"
+ , "http-equiv"
+ , "id"
+ , "ismap"
+ , "label"
+ , "lang"
+ , "language"
+ , "link"
+ , "longdesc"
+ , "marginheight"
+ , "marginwidth"
+ , "maxlength"
+ , "media"
+ , "method"
+ , "multiple"
+ , "name"
+ , "nohref"
+ , "noresize"
+ , "noshade"
+ , "nowrap"
+ , "object"
+ , "onblur"
+ , "onchange"
+ , "onclick"
+ , "ondblclick"
+ , "onfocus"
+ , "onkeydown"
+ , "onkeypress"
+ , "onkeyup"
+ , "onload"
+ , "onmousedown"
+ , "onmousemove"
+ , "onmouseout"
+ , "onmouseover"
+ , "onmouseup"
+ , "onreset"
+ , "onselect"
+ , "onsubmit"
+ , "onunload"
+ , "profile"
+ , "prompt"
+ , "readonly"
+ , "rel"
+ , "rev"
+ , "rows"
+ , "rowspan"
+ , "rules"
+ , "scheme"
+ , "scope"
+ , "scrolling"
+ , "selected"
+ , "shape"
+ , "size"
+ , "span"
+ , "src"
+ , "standby"
+ , "start"
+ , "style"
+ , "summary"
+ , "tabindex"
+ , "target"
+ , "text"
+ , "title"
+ , "usemap"
+ , "valign"
+ , "value"
+ , "valuetype"
+ , "version"
+ , "vlink"
+ , "vspace"
+ , "width"
+ ]
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index 75b8c78dc..80e092b6a 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -45,7 +45,6 @@ import Text.Pandoc.Options
import Text.Pandoc.Pretty
import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate')
-import Text.Pandoc.Writers.Math (texMathToInlines)
import Text.Pandoc.Writers.Shared
type Notes = [[Block]]
@@ -208,13 +207,13 @@ blockListToHaddock :: PandocMonad m
-> [Block] -- ^ List of block elements
-> StateT WriterState m Doc
blockListToHaddock opts blocks =
- mapM (blockToHaddock opts) blocks >>= return . cat
+ cat <$> mapM (blockToHaddock opts) blocks
-- | Convert list of Pandoc inline elements to haddock.
inlineListToHaddock :: PandocMonad m
=> WriterOptions -> [Inline] -> StateT WriterState m Doc
inlineListToHaddock opts lst =
- mapM (inlineToHaddock opts) lst >>= return . cat
+ cat <$> mapM (inlineToHaddock opts) lst
-- | Convert Pandoc inline element to haddock.
inlineToHaddock :: PandocMonad m
@@ -250,11 +249,10 @@ inlineToHaddock _ (Code _ str) =
return $ "@" <> text (escapeString str) <> "@"
inlineToHaddock _ (Str str) =
return $ text $ escapeString str
-inlineToHaddock opts (Math mt str) = do
- let adjust x = case mt of
- DisplayMath -> cr <> x <> cr
- InlineMath -> x
- adjust <$> (lift (texMathToInlines mt str) >>= inlineListToHaddock opts)
+inlineToHaddock _ (Math mt str) =
+ return $ case mt of
+ DisplayMath -> cr <> "\\[" <> text str <> "\\]" <> cr
+ InlineMath -> "\\(" <> text str <> "\\)"
inlineToHaddock _ il@(RawInline f str)
| f == "haddock" = return $ text str
| otherwise = do
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
index 266d58007..ef1e2af0a 100644
--- a/src/Text/Pandoc/Writers/ICML.hs
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -149,11 +149,12 @@ writeICML opts (Pandoc meta blocks) = do
$ defField "charStyles" (render' $ charStylesToDoc st)
$ defField "parStyles" (render' $ parStylesToDoc st)
$ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) metadata
- case writerTemplate opts of
+ (if writerPreferAscii opts then toEntities else id) <$>
+ case writerTemplate opts of
Nothing -> return main
Just tpl -> renderTemplate' tpl context
--- | Auxilary functions for parStylesToDoc and charStylesToDoc.
+-- | Auxiliary functions for parStylesToDoc and charStylesToDoc.
contains :: String -> (String, (String, String)) -> [(String, String)]
contains s rule =
[snd rule | (fst rule) `isInfixOf` s]
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index fb3236bd9..4e78a4cce 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -102,7 +102,8 @@ docToJATS opts (Pandoc meta blocks) = do
$ defField "mathml" (case writerHTMLMathMethod opts of
MathML -> True
_ -> False) metadata
- case writerTemplate opts of
+ (if writerPreferAscii opts then toEntities else id) <$>
+ case writerTemplate opts of
Nothing -> return main
Just tpl -> renderTemplate' tpl context
@@ -344,7 +345,7 @@ inlineToJATS _ (Str str) = return $ text $ escapeStringForXML str
inlineToJATS opts (Emph lst) =
inTagsSimple "italic" <$> inlinesToJATS opts lst
inlineToJATS opts (Strong lst) =
- inTags False "bold" [("role", "strong")] <$> inlinesToJATS opts lst
+ inTagsSimple "bold" <$> inlinesToJATS opts lst
inlineToJATS opts (Strikeout lst) =
inTagsSimple "strike" <$> inlinesToJATS opts lst
inlineToJATS opts (Superscript lst) =
@@ -352,8 +353,7 @@ inlineToJATS opts (Superscript lst) =
inlineToJATS opts (Subscript lst) =
inTagsSimple "sub" <$> inlinesToJATS opts lst
inlineToJATS opts (SmallCaps lst) =
- inTags False "sc" [("role", "smallcaps")] <$>
- inlinesToJATS opts lst
+ inTagsSimple "sc" <$> inlinesToJATS opts lst
inlineToJATS opts (Quoted SingleQuote lst) = do
contents <- inlinesToJATS opts lst
return $ char '‘' <> contents <> char '’'
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 2904bec06..c1b5d0fa4 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -42,8 +42,9 @@ import Data.Aeson (FromJSON, object, (.=))
import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord,
toLower)
import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy,
- stripPrefix, (\\))
+ stripPrefix, (\\), uncons)
import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing)
+import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
import Network.URI (unEscapeString)
@@ -63,6 +64,7 @@ import Text.Pandoc.Walk
import Text.Pandoc.Writers.Shared
import qualified Text.Parsec as P
import Text.Printf (printf)
+import qualified Data.Text.Normalize as Normalize
data WriterState =
WriterState { stInNote :: Bool -- true if we're in a note
@@ -176,9 +178,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do
modify $ \s -> s{stCsquotes = True}
let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then
(blocks', [])
- else case last blocks' of
- Header 1 _ il -> (init blocks', il)
- _ -> (blocks', [])
+ else case reverse blocks' of
+ Header 1 _ il : _ -> (init blocks', il)
+ _ -> (blocks', [])
beamer <- gets stBeamer
blocks''' <- if beamer
then toSlides blocks''
@@ -248,7 +250,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do
defField "biblatex" True
_ -> id) $
defField "colorlinks" (any hasStringValue
- ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $
+ ["citecolor", "urlcolor", "linkcolor", "toccolor",
+ "filecolor"]) $
(if null dirs
then id
else defField "dir" ("ltr" :: String)) $
@@ -317,46 +320,110 @@ data StringContext = TextString
-- escape things as needed for LaTeX
stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String
-stringToLaTeX _ [] = return ""
-stringToLaTeX ctx (x:xs) = do
+stringToLaTeX context zs = do
opts <- gets stOptions
- rest <- stringToLaTeX ctx xs
- let ligatures = isEnabled Ext_smart opts && ctx == TextString
- let isUrl = ctx == URLString
- return $
+ go opts context $
+ if writerPreferAscii opts
+ then T.unpack $ Normalize.normalize Normalize.NFD $ T.pack zs
+ else zs
+ where
+ go _ _ [] = return ""
+ go opts ctx (x:xs) = do
+ let ligatures = isEnabled Ext_smart opts && ctx == TextString
+ let isUrl = ctx == URLString
+ let mbAccentCmd =
+ if writerPreferAscii opts && ctx == TextString
+ then uncons xs >>= \(c,_) -> M.lookup c accents
+ else Nothing
+ let emits s =
+ case mbAccentCmd of
+ Just cmd -> ((cmd ++ "{" ++ s ++ "}") ++)
+ <$> go opts ctx (drop 1 xs) -- drop combining accent
+ Nothing -> (s++) <$> go opts ctx xs
+ let emitc c =
+ case mbAccentCmd of
+ Just cmd -> ((cmd ++ "{" ++ [c] ++ "}") ++)
+ <$> go opts ctx (drop 1 xs) -- drop combining accent
+ Nothing -> (c:) <$> go opts ctx xs
case x of
- '{' -> "\\{" ++ rest
- '}' -> "\\}" ++ rest
- '`' | ctx == CodeString -> "\\textasciigrave{}" ++ rest
- '$' | not isUrl -> "\\$" ++ rest
- '%' -> "\\%" ++ rest
- '&' -> "\\&" ++ rest
- '_' | not isUrl -> "\\_" ++ rest
- '#' -> "\\#" ++ rest
- '-' | not isUrl -> case xs of
- -- prevent adjacent hyphens from forming ligatures
- ('-':_) -> "-\\/" ++ rest
- _ -> '-' : rest
- '~' | not isUrl -> "\\textasciitilde{}" ++ rest
- '^' -> "\\^{}" ++ rest
- '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows
- | otherwise -> "\\textbackslash{}" ++ rest
- '|' | not isUrl -> "\\textbar{}" ++ rest
- '<' -> "\\textless{}" ++ rest
- '>' -> "\\textgreater{}" ++ rest
- '[' -> "{[}" ++ rest -- to avoid interpretation as
- ']' -> "{]}" ++ rest -- optional arguments
- '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest
- '\160' -> "~" ++ rest
- '\x202F' -> "\\," ++ rest
- '\x2026' -> "\\ldots{}" ++ rest
- '\x2018' | ligatures -> "`" ++ rest
- '\x2019' | ligatures -> "'" ++ rest
- '\x201C' | ligatures -> "``" ++ rest
- '\x201D' | ligatures -> "''" ++ rest
- '\x2014' | ligatures -> "---" ++ rest
- '\x2013' | ligatures -> "--" ++ rest
- _ -> x : rest
+ '{' -> emits "\\{"
+ '}' -> emits "\\}"
+ '`' | ctx == CodeString -> emits "\\textasciigrave{}"
+ '$' | not isUrl -> emits "\\$"
+ '%' -> emits "\\%"
+ '&' -> emits "\\&"
+ '_' | not isUrl -> emits "\\_"
+ '#' -> emits "\\#"
+ '-' | not isUrl -> case xs of
+ -- prevent adjacent hyphens from forming ligatures
+ ('-':_) -> emits "-\\/"
+ _ -> emitc '-'
+ '~' | not isUrl -> emits "\\textasciitilde{}"
+ '^' -> emits "\\^{}"
+ '\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows
+ | otherwise -> emits "\\textbackslash{}"
+ '|' | not isUrl -> emits "\\textbar{}"
+ '<' -> emits "\\textless{}"
+ '>' -> emits "\\textgreater{}"
+ '[' -> emits "{[}" -- to avoid interpretation as
+ ']' -> emits "{]}" -- optional arguments
+ '\'' | ctx == CodeString -> emits "\\textquotesingle{}"
+ '\160' -> emits "~"
+ '\x202F' -> emits "\\,"
+ '\x2026' -> emits "\\ldots{}"
+ '\x2018' | ligatures -> emits "`"
+ '\x2019' | ligatures -> emits "'"
+ '\x201C' | ligatures -> emits "``"
+ '\x201D' | ligatures -> emits "''"
+ '\x2014' | ligatures -> emits "---"
+ '\x2013' | ligatures -> emits "--"
+ _ | writerPreferAscii opts
+ -> case x of
+ 'ı' -> emits "\\i "
+ 'ȷ' -> emits "\\j "
+ 'å' -> emits "\\aa "
+ 'Å' -> emits "\\AA "
+ 'ß' -> emits "\\ss "
+ 'ø' -> emits "\\o "
+ 'Ø' -> emits "\\O "
+ 'Ł' -> emits "\\L "
+ 'ł' -> emits "\\l "
+ 'æ' -> emits "\\ae "
+ 'Æ' -> emits "\\AE "
+ 'œ' -> emits "\\oe "
+ 'Œ' -> emits "\\OE "
+ '£' -> emits "\\pounds "
+ '€' -> emits "\\euro "
+ '©' -> emits "\\copyright "
+ _ -> emitc x
+ | otherwise -> emitc x
+
+accents :: M.Map Char String
+accents = M.fromList
+ [ ('\779' , "\\H")
+ , ('\768' , "\\`")
+ , ('\769' , "\\'")
+ , ('\770' , "\\^")
+ , ('\771' , "\\~")
+ , ('\776' , "\\\"")
+ , ('\775' , "\\.")
+ , ('\772' , "\\=")
+ , ('\781' , "\\|")
+ , ('\817' , "\\b")
+ , ('\807' , "\\c")
+ , ('\783' , "\\G")
+ , ('\777' , "\\h")
+ , ('\803' , "\\d")
+ , ('\785' , "\\f")
+ , ('\778' , "\\r")
+ , ('\865' , "\\t")
+ , ('\782' , "\\U")
+ , ('\780' , "\\v")
+ , ('\774' , "\\u")
+ , ('\808' , "\\k")
+ , ('\785' , "\\newtie")
+ , ('\8413', "\\textcircled")
+ ]
toLabel :: PandocMonad m => String -> LW m String
toLabel z = go `fmap` stringToLaTeX URLString z
@@ -402,7 +469,8 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts)
not (null $ query hasCodeBlock elts ++ query hasCode elts)
let frameoptions = ["allowdisplaybreaks", "allowframebreaks", "fragile",
"b", "c", "t", "environment",
- "label", "plain", "shrink", "standout"]
+ "label", "plain", "shrink", "standout",
+ "noframenumbering"]
let optionslist = ["fragile" | fragile && isNothing (lookup "fragile" kvs)] ++
[k | k <- classes, k `elem` frameoptions] ++
[k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions]
@@ -487,7 +555,7 @@ blockToLaTeX (Div (identifier,classes,kvs) bs)
then \contents ->
let fromPct xs =
case reverse xs of
- '%':ds -> '0':'.': reverse ds
+ '%':ds -> showFl (read (reverse ds) / 100 :: Double)
_ -> xs
w = maybe "0.48" fromPct (lookup "width" kvs)
in inCmd "begin" "column" <>
@@ -517,25 +585,15 @@ blockToLaTeX (Plain lst) =
inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
-- title beginning with fig: indicates that the image is a figure
blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do
- inNote <- gets stInNote
- inMinipage <- gets stInMinipage
- modify $ \st -> st{ stInMinipage = True, stNotes = [] }
- capt <- inlineListToLaTeX txt
- notes <- gets stNotes
- modify $ \st -> st{ stInMinipage = False, stNotes = [] }
-
- -- We can't have footnotes in the list of figures, so remove them:
- captForLof <- if null notes
- then return empty
- else brackets <$> inlineListToLaTeX (walk deNote txt)
- img <- inlineToLaTeX (Image attr txt (src,tit))
- let footnotes = notesToLaTeX notes
+ (capt, captForLof, footnotes) <- getCaption txt
lab <- labelFor ident
let caption = "\\caption" <> captForLof <> braces capt <> lab
+ img <- inlineToLaTeX (Image attr txt (src,tit))
innards <- hypertarget True ident $
"\\centering" $$ img $$ caption <> cr
let figure = cr <> "\\begin{figure}" $$ innards $$ "\\end{figure}"
- return $ if inNote || inMinipage
+ st <- get
+ return $ if stInNote st || stInMinipage st
-- can't have figures in notes or minipage (here, table cell)
-- http://www.tex.ac.uk/FAQ-ouparmd.html
then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}"
@@ -714,11 +772,11 @@ blockToLaTeX (Header level (id',classes,_) lst) = do
modify $ \s -> s{stInHeading = False}
return hdr
blockToLaTeX (Table caption aligns widths heads rows) = do
+ (captionText, captForLof, footnotes) <- getCaption caption
let toHeaders hs = do contents <- tableRowToLaTeX True aligns widths hs
return ("\\toprule" $$ contents $$ "\\midrule")
let removeNote (Note _) = Span ("", [], []) []
removeNote x = x
- captionText <- inlineListToLaTeX caption
firsthead <- if isEmpty captionText || all null heads
then return empty
else ($$ text "\\endfirsthead") <$> toHeaders heads
@@ -730,8 +788,8 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
else walk removeNote heads)
let capt = if isEmpty captionText
then empty
- else text "\\caption" <>
- braces captionText <> "\\tabularnewline"
+ else "\\caption" <> captForLof <> braces captionText
+ <> "\\tabularnewline"
rows' <- mapM (tableRowToLaTeX False aligns widths) rows
let colDescriptors = text $ concatMap toColDescriptor aligns
modify $ \s -> s{ stTable = True }
@@ -745,6 +803,21 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
$$ vcat rows'
$$ "\\bottomrule"
$$ "\\end{longtable}"
+ $$ footnotes
+
+getCaption :: PandocMonad m => [Inline] -> LW m (Doc, Doc, Doc)
+getCaption txt = do
+ inMinipage <- gets stInMinipage
+ modify $ \st -> st{ stInMinipage = True, stNotes = [] }
+ capt <- inlineListToLaTeX txt
+ notes <- gets stNotes
+ modify $ \st -> st{ stInMinipage = inMinipage, stNotes = [] }
+ -- We can't have footnotes in the list of figures/tables, so remove them:
+ captForLof <- if null notes
+ then return empty
+ else brackets <$> inlineListToLaTeX (walk deNote txt)
+ let footnotes = notesToLaTeX notes
+ return (capt, captForLof, footnotes)
toColDescriptor :: Alignment -> String
toColDescriptor align =
@@ -863,9 +936,11 @@ defListItemToLaTeX (term, defs) = do
else term'
def' <- liftM vsep $ mapM blockListToLaTeX defs
return $ case defs of
- ((Header{} : _) : _) ->
+ ((Header{} : _) : _) ->
+ "\\item" <> brackets term'' <> " ~ " $$ def'
+ ((CodeBlock{} : _) : _) -> -- see #4662
"\\item" <> brackets term'' <> " ~ " $$ def'
- _ ->
+ _ ->
"\\item" <> brackets term'' $$ def'
-- | Craft the section header, inserting the secton reference, if supplied.
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 912231a88..81fa38bd7 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -107,7 +107,8 @@ pandocToMan opts (Pandoc meta blocks) = do
$ defField "has-tables" hasTables
$ defField "hyphenate" True
$ defField "pandoc-version" pandocVersion metadata
- case writerTemplate opts of
+ (if writerPreferAscii opts then groffEscape else id) <$>
+ case writerTemplate opts of
Nothing -> return main
Just tpl -> renderTemplate' tpl context
@@ -152,32 +153,6 @@ escapeCode = intercalate "\n" . map escapeLine . lines where
-- line. groff/troff treats the line-ending period differently.
-- See http://code.google.com/p/pandoc/issues/detail?id=148.
--- | Returns the first sentence in a list of inlines, and the rest.
-breakSentence :: [Inline] -> ([Inline], [Inline])
-breakSentence [] = ([],[])
-breakSentence xs =
- let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
- isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
- isSentenceEndInline LineBreak = True
- isSentenceEndInline _ = False
- (as, bs) = break isSentenceEndInline xs
- in case bs of
- [] -> (as, [])
- [c] -> (as ++ [c], [])
- (c:Space:cs) -> (as ++ [c], cs)
- (c:SoftBreak:cs) -> (as ++ [c], cs)
- (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs)
- (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs)
- (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs)
- (c:cs) -> (as ++ [c] ++ ds, es)
- where (ds, es) = breakSentence cs
-
--- | Split a list of inlines into sentences.
-splitSentences :: [Inline] -> [[Inline]]
-splitSentences xs =
- let (sent, rest) = breakSentence xs
- in if null rest then [sent] else sent : splitSentences rest
-
-- | Convert Pandoc block element to man.
blockToMan :: PandocMonad m
=> WriterOptions -- ^ Options
@@ -325,11 +300,11 @@ blockListToMan :: PandocMonad m
-> [Block] -- ^ List of block elements
-> StateT WriterState m Doc
blockListToMan opts blocks =
- mapM (blockToMan opts) blocks >>= (return . vcat)
+ vcat <$> mapM (blockToMan opts) blocks
-- | Convert list of Pandoc inline elements to man.
inlineListToMan :: PandocMonad m => WriterOptions -> [Inline] -> StateT WriterState m Doc
-inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
+inlineListToMan opts lst = hcat <$> mapM (inlineToMan opts) lst
-- | Convert Pandoc inline element to man.
inlineToMan :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m Doc
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 075858e5e..9a4acb59d 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -38,7 +38,7 @@ module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
import Prelude
import Control.Monad.Reader
import Control.Monad.State.Strict
-import Data.Char (chr, isPunctuation, isSpace, ord, isAlphaNum)
+import Data.Char (isPunctuation, isSpace, isAlphaNum)
import Data.Default
import qualified Data.HashMap.Strict as H
import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose)
@@ -50,7 +50,7 @@ import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Vector as V
-import Data.Yaml (Value (Array, Bool, Number, Object, String))
+import Data.Aeson (Value (Array, Bool, Number, Object, String))
import Network.HTTP (urlEncode)
import Text.HTML.TagSoup (Tag (..), isTagText, parseTags)
import Text.Pandoc.Class (PandocMonad, report)
@@ -298,7 +298,8 @@ escapeString opts (c:cs) =
'\\':c:escapeString opts cs
'|' | isEnabled Ext_pipe_tables opts -> '\\':'|':escapeString opts cs
'^' | isEnabled Ext_superscript opts -> '\\':'^':escapeString opts cs
- '~' | isEnabled Ext_subscript opts -> '\\':'~':escapeString opts cs
+ '~' | isEnabled Ext_subscript opts ||
+ isEnabled Ext_strikeout opts -> '\\':'~':escapeString opts cs
'$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':escapeString opts cs
'\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString opts cs
'"' | isEnabled Ext_smart opts -> '\\':'"':escapeString opts cs
@@ -452,8 +453,14 @@ blockToMarkdown' opts (Plain inlines) = do
| otherwise -> contents
return $ contents' <> cr
-- title beginning with fig: indicates figure
-blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) =
- blockToMarkdown opts (Para [Image attr alt (src,tit)])
+blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)])
+ | isEnabled Ext_raw_html opts &&
+ not (isEnabled Ext_link_attributes opts) &&
+ attr /= nullAttr = -- use raw HTML
+ (text . T.unpack . T.strip) <$>
+ writeHtml5String opts{ writerTemplate = Nothing }
+ (Pandoc nullMeta [Para [Image attr alt (src,"fig:" ++ tit)]])
+ | otherwise = blockToMarkdown opts (Para [Image attr alt (src,tit)])
blockToMarkdown' opts (Para inlines) =
(<> blankline) `fmap` blockToMarkdown opts (Plain inlines)
blockToMarkdown' opts (LineBlock lns) =
@@ -619,7 +626,7 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do
(all null headers) aligns' widths' headers rows
| isEnabled Ext_raw_html opts -> fmap (id,) $
(text . T.unpack) <$>
- (writeHtml5String def $ Pandoc nullMeta [t])
+ (writeHtml5String opts{ writerTemplate = Nothing } $ Pandoc nullMeta [t])
| hasSimpleCells &&
isEnabled Ext_pipe_tables opts -> do
rawHeaders <- padRow <$> mapM (blockListToMarkdown opts) headers
@@ -976,6 +983,11 @@ isRight (Left _) = False
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m Doc
+inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = do
+ case lookup "data-emoji" kvs of
+ Just emojiname | isEnabled Ext_emoji opts ->
+ return $ ":" <> text emojiname <> ":"
+ _ -> inlineToMarkdown opts (Str s)
inlineToMarkdown opts (Span attrs ils) = do
plain <- asks envPlain
contents <- inlineListToMarkdown opts ils
@@ -1172,7 +1184,7 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit))
not (isEnabled Ext_link_attributes opts) &&
attr /= nullAttr = -- use raw HTML
(text . T.unpack . T.strip) <$>
- writeHtml5String def (Pandoc nullMeta [Plain [lnk]])
+ writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [lnk]])
| otherwise = do
plain <- asks envPlain
linktext <- inlineListToMarkdown opts txt
@@ -1212,7 +1224,7 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit))
not (isEnabled Ext_link_attributes opts) &&
attr /= nullAttr = -- use raw HTML
(text . T.unpack . T.strip) <$>
- writeHtml5String def (Pandoc nullMeta [Plain [img]])
+ writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]])
| otherwise = do
plain <- asks envPlain
let txt = if null alternate || alternate == [Str source]
@@ -1237,33 +1249,6 @@ makeMathPlainer = walk go
go (Emph xs) = Span nullAttr xs
go x = x
-toSuperscript :: Char -> Maybe Char
-toSuperscript '1' = Just '\x00B9'
-toSuperscript '2' = Just '\x00B2'
-toSuperscript '3' = Just '\x00B3'
-toSuperscript '+' = Just '\x207A'
-toSuperscript '-' = Just '\x207B'
-toSuperscript '=' = Just '\x207C'
-toSuperscript '(' = Just '\x207D'
-toSuperscript ')' = Just '\x207E'
-toSuperscript c
- | c >= '0' && c <= '9' =
- Just $ chr (0x2070 + (ord c - 48))
- | isSpace c = Just c
- | otherwise = Nothing
-
-toSubscript :: Char -> Maybe Char
-toSubscript '+' = Just '\x208A'
-toSubscript '-' = Just '\x208B'
-toSubscript '=' = Just '\x208C'
-toSubscript '(' = Just '\x208D'
-toSubscript ')' = Just '\x208E'
-toSubscript c
- | c >= '0' && c <= '9' =
- Just $ chr (0x2080 + (ord c - 48))
- | isSpace c = Just c
- | otherwise = Nothing
-
lineBreakToSpace :: Inline -> Inline
lineBreakToSpace LineBreak = Space
lineBreakToSpace SoftBreak = Space
diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs
index 99d17d594..61decf2df 100644
--- a/src/Text/Pandoc/Writers/Math.hs
+++ b/src/Text/Pandoc/Writers/Math.hs
@@ -55,4 +55,4 @@ defaultMathJaxURL :: String
defaultMathJaxURL = "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/"
defaultKaTeXURL :: String
-defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.8.3/"
+defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.9.0/"
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index df50028a0..666853a3c 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -313,6 +313,7 @@ tableCellToMediaWiki headless rownum (alignment, width, bs) = do
let sep = case bs of
[Plain _] -> " "
[Para _] -> " "
+ [] -> ""
_ -> "\n"
return $ marker ++ attr ++ sep ++ trimr contents
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 16a66c85b..9a35a9693 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -127,7 +127,8 @@ pandocToMs opts (Pandoc meta blocks) = do
$ defField "title-meta" titleMeta
$ defField "author-meta" (intercalate "; " authorsMeta)
$ defField "highlighting-macros" highlightingMacros metadata
- case writerTemplate opts of
+ (if writerPreferAscii opts then groffEscape else id) <$>
+ case writerTemplate opts of
Nothing -> return main
Just tpl -> renderTemplate' tpl context
@@ -188,32 +189,6 @@ escapeCode = intercalate "\n" . map escapeLine . lines
-- line. groff/troff treats the line-ending period differently.
-- See http://code.google.com/p/pandoc/issues/detail?id=148.
--- | Returns the first sentence in a list of inlines, and the rest.
-breakSentence :: [Inline] -> ([Inline], [Inline])
-breakSentence [] = ([],[])
-breakSentence xs =
- let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True
- isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True
- isSentenceEndInline LineBreak = True
- isSentenceEndInline _ = False
- (as, bs) = break isSentenceEndInline xs
- in case bs of
- [] -> (as, [])
- [c] -> (as ++ [c], [])
- (c:Space:cs) -> (as ++ [c], cs)
- (c:SoftBreak:cs) -> (as ++ [c], cs)
- (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs)
- (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs)
- (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs)
- (c:cs) -> (as ++ [c] ++ ds, es)
- where (ds, es) = breakSentence cs
-
--- | Split a list of inlines into sentences.
-splitSentences :: [Inline] -> [[Inline]]
-splitSentences xs =
- let (sent, rest) = breakSentence xs
- in if null rest then [sent] else sent : splitSentences rest
-
blockToMs :: PandocMonad m
=> WriterOptions -- ^ Options
-> Block -- ^ Block element
@@ -434,7 +409,7 @@ blockListToMs :: PandocMonad m
-> [Block] -- ^ List of block elements
-> MS m Doc
blockListToMs opts blocks =
- mapM (blockToMs opts) blocks >>= (return . vcat)
+ vcat <$> mapM (blockToMs opts) blocks
-- | Convert list of Pandoc inline elements to ms.
inlineListToMs :: PandocMonad m => WriterOptions -> [Inline] -> MS m Doc
diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs
index 3681fcc0d..18aebc364 100644
--- a/src/Text/Pandoc/Writers/Muse.hs
+++ b/src/Text/Pandoc/Writers/Muse.hs
@@ -46,7 +46,7 @@ module Text.Pandoc.Writers.Muse (writeMuse) where
import Prelude
import Control.Monad.Reader
import Control.Monad.State.Strict
-import Data.Char (isSpace, isDigit, isAsciiUpper, isAsciiLower)
+import Data.Char (isSpace, isAlphaNum, isDigit, isAsciiUpper, isAsciiLower)
import Data.Default
import Data.Text (Text)
import Data.List (intersperse, transpose, isInfixOf)
@@ -70,20 +70,24 @@ data WriterEnv =
WriterEnv { envOptions :: WriterOptions
, envTopLevel :: Bool
, envInsideBlock :: Bool
- , envInlineStart :: Bool
+ , envInlineStart :: Bool -- ^ True if there is only whitespace since last newline
, envInsideLinkDescription :: Bool -- ^ Escape ] if True
- , envAfterSpace :: Bool
+ , envAfterSpace :: Bool -- ^ There is whitespace (not just newline) before
, envOneLine :: Bool -- ^ True if newlines are not allowed
+ , envInsideAsterisks :: Bool -- ^ True if outer element is emphasis with asterisks
+ , envNearAsterisks :: Bool -- ^ Rendering inline near asterisks
}
data WriterState =
WriterState { stNotes :: Notes
, stIds :: Set.Set String
+ , stUseTags :: Bool -- ^ Use tags for emphasis, for example because previous character is a letter
}
instance Default WriterState
where def = WriterState { stNotes = []
, stIds = Set.empty
+ , stUseTags = False
}
evalMuse :: PandocMonad m => Muse m a -> WriterEnv -> WriterState -> m a
@@ -103,6 +107,8 @@ writeMuse opts document =
, envInsideLinkDescription = False
, envAfterSpace = False
, envOneLine = False
+ , envInsideAsterisks = False
+ , envNearAsterisks = False
}
-- | Return Muse representation of document.
@@ -212,6 +218,7 @@ blockToMuse (BulletList items) = do
=> [Block]
-> Muse m Doc
bulletListItemToMuse item = do
+ modify $ \st -> st { stUseTags = False }
contents <- blockListToMuse item
return $ hang 2 "- " contents
blockToMuse (DefinitionList items) = do
@@ -223,16 +230,18 @@ blockToMuse (DefinitionList items) = do
=> ([Inline], [[Block]])
-> Muse m Doc
definitionListItemToMuse (label, defs) = do
+ modify $ \st -> st { stUseTags = False }
label' <- local (\env -> env { envOneLine = True, envAfterSpace = True }) $ inlineListToMuse' label
contents <- vcat <$> mapM descriptionToMuse defs
let ind = offset label'
- return $ hang ind label' contents
+ return $ hang ind (nowrap label') contents
descriptionToMuse :: PandocMonad m
=> [Block]
-> Muse m Doc
descriptionToMuse desc = hang 4 " :: " <$> blockListToMuse desc
blockToMuse (Header level (ident,_,_) inlines) = do
opts <- asks envOptions
+ topLevel <- asks envTopLevel
contents <- local (\env -> env { envOneLine = True }) $ inlineListToMuse' inlines
ids <- gets stIds
let autoId = uniqueIdent inlines ids
@@ -241,8 +250,8 @@ blockToMuse (Header level (ident,_,_) inlines) = do
let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId)
then empty
else "#" <> text ident <> cr
- let header' = text $ replicate level '*'
- return $ blankline <> attr' $$ nowrap (header' <> space <> contents) <> blankline
+ let header' = if topLevel then (text $ replicate level '*') <> space else mempty
+ return $ blankline <> attr' $$ nowrap (header' <> contents) <> blankline
-- https://www.gnu.org/software/emacs-muse/manual/muse.html#Horizontal-Rules-and-Anchors
blockToMuse HorizontalRule = return $ blankline $$ "----" $$ blankline
blockToMuse (Table caption _ _ headers rows) = do
@@ -284,7 +293,11 @@ noteToMuse :: PandocMonad m
-> [Block]
-> Muse m Doc
noteToMuse num note =
- hang (length marker) (text marker) <$> blockListToMuse note
+ hang (length marker) (text marker) <$>
+ (local (\env -> env { envInsideBlock = True
+ , envInlineStart = True
+ , envAfterSpace = True
+ }) $ blockListToMuse note)
where
marker = "[" ++ show num ++ "] "
@@ -295,6 +308,12 @@ escapeString s =
substitute "</verbatim>" "<</verbatim><verbatim>/verbatim>" s ++
"</verbatim>"
+-- | Replace newlines with spaces
+replaceNewlines :: String -> String
+replaceNewlines ('\n':xs) = ' ':replaceNewlines xs
+replaceNewlines (x:xs) = x:replaceNewlines xs
+replaceNewlines [] = []
+
startsWithMarker :: (Char -> Bool) -> String -> Bool
startsWithMarker f (' ':xs) = startsWithMarker f xs
startsWithMarker f (x:xs) =
@@ -321,16 +340,28 @@ containsFootnotes = p
s (_:xs) = p xs
s [] = False
-conditionalEscapeString :: Bool -> String -> String
-conditionalEscapeString isInsideLinkDescription s =
- if any (`elem` ("#*<=|" :: String)) s ||
- "::" `isInfixOf` s ||
- "~~" `isInfixOf` s ||
- "[[" `isInfixOf` s ||
- ("]" `isInfixOf` s && isInsideLinkDescription) ||
- containsFootnotes s
- then escapeString s
- else s
+-- | Return True if string should be escaped with <verbatim> tags
+shouldEscapeString :: PandocMonad m
+ => String
+ -> Muse m Bool
+shouldEscapeString s = do
+ insideLink <- asks envInsideLinkDescription
+ return $ null s ||
+ any (`elem` ("#*<=|" :: String)) s ||
+ "::" `isInfixOf` s ||
+ "~~" `isInfixOf` s ||
+ "[[" `isInfixOf` s ||
+ ("]" `isInfixOf` s && insideLink) ||
+ containsFootnotes s
+
+conditionalEscapeString :: PandocMonad m
+ => String
+ -> Muse m String
+conditionalEscapeString s = do
+ shouldEscape <- shouldEscapeString s
+ return $ if shouldEscape
+ then escapeString s
+ else s
-- Expand Math and Cite before normalizing inline list
preprocessInlineList :: PandocMonad m
@@ -389,6 +420,19 @@ fixNotes (Space : n@Note{} : rest) = Str " " : n : fixNotes rest
fixNotes (SoftBreak : n@Note{} : rest) = Str " " : n : fixNotes rest
fixNotes (x:xs) = x : fixNotes xs
+startsWithSpace :: [Inline] -> Bool
+startsWithSpace (Space:_) = True
+startsWithSpace (SoftBreak:_) = True
+startsWithSpace (Str s:_) = stringStartsWithSpace s
+startsWithSpace _ = False
+
+endsWithSpace :: [Inline] -> Bool
+endsWithSpace [Space] = True
+endsWithSpace [SoftBreak] = True
+endsWithSpace [Str s] = stringStartsWithSpace $ reverse s
+endsWithSpace (_:xs) = endsWithSpace xs
+endsWithSpace [] = False
+
urlEscapeBrackets :: String -> String
urlEscapeBrackets (']':xs) = '%':'5':'D':urlEscapeBrackets xs
urlEscapeBrackets (x:xs) = x:urlEscapeBrackets xs
@@ -397,22 +441,33 @@ urlEscapeBrackets [] = []
isHorizontalRule :: String -> Bool
isHorizontalRule s = length s >= 4 && all (== '-') s
-startsWithSpace :: String -> Bool
-startsWithSpace (x:_) = isSpace x
-startsWithSpace [] = False
+stringStartsWithSpace :: String -> Bool
+stringStartsWithSpace (x:_) = isSpace x
+stringStartsWithSpace "" = False
fixOrEscape :: Bool -> Inline -> Bool
fixOrEscape sp (Str "-") = sp
-fixOrEscape sp (Str ";") = not sp
-fixOrEscape _ (Str ">") = True
+fixOrEscape sp (Str s@('-':x:_)) = (sp && isSpace x) || isHorizontalRule s
+fixOrEscape sp (Str (";")) = not sp
+fixOrEscape sp (Str (';':x:_)) = not sp && isSpace x
+fixOrEscape _ (Str (">")) = True
+fixOrEscape _ (Str ('>':x:_)) = isSpace x
fixOrEscape sp (Str s) = (sp && (startsWithMarker isDigit s ||
startsWithMarker isAsciiLower s ||
startsWithMarker isAsciiUpper s))
- || isHorizontalRule s || startsWithSpace s
+ || stringStartsWithSpace s
fixOrEscape _ Space = True
fixOrEscape _ SoftBreak = True
fixOrEscape _ _ = False
+inlineListStartsWithAlnum :: PandocMonad m
+ => [Inline]
+ -> Muse m Bool
+inlineListStartsWithAlnum (Str s:_) = do
+ esc <- shouldEscapeString s
+ return $ esc || isAlphaNum (head s)
+inlineListStartsWithAlnum _ = return False
+
-- | Convert list of Pandoc inline elements to Muse
renderInlineList :: PandocMonad m
=> [Inline]
@@ -424,86 +479,159 @@ renderInlineList (x:xs) = do
start <- asks envInlineStart
afterSpace <- asks envAfterSpace
topLevel <- asks envTopLevel
- r <- inlineToMuse x
+ insideAsterisks <- asks envInsideAsterisks
+ nearAsterisks <- asks envNearAsterisks
+ useTags <- gets stUseTags
+ alnumNext <- inlineListStartsWithAlnum xs
+ let newUseTags = useTags || alnumNext
+ modify $ \st -> st { stUseTags = newUseTags }
+
+ r <- local (\env -> env { envInlineStart = False
+ , envInsideAsterisks = False
+ , envNearAsterisks = nearAsterisks || (null xs && insideAsterisks)
+ }) $ inlineToMuse x
opts <- asks envOptions
let isNewline = (x == SoftBreak && writerWrapText opts == WrapPreserve) || x == LineBreak
lst' <- local (\env -> env { envInlineStart = isNewline
, envAfterSpace = x == Space || (not topLevel && isNewline)
+ , envNearAsterisks = False
}) $ renderInlineList xs
if start && fixOrEscape afterSpace x
then pure (text "<verbatim></verbatim>" <> r <> lst')
else pure (r <> lst')
-- | Normalize and convert list of Pandoc inline elements to Muse.
-inlineListToMuse'' :: PandocMonad m
- => Bool
- -> [Inline]
- -> Muse m Doc
-inlineListToMuse'' start lst = do
- lst' <- (normalizeInlineList . fixNotes) <$> preprocessInlineList (map (removeKeyValues . replaceSmallCaps) lst)
- topLevel <- asks envTopLevel
- afterSpace <- asks envAfterSpace
- local (\env -> env { envInlineStart = start
- , envAfterSpace = afterSpace || (start && not topLevel)
- }) $ renderInlineList lst'
+inlineListToMuse :: PandocMonad m
+ => [Inline]
+ -> Muse m Doc
+inlineListToMuse lst = do
+ lst' <- normalizeInlineList . fixNotes <$> preprocessInlineList (map (removeKeyValues . replaceSmallCaps) lst)
+ insideAsterisks <- asks envInsideAsterisks
+ modify $ \st -> st { stUseTags = False } -- Previous character is likely a '>' or some other markup
+ local (\env -> env { envNearAsterisks = insideAsterisks }) $ renderInlineList lst'
inlineListToMuse' :: PandocMonad m => [Inline] -> Muse m Doc
-inlineListToMuse' = inlineListToMuse'' True
-
-inlineListToMuse :: PandocMonad m => [Inline] -> Muse m Doc
-inlineListToMuse = inlineListToMuse'' False
+inlineListToMuse' lst = do
+ topLevel <- asks envTopLevel
+ afterSpace <- asks envAfterSpace
+ local (\env -> env { envInlineStart = True
+ , envAfterSpace = afterSpace || not topLevel
+ }) $ inlineListToMuse lst
-- | Convert Pandoc inline element to Muse.
inlineToMuse :: PandocMonad m
=> Inline
-> Muse m Doc
inlineToMuse (Str str) = do
- insideLink <- asks envInsideLinkDescription
- return $ text $ conditionalEscapeString insideLink str
+ escapedStr <- conditionalEscapeString $ replaceNewlines str
+ let useTags = isAlphaNum $ last escapedStr -- escapedStr is never empty because empty strings are escaped
+ modify $ \st -> st { stUseTags = useTags }
+ return $ text escapedStr
+inlineToMuse (Emph [Strong lst]) = do
+ useTags <- gets stUseTags
+ let lst' = normalizeInlineList lst
+ if useTags
+ then do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst'
+ modify $ \st -> st { stUseTags = False }
+ return $ "<em>**" <> contents <> "**</em>"
+ else if null lst' || startsWithSpace lst' || endsWithSpace lst'
+ then do
+ contents <- local (\env -> env { envInsideAsterisks = False }) $ inlineListToMuse lst'
+ modify $ \st -> st { stUseTags = True }
+ return $ "*<strong>" <> contents <> "</strong>*"
+ else do
+ contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst'
+ modify $ \st -> st { stUseTags = True }
+ return $ "***" <> contents <> "***"
inlineToMuse (Emph lst) = do
- contents <- inlineListToMuse lst
- return $ "<em>" <> contents <> "</em>"
+ useTags <- gets stUseTags
+ let lst' = normalizeInlineList lst
+ if useTags || null lst' || startsWithSpace lst' || endsWithSpace lst'
+ then do contents <- inlineListToMuse lst'
+ return $ "<em>" <> contents <> "</em>"
+ else do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst'
+ modify $ \st -> st { stUseTags = True }
+ return $ "*" <> contents <> "*"
+inlineToMuse (Strong [Emph lst]) = do
+ useTags <- gets stUseTags
+ let lst' = normalizeInlineList lst
+ if useTags
+ then do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst'
+ modify $ \st -> st { stUseTags = False }
+ return $ "<strong>*" <> contents <> "*</strong>"
+ else if null lst' || startsWithSpace lst' || endsWithSpace lst'
+ then do
+ contents <- local (\env -> env { envInsideAsterisks = False }) $ inlineListToMuse lst'
+ modify $ \st -> st { stUseTags = True }
+ return $ "**<em>" <> contents <> "</em>**"
+ else do
+ contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst'
+ modify $ \st -> st { stUseTags = True }
+ return $ "***" <> contents <> "***"
inlineToMuse (Strong lst) = do
- contents <- inlineListToMuse lst
- return $ "<strong>" <> contents <> "</strong>"
+ useTags <- gets stUseTags
+ let lst' = normalizeInlineList lst
+ if useTags || null lst' || startsWithSpace lst' || endsWithSpace lst'
+ then do contents <- inlineListToMuse lst'
+ modify $ \st -> st { stUseTags = False }
+ return $ "<strong>" <> contents <> "</strong>"
+ else do contents <- local (\env -> env { envInsideAsterisks = True }) $ inlineListToMuse lst'
+ modify $ \st -> st { stUseTags = True }
+ return $ "**" <> contents <> "**"
inlineToMuse (Strikeout lst) = do
contents <- inlineListToMuse lst
+ modify $ \st -> st { stUseTags = False }
return $ "<del>" <> contents <> "</del>"
inlineToMuse (Superscript lst) = do
contents <- inlineListToMuse lst
+ modify $ \st -> st { stUseTags = False }
return $ "<sup>" <> contents <> "</sup>"
inlineToMuse (Subscript lst) = do
contents <- inlineListToMuse lst
+ modify $ \st -> st { stUseTags = False }
return $ "<sub>" <> contents <> "</sub>"
inlineToMuse SmallCaps {} =
fail "SmallCaps should be expanded before normalization"
inlineToMuse (Quoted SingleQuote lst) = do
contents <- inlineListToMuse lst
+ modify $ \st -> st { stUseTags = False }
return $ "‘" <> contents <> "’"
inlineToMuse (Quoted DoubleQuote lst) = do
contents <- inlineListToMuse lst
+ modify $ \st -> st { stUseTags = False }
return $ "“" <> contents <> "”"
inlineToMuse Cite {} =
fail "Citations should be expanded before normalization"
-inlineToMuse (Code _ str) = return $
- "<code>" <> text (substitute "</code>" "<</code><code>/code>" str) <> "</code>"
+inlineToMuse (Code _ str) = do
+ useTags <- gets stUseTags
+ modify $ \st -> st { stUseTags = False }
+ return $ if useTags || null str || '=' `elem` str || isSpace (head str) || isSpace (last str)
+ then "<code>" <> text (substitute "</code>" "<</code><code>/code>" str) <> "</code>"
+ else "=" <> text str <> "="
inlineToMuse Math{} =
fail "Math should be expanded before normalization"
-inlineToMuse (RawInline (Format f) str) =
+inlineToMuse (RawInline (Format f) str) = do
+ modify $ \st -> st { stUseTags = False }
return $ "<literal style=\"" <> text f <> "\">" <> text str <> "</literal>"
inlineToMuse LineBreak = do
oneline <- asks envOneLine
+ modify $ \st -> st { stUseTags = False }
return $ if oneline then "<br>" else "<br>" <> cr
-inlineToMuse Space = return space
+inlineToMuse Space = do
+ modify $ \st -> st { stUseTags = False }
+ return space
inlineToMuse SoftBreak = do
oneline <- asks envOneLine
wrapText <- asks $ writerWrapText . envOptions
+ modify $ \st -> st { stUseTags = False }
return $ if not oneline && wrapText == WrapPreserve then cr else space
inlineToMuse (Link _ txt (src, _)) =
case txt of
- [Str x] | escapeURI x == src ->
+ [Str x] | escapeURI x == src -> do
+ modify $ \st -> st { stUseTags = False }
return $ "[[" <> text (escapeLink x) <> "]]"
_ -> do contents <- local (\env -> env { envInsideLinkDescription = True }) $ inlineListToMuse txt
+ modify $ \st -> st { stUseTags = False }
return $ "[[" <> text (escapeLink src) <> "][" <> contents <> "]]"
where escapeLink lnk = if isImageUrl lnk then "URL:" ++ urlEscapeBrackets lnk else urlEscapeBrackets lnk
-- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
@@ -514,11 +642,12 @@ inlineToMuse (Image attr alt (source,'f':'i':'g':':':title)) =
inlineToMuse (Image attr@(_, classes, _) inlines (source, title)) = do
opts <- asks envOptions
alt <- local (\env -> env { envInsideLinkDescription = True }) $ inlineListToMuse inlines
- let title' = if null title
- then if null inlines
- then ""
- else "[" <> alt <> "]"
- else "[" <> text (conditionalEscapeString True title) <> "]"
+ title' <- if null title
+ then if null inlines
+ then return ""
+ else return $ "[" <> alt <> "]"
+ else do s <- local (\env -> env { envInsideLinkDescription = True }) $ conditionalEscapeString title
+ return $ "[" <> text s <> "]"
let width = case dimension Width attr of
Just (Percent x) | isEnabled Ext_amuse opts -> " " ++ show (round x :: Integer)
_ -> ""
@@ -528,11 +657,14 @@ inlineToMuse (Image attr@(_, classes, _) inlines (source, title)) = do
let rightalign = if "align-right" `elem` classes
then " r"
else ""
+ modify $ \st -> st { stUseTags = False }
return $ "[[" <> text (urlEscapeBrackets source ++ width ++ leftalign ++ rightalign) <> "]" <> title' <> "]"
inlineToMuse (Note contents) = do
-- add to notes in state
notes <- gets stNotes
- modify $ \st -> st { stNotes = contents:notes }
+ modify $ \st -> st { stNotes = contents:notes
+ , stUseTags = False
+ }
let ref = show $ length notes + 1
return $ "[" <> text ref <> "]"
inlineToMuse (Span (anchor,names,_) inlines) = do
@@ -540,6 +672,7 @@ inlineToMuse (Span (anchor,names,_) inlines) = do
let anchorDoc = if null anchor
then mempty
else text ('#':anchor) <> space
+ modify $ \st -> st { stUseTags = False }
return $ anchorDoc <> (if null inlines && not (null anchor)
then mempty
else (if null names
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 7aecb3da5..1c9481630 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -189,8 +189,8 @@ transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError
let dims =
case (getDim Width, getDim Height) of
(Just w, Just h) -> [("width", show w), ("height", show h)]
- (Just w@(Percent p), Nothing) -> [("width", show w), ("height", show (p / ratio) ++ "%")]
- (Nothing, Just h@(Percent p)) -> [("width", show (p * ratio) ++ "%"), ("height", show h)]
+ (Just w@(Percent _), Nothing) -> [("rel-width", show w),("rel-height", "scale"),("width", show ptX ++ "pt"),("height", show ptY ++ "pt")]
+ (Nothing, Just h@(Percent _)) -> [("rel-width", "scale"),("rel-height", show h),("width", show ptX ++ "pt"),("height", show ptY ++ "pt")]
(Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")]
(Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)]
_ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")]
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
index 6c48046a2..716c5cbad 100644
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -62,7 +62,8 @@ writeOPML opts (Pandoc meta blocks) = do
meta'
main <- (render colwidth . vcat) <$> mapM (elementToOPML opts) elements
let context = defField "body" main metadata
- case writerTemplate opts of
+ (if writerPreferAscii opts then toEntities else id) <$>
+ case writerTemplate opts of
Nothing -> return main
Just tpl -> renderTemplate' tpl context
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 514327e9a..d9f0a8e44 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -39,17 +39,20 @@ import Control.Monad.State.Strict hiding (when)
import Data.Char (chr)
import Data.List (sortBy)
import qualified Data.Map as Map
+import Data.Maybe (fromMaybe)
import Data.Ord (comparing)
import qualified Data.Set as Set
import Data.Text (Text)
import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad, report, translateTerm,
+ setTranslations, toLang)
import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Pretty
import Text.Pandoc.Shared (linesToPara)
import Text.Pandoc.Templates (renderTemplate')
+import qualified Text.Pandoc.Translations as Term (Term(Figure, Table))
import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared
import Text.Pandoc.XML
@@ -67,32 +70,36 @@ plainToPara x = x
type OD m = StateT WriterState m
data WriterState =
- WriterState { stNotes :: [Doc]
- , stTableStyles :: [Doc]
- , stParaStyles :: [Doc]
- , stListStyles :: [(Int, [Doc])]
- , stTextStyles :: Map.Map (Set.Set TextStyle) (String, Doc)
- , stTextStyleAttr :: Set.Set TextStyle
- , stIndentPara :: Int
- , stInDefinition :: Bool
- , stTight :: Bool
- , stFirstPara :: Bool
- , stImageId :: Int
+ WriterState { stNotes :: [Doc]
+ , stTableStyles :: [Doc]
+ , stParaStyles :: [Doc]
+ , stListStyles :: [(Int, [Doc])]
+ , stTextStyles :: Map.Map (Set.Set TextStyle) (String, Doc)
+ , stTextStyleAttr :: Set.Set TextStyle
+ , stIndentPara :: Int
+ , stInDefinition :: Bool
+ , stTight :: Bool
+ , stFirstPara :: Bool
+ , stImageId :: Int
+ , stTableCaptionId :: Int
+ , stImageCaptionId :: Int
}
defaultWriterState :: WriterState
defaultWriterState =
- WriterState { stNotes = []
- , stTableStyles = []
- , stParaStyles = []
- , stListStyles = []
- , stTextStyles = Map.empty
- , stTextStyleAttr = Set.empty
- , stIndentPara = 0
- , stInDefinition = False
- , stTight = False
- , stFirstPara = False
- , stImageId = 1
+ WriterState { stNotes = []
+ , stTableStyles = []
+ , stParaStyles = []
+ , stListStyles = []
+ , stTextStyles = Map.empty
+ , stTextStyleAttr = Set.empty
+ , stIndentPara = 0
+ , stInDefinition = False
+ , stTight = False
+ , stFirstPara = False
+ , stImageId = 1
+ , stTableCaptionId = 1
+ , stImageCaptionId = 1
}
when :: Bool -> Doc -> Doc
@@ -193,10 +200,15 @@ formulaStyle mt = inTags False "style:style"
,("style:horizontal-rel", "paragraph-content")
,("style:wrap", "none")]
-inHeaderTags :: PandocMonad m => Int -> Doc -> OD m Doc
-inHeaderTags i d =
+inHeaderTags :: PandocMonad m => Int -> String -> Doc -> OD m Doc
+inHeaderTags i ident d =
return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i)
- , ("text:outline-level", show i)] d
+ , ("text:outline-level", show i)]
+ $ if null ident
+ then d
+ else selfClosingTag "text:bookmark-start" [ ("text:name", ident) ]
+ <> d <>
+ selfClosingTag "text:bookmark-end" [ ("text:name", ident) ]
inQuotes :: QuoteType -> Doc -> Doc
inQuotes SingleQuote s = char '\8216' <> s <> char '\8217'
@@ -218,6 +230,11 @@ handleSpaces s
-- | Convert Pandoc document to string in OpenDocument format.
writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeOpenDocument opts (Pandoc meta blocks) = do
+ let defLang = Lang "en" "US" "" []
+ lang <- case lookupMetaString "lang" meta of
+ "" -> pure defLang
+ s -> fromMaybe defLang <$> toLang (Just s)
+ setTranslations lang
let colwidth = if writerWrapText opts == WrapAuto
then Just $ writerColumns opts
else Nothing
@@ -349,8 +366,9 @@ blockToOpenDocument o bs
| LineBlock b <- bs = blockToOpenDocument o $ linesToPara b
| Div attr xs <- bs = withLangFromAttr attr
(blocksToOpenDocument o xs)
- | Header i _ b <- bs = setFirstPara >>
- (inHeaderTags i =<< inlinesToOpenDocument o b)
+ | Header i (ident,_,_) b
+ <- bs = setFirstPara >> (inHeaderTags i ident
+ =<< inlinesToOpenDocument o b)
| BlockQuote b <- bs = setFirstPara >> mkBlockQuote b
| DefinitionList b <- bs = setFirstPara >> defList b
| BulletList b <- bs = setFirstPara >> bulletListToOpenDocument o b
@@ -394,11 +412,11 @@ blockToOpenDocument o bs
mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles
captionDoc <- if null c
then return empty
- else withParagraphStyle o "Table" [Para c]
+ else inlinesToOpenDocument o c >>= numberedTableCaption
th <- if all null h
then return empty
- else colHeadsToOpenDocument o name (map fst paraHStyles) h
- tr <- mapM (tableRowToOpenDocument o name (map fst paraStyles)) r
+ else colHeadsToOpenDocument o (map fst paraHStyles) h
+ tr <- mapM (tableRowToOpenDocument o (map fst paraStyles)) r
return $ inTags True "table:table" [ ("table:name" , name)
, ("table:style-name", name)
] (vcat columns $$ th $$ vcat tr) $$ captionDoc
@@ -406,28 +424,54 @@ blockToOpenDocument o bs
withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]]
| otherwise = do
imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]]
- captionDoc <- withParagraphStyle o "FigureCaption" [Para caption]
+ captionDoc <- inlinesToOpenDocument o caption >>= numberedFigureCaption
return $ imageDoc $$ captionDoc
+
+numberedTableCaption :: PandocMonad m => Doc -> OD m Doc
+numberedTableCaption caption = do
+ id' <- gets stTableCaptionId
+ modify (\st -> st{ stTableCaptionId = id' + 1 })
+ capterm <- translateTerm Term.Table
+ return $ numberedCaption "Table" capterm "Table" id' caption
+
+numberedFigureCaption :: PandocMonad m => Doc -> OD m Doc
+numberedFigureCaption caption = do
+ id' <- gets stImageCaptionId
+ modify (\st -> st{ stImageCaptionId = id' + 1 })
+ capterm <- translateTerm Term.Figure
+ return $ numberedCaption "FigureCaption" capterm "Illustration" id' caption
+
+numberedCaption :: String -> String -> String -> Int -> Doc -> Doc
+numberedCaption style term name num caption =
+ let t = text term
+ r = num - 1
+ s = inTags False "text:sequence" [ ("text:ref-name", "ref" ++ name ++ show r),
+ ("text:name", name),
+ ("text:formula", "ooow:" ++ name ++ "+1"),
+ ("style:num-format", "1") ] $ text $ show num
+ c = text ": "
+ in inParagraphTagsWithStyle style $ hcat [ t, text " ", s, c, caption ]
+
colHeadsToOpenDocument :: PandocMonad m
- => WriterOptions -> String -> [String] -> [[Block]]
+ => WriterOptions -> [String] -> [[Block]]
-> OD m Doc
-colHeadsToOpenDocument o tn ns hs =
+colHeadsToOpenDocument o ns hs =
inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$>
- mapM (tableItemToOpenDocument o tn) (zip ns hs)
+ mapM (tableItemToOpenDocument o "TableHeaderRowCell") (zip ns hs)
tableRowToOpenDocument :: PandocMonad m
- => WriterOptions -> String -> [String] -> [[Block]]
+ => WriterOptions -> [String] -> [[Block]]
-> OD m Doc
-tableRowToOpenDocument o tn ns cs =
+tableRowToOpenDocument o ns cs =
inTagsIndented "table:table-row" . vcat <$>
- mapM (tableItemToOpenDocument o tn) (zip ns cs)
+ mapM (tableItemToOpenDocument o "TableRowCell") (zip ns cs)
tableItemToOpenDocument :: PandocMonad m
=> WriterOptions -> String -> (String,[Block])
-> OD m Doc
-tableItemToOpenDocument o tn (n,i) =
- let a = [ ("table:style-name" , tn ++ ".A1" )
+tableItemToOpenDocument o s (n,i) =
+ let a = [ ("table:style-name" , s )
, ("office:value-type", "string" )
]
in inTags True "table:table-cell" a <$>
@@ -500,7 +544,9 @@ inlineToOpenDocument o ils
modify (\st -> st{ stImageId = id' + 1 })
let getDims [] = []
getDims (("width", w) :xs) = ("svg:width", w) : getDims xs
+ getDims (("rel-width", w):xs) = ("style:rel-width", w) : getDims xs
getDims (("height", h):xs) = ("svg:height", h) : getDims xs
+ getDims (("rel-height", w):xs) = ("style:rel-height", w) : getDims xs
getDims (_:xs) = getDims xs
return $ inTags False "draw:frame"
(("draw:name", "img" ++ show id') : getDims kvs) $
@@ -555,10 +601,18 @@ orderedListLevelStyle (s,n, d) (l,ls) =
listLevelStyle :: Int -> Doc
listLevelStyle i =
- let indent = show (0.4 * fromIntegral (i - 1) :: Double) in
- selfClosingTag "style:list-level-properties"
- [ ("text:space-before" , indent ++ "in")
- , ("text:min-label-width", "0.4in")]
+ let indent = show (0.5 * fromIntegral i :: Double) in
+ inTags True "style:list-level-properties"
+ [ ("text:list-level-position-and-space-mode",
+ "label-alignment")
+ , ("fo:text-align", "right")
+ ] $
+ selfClosingTag "style:list-level-label-alignment"
+ [ ("text:label-followed-by", "listtab")
+ , ("text:list-tab-stop-position", indent ++ "in")
+ , ("fo:text-indent", "-0.1in")
+ , ("fo:margin-left", indent ++ "in")
+ ]
tableStyle :: Int -> [(Char,Double)] -> Doc
tableStyle num wcs =
@@ -576,13 +630,21 @@ tableStyle num wcs =
, ("style:family", "table-column" )] $
selfClosingTag "style:table-column-properties"
[("style:rel-column-width", printf "%d*" (floor $ w * 65535 :: Integer))]
- cellStyle = inTags True "style:style"
- [ ("style:name" , tableId ++ ".A1")
+ headerRowCellStyle = inTags True "style:style"
+ [ ("style:name" , "TableHeaderRowCell")
+ , ("style:family", "table-cell" )] $
+ selfClosingTag "style:table-cell-properties"
+ [ ("fo:border", "none")]
+ rowCellStyle = inTags True "style:style"
+ [ ("style:name" , "TableRowCell")
, ("style:family", "table-cell" )] $
selfClosingTag "style:table-cell-properties"
[ ("fo:border", "none")]
+ cellStyles = if num == 0
+ then headerRowCellStyle $$ rowCellStyle
+ else empty
columnStyles = map colStyle wcs
- in table $$ vcat columnStyles $$ cellStyle
+ in cellStyles $$ table $$ vcat columnStyles
paraStyle :: PandocMonad m => [(String,String)] -> OD m Int
paraStyle attrs = do
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index a71775e13..12a54fd71 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -109,7 +109,7 @@ escapeString = escapeStringUsing $
, ('\x2013',"--")
, ('\x2019',"'")
, ('\x2026',"...")
- ] ++ backslashEscapes "^_"
+ ]
isRawFormat :: Format -> Bool
isRawFormat f =
@@ -266,7 +266,7 @@ orderedListItemToOrg marker items = do
contents <- blockListToOrg items
return $ hang (length marker + 1) (text marker <> space) (contents <> cr)
--- | Convert defintion list item (label, list of blocks) to Org.
+-- | Convert definition list item (label, list of blocks) to Org.
definitionListItemToOrg :: PandocMonad m
=> ([Inline], [[Block]]) -> Org m Doc
definitionListItemToOrg (label, defs) = do
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
index e14476b16..c97d8d770 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
@@ -72,7 +72,7 @@ import Text.Pandoc.Logging
import Text.Pandoc.Walk
import Data.Time (UTCTime)
import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element"
-import Text.Pandoc.Writers.Shared (metaValueToInlines)
+import Text.Pandoc.Writers.Shared (lookupMetaInlines)
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Maybe (maybeToList, fromMaybe)
@@ -731,9 +731,9 @@ makeEndNotesSlideBlocks = do
anchorSet <- M.keysSet <$> gets stAnchorMap
if M.null noteIds
then return []
- else let title = case lookupMeta "notes-title" meta of
- Just val -> metaValueToInlines val
- Nothing -> [Str "Notes"]
+ else let title = case lookupMetaInlines "notes-title" meta of
+ [] -> [Str "Notes"]
+ ls -> ls
ident = Shared.uniqueIdent title anchorSet
hdr = Header slideLevel (ident, [], []) title
blks = concatMap (\(n, bs) -> makeNoteEntry n bs) $
@@ -744,13 +744,7 @@ getMetaSlide :: Pres (Maybe Slide)
getMetaSlide = do
meta <- asks envMetadata
title <- inlinesToParElems $ docTitle meta
- subtitle <- inlinesToParElems $
- case lookupMeta "subtitle" meta of
- Just (MetaString s) -> [Str s]
- Just (MetaInlines ils) -> ils
- Just (MetaBlocks [Plain ils]) -> ils
- Just (MetaBlocks [Para ils]) -> ils
- _ -> []
+ subtitle <- inlinesToParElems $ lookupMetaInlines "subtitle" meta
authors <- mapM inlinesToParElems $ docAuthors meta
date <- inlinesToParElems $ docDate meta
if null title && null subtitle && null authors && null date
@@ -785,9 +779,9 @@ makeTOCSlide blks = local (\env -> env{envCurSlideId = tocSlideId}) $ do
contents <- BulletList <$> mapM elementToListItem (Shared.hierarchicalize blks)
meta <- asks envMetadata
slideLevel <- asks envSlideLevel
- let tocTitle = case lookupMeta "toc-title" meta of
- Just val -> metaValueToInlines val
- Nothing -> [Str "Table of Contents"]
+ let tocTitle = case lookupMetaInlines "toc-title" meta of
+ [] -> [Str "Table of Contents"]
+ ls -> ls
hdr = Header slideLevel nullAttr tocTitle
blocksToSlide [hdr, contents]
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index f82597c55..d64529c21 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -35,7 +35,7 @@ module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
import Prelude
import Control.Monad.State.Strict
import Data.Char (isSpace, toLower)
-import Data.List (isPrefixOf, stripPrefix)
+import Data.List (isPrefixOf, stripPrefix, transpose)
import Data.Maybe (fromMaybe)
import Data.Text (Text, stripEnd)
import qualified Text.Pandoc.Builder as B
@@ -82,14 +82,12 @@ pandocToRST (Pandoc meta blocks) = do
else Nothing
let render' :: Doc -> Text
render' = render colwidth
- let subtit = case lookupMeta "subtitle" meta of
- Just (MetaBlocks [Plain xs]) -> xs
- _ -> []
+ let subtit = lookupMetaInlines "subtitle" meta
title <- titleToRST (docTitle meta) subtit
metadata <- metaToJSON opts
(fmap render' . blockListToRST)
(fmap (stripEnd . render') . inlineListToRST)
- $ B.deleteMeta "title" $ B.deleteMeta "subtitle" meta
+ meta
body <- blockListToRST' True $ case writerTemplate opts of
Just _ -> normalizeHeadings 1 blocks
Nothing -> blocks
@@ -103,8 +101,9 @@ pandocToRST (Pandoc meta blocks) = do
let context = defField "body" main
$ defField "toc" (writerTableOfContents opts)
$ defField "toc-depth" (show $ writerTOCDepth opts)
+ $ defField "number-sections" (writerNumberSections opts)
$ defField "math" hasMath
- $ defField "title" (render Nothing title :: String)
+ $ defField "titleblock" (render Nothing title :: String)
$ defField "math" hasMath
$ defField "rawtex" rawTeX metadata
case writerTemplate opts of
@@ -209,11 +208,26 @@ blockToRST :: PandocMonad m
=> Block -- ^ Block element
-> RST m Doc
blockToRST Null = return empty
-blockToRST (Div attr bs) = do
+blockToRST (Div ("",["admonition-title"],[]) _) = return empty
+ -- this is generated by the rst reader and can safely be
+ -- omitted when we're generating rst
+blockToRST (Div (ident,classes,_kvs) bs) = do
contents <- blockListToRST bs
- let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr)
- let endTag = ".. raw:: html" $+$ nest 3 "</div>"
- return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline
+ let admonitions = ["attention","caution","danger","error","hint",
+ "important","note","tip","warning","admonition"]
+ let admonition = case classes of
+ (cl:_)
+ | cl `elem` admonitions
+ -> ".. " <> text cl <> "::"
+ cls -> ".. container::" <> space <>
+ text (unwords (filter (/= "container") cls))
+ return $ blankline $$
+ admonition $$
+ (if null ident
+ then blankline
+ else " :name: " <> text ident $$ blankline) $$
+ nest 3 contents $$
+ blankline
blockToRST (Plain inlines) = inlineListToRST inlines
-- title beginning with fig: indicates that the image is a figure
blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do
@@ -236,6 +250,7 @@ blockToRST (LineBlock lns) =
linesToLineBlock lns
blockToRST (RawBlock f@(Format f') str)
| f == "rst" = return $ text str
+ | f == "tex" = blockToRST (RawBlock (Format "latex") str)
| otherwise = return $ blankline <> ".. raw:: " <>
text (map toLower f') $+$
nest 3 (text str) $$ blankline
@@ -272,7 +287,8 @@ blockToRST (CodeBlock (_,classes,kvs) str) = do
then return $ prefixed "> " (text str) $$ blankline
else return $
(case [c | c <- classes,
- c `notElem` ["sourceCode","literate","numberLines"]] of
+ c `notElem` ["sourceCode","literate","numberLines",
+ "number-lines","example"]] of
[] -> "::"
(lang:_) -> (".. code:: " <> text lang) $$ numberlines)
$+$ nest 3 (text str) $$ blankline
@@ -288,9 +304,12 @@ blockToRST (Table caption aligns widths headers rows) = do
modify $ \st -> st{ stOptions = oldOpts }
return result
opts <- gets stOptions
- tbl <- gridTable opts blocksToDoc (all null headers)
- (map (const AlignDefault) aligns) widths
- headers rows
+ let isSimple = all (== 0) widths
+ tbl <- if isSimple
+ then simpleTable opts blocksToDoc headers rows
+ else gridTable opts blocksToDoc (all null headers)
+ (map (const AlignDefault) aligns) widths
+ headers rows
return $ if null caption
then tbl $$ blankline
else (".. table:: " <> caption') $$ blankline $$ nest 3 tbl $$
@@ -331,7 +350,7 @@ orderedListItemToRST marker items = do
let marker' = marker ++ " "
return $ hang (length marker') (text marker') $ contents <> cr
--- | Convert defintion list item (label, list of blocks) to RST.
+-- | Convert definition list item (label, list of blocks) to RST.
definitionListItemToRST :: PandocMonad m => ([Inline], [[Block]]) -> RST m Doc
definitionListItemToRST (label, defs) = do
label' <- inlineListToRST label
@@ -470,6 +489,8 @@ flatten outer
-- them and they will be readable and parsable
(Quoted _ _, _) -> keep f i
(_, Quoted _ _) -> keep f i
+ -- inlineToRST handles this case properly so it's safe to keep
+ (Link _ _ _, Image _ _ _) -> keep f i
-- parent inlines would prevent links from being correctly
-- parsed, in this case we prioritise the content over the
-- style
@@ -569,15 +590,18 @@ inlineToRST (Quoted DoubleQuote lst) = do
else return $ "“" <> contents <> "”"
inlineToRST (Cite _ lst) =
writeInlines lst
+inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do
+ return $ ":" <> text role <> ":`" <> text str <> "`"
inlineToRST (Code _ str) = do
opts <- gets stOptions
-- we trim the string because the delimiters must adjoin a
-- non-space character; see #3496
-- we use :literal: when the code contains backticks, since
-- :literal: allows backslash-escapes; see #3974
- return $ if '`' `elem` str
- then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
- else "``" <> text (trim str) <> "``"
+ return $
+ if '`' `elem` str
+ then ":literal:`" <> text (escapeString opts (trim str)) <> "`"
+ else "``" <> text (trim str) <> "``"
inlineToRST (Str str) = do
opts <- gets stOptions
return $ text $
@@ -672,3 +696,30 @@ imageDimsToRST attr = do
Just dim -> cols dim
Nothing -> empty
return $ cr <> name $$ showDim Width $$ showDim Height
+
+simpleTable :: PandocMonad m
+ => WriterOptions
+ -> (WriterOptions -> [Block] -> m Doc)
+ -> [[Block]]
+ -> [[[Block]]]
+ -> m Doc
+simpleTable opts blocksToDoc headers rows = do
+ -- can't have empty cells in first column:
+ let fixEmpties (d:ds) = if isEmpty d
+ then text "\\ " : ds
+ else d : ds
+ fixEmpties [] = []
+ headerDocs <- if all null headers
+ then return []
+ else fixEmpties <$> mapM (blocksToDoc opts) headers
+ rowDocs <- mapM (fmap fixEmpties . mapM (blocksToDoc opts)) rows
+ let numChars [] = 0
+ numChars xs = maximum . map offset $ xs
+ let colWidths = map numChars $ transpose (headerDocs : rowDocs)
+ let toRow = hsep . zipWith lblock colWidths
+ let hline = hsep (map (\n -> text (replicate n '=')) colWidths)
+ let hdr = if all null headers
+ then mempty
+ else hline $$ toRow headerDocs
+ let bdy = vcat $ map toRow rowDocs
+ return $ hdr $$ hline $$ bdy $$ hline
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 3045c1c10..ed8dc9ae4 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -341,8 +341,10 @@ listItemToRTF :: PandocMonad m
listItemToRTF alignment indent marker [] = return $
rtfCompact (indent + listIncrement) (negate listIncrement) alignment
(marker ++ "\\tx" ++ show listIncrement ++ "\\tab ")
-listItemToRTF alignment indent marker list = do
- (first:rest) <- mapM (blockToRTF (indent + listIncrement) alignment) list
+listItemToRTF alignment indent marker (listFirst:listRest) = do
+ let f = blockToRTF (indent + listIncrement) alignment
+ first <- f listFirst
+ rest <- mapM f listRest
let listMarker = "\\fi" ++ show (negate listIncrement) ++ " " ++ marker ++
"\\tx" ++ show listIncrement ++ "\\tab"
let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d =
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 2edce7deb..ed2c46d7b 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -38,17 +38,27 @@ module Text.Pandoc.Writers.Shared (
, resetField
, defField
, tagWithAttrs
+ , isDisplayMath
, fixDisplayMath
, unsmartify
+ , hasSimpleCells
, gridTable
- , metaValueToInlines
+ , lookupMetaBool
+ , lookupMetaBlocks
+ , lookupMetaInlines
+ , lookupMetaString
, stripLeadingTrailingSpace
+ , groffEscape
+ , toSubscript
+ , toSuperscript
)
where
import Prelude
import Control.Monad (zipWithM)
+import Data.Monoid (Any (..))
import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),
encode, fromJSON)
+import Data.Char (chr, ord, isAscii, isSpace)
import qualified Data.HashMap.Strict as H
import Data.List (groupBy, intersperse, transpose)
import qualified Data.Map as M
@@ -59,9 +69,11 @@ import qualified Text.Pandoc.Builder as Builder
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Pretty
-import Text.Pandoc.Walk (query)
+import Text.Pandoc.Shared (stringify)
import Text.Pandoc.UTF8 (toStringLazy)
import Text.Pandoc.XML (escapeStringForXML)
+import Text.Pandoc.Walk (query)
+import Text.Printf (printf)
-- | Create JSON value for template from a 'Meta' and an association list
-- of variables, specified at the command line or in the writer.
@@ -187,8 +199,9 @@ tagWithAttrs tag (ident,classes,kvs) = hsep
] <> ">"
isDisplayMath :: Inline -> Bool
-isDisplayMath (Math DisplayMath _) = True
-isDisplayMath _ = False
+isDisplayMath (Math DisplayMath _) = True
+isDisplayMath (Span _ [Math DisplayMath _]) = True
+isDisplayMath _ = False
stripLeadingTrailingSpace :: [Inline] -> [Inline]
stripLeadingTrailingSpace = go . reverse . go . reverse
@@ -233,6 +246,21 @@ unsmartify opts ('\8216':xs) = '\'' : unsmartify opts xs
unsmartify opts (x:xs) = x : unsmartify opts xs
unsmartify _ [] = []
+-- | True if block is a table that can be represented with
+-- one line per row.
+hasSimpleCells :: Block -> Bool
+hasSimpleCells (Table _caption _aligns _widths headers rows) =
+ all isSimpleCell (concat (headers:rows))
+ where
+ isLineBreak LineBreak = Any True
+ isLineBreak _ = Any False
+ hasLineBreak = getAny . query isLineBreak
+ isSimpleCell [Plain ils] = not (hasLineBreak ils)
+ isSimpleCell [Para ils ] = not (hasLineBreak ils)
+ isSimpleCell [] = True
+ isSimpleCell _ = False
+hasSimpleCells _ = False
+
gridTable :: Monad m
=> WriterOptions
-> (WriterOptions -> [Block] -> m Doc)
@@ -332,9 +360,82 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do
body $$
border '-' (repeat AlignDefault) widthsInChars
-metaValueToInlines :: MetaValue -> [Inline]
-metaValueToInlines (MetaString s) = [Str s]
-metaValueToInlines (MetaInlines ils) = ils
-metaValueToInlines (MetaBlocks bs) = query return bs
-metaValueToInlines (MetaBool b) = [Str $ show b]
-metaValueToInlines _ = []
+
+
+-- | Retrieve the metadata value for a given @key@
+-- and convert to Bool.
+lookupMetaBool :: String -> Meta -> Bool
+lookupMetaBool key meta =
+ case lookupMeta key meta of
+ Just (MetaBlocks _) -> True
+ Just (MetaInlines _) -> True
+ Just (MetaString (_:_)) -> True
+ Just (MetaBool True) -> True
+ _ -> False
+
+-- | Retrieve the metadata value for a given @key@
+-- and extract blocks.
+lookupMetaBlocks :: String -> Meta -> [Block]
+lookupMetaBlocks key meta =
+ case lookupMeta key meta of
+ Just (MetaBlocks bs) -> bs
+ Just (MetaInlines ils) -> [Plain ils]
+ Just (MetaString s) -> [Plain [Str s]]
+ _ -> []
+
+-- | Retrieve the metadata value for a given @key@
+-- and extract inlines.
+lookupMetaInlines :: String -> Meta -> [Inline]
+lookupMetaInlines key meta =
+ case lookupMeta key meta of
+ Just (MetaString s) -> [Str s]
+ Just (MetaInlines ils) -> ils
+ Just (MetaBlocks [Plain ils]) -> ils
+ Just (MetaBlocks [Para ils]) -> ils
+ _ -> []
+
+-- | Retrieve the metadata value for a given @key@
+-- and convert to String.
+lookupMetaString :: String -> Meta -> String
+lookupMetaString key meta =
+ case lookupMeta key meta of
+ Just (MetaString s) -> s
+ Just (MetaInlines ils) -> stringify ils
+ Just (MetaBlocks bs) -> stringify bs
+ Just (MetaBool b) -> show b
+ _ -> ""
+
+-- | Escape non-ASCII characters using groff \u[..] sequences.
+groffEscape :: T.Text -> T.Text
+groffEscape = T.concatMap toUchar
+ where toUchar c
+ | isAscii c = T.singleton c
+ | otherwise = T.pack $ printf "\\[u%04X]" (ord c)
+
+
+toSuperscript :: Char -> Maybe Char
+toSuperscript '1' = Just '\x00B9'
+toSuperscript '2' = Just '\x00B2'
+toSuperscript '3' = Just '\x00B3'
+toSuperscript '+' = Just '\x207A'
+toSuperscript '-' = Just '\x207B'
+toSuperscript '=' = Just '\x207C'
+toSuperscript '(' = Just '\x207D'
+toSuperscript ')' = Just '\x207E'
+toSuperscript c
+ | c >= '0' && c <= '9' =
+ Just $ chr (0x2070 + (ord c - 48))
+ | isSpace c = Just c
+ | otherwise = Nothing
+
+toSubscript :: Char -> Maybe Char
+toSubscript '+' = Just '\x208A'
+toSubscript '-' = Just '\x208B'
+toSubscript '=' = Just '\x208C'
+toSubscript '(' = Just '\x208D'
+toSubscript ')' = Just '\x208E'
+toSubscript c
+ | c >= '0' && c <= '9' =
+ Just $ chr (0x2080 + (ord c - 48))
+ | isSpace c = Just c
+ | otherwise = Nothing
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
index e461f5715..9169c8515 100644
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -35,7 +35,6 @@ import Prelude
import Data.Char (toLower)
import Data.List (isPrefixOf, stripPrefix)
import Data.Text (Text)
-import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Definition
import Text.Pandoc.Highlighting (languages, languagesByExtension)
@@ -48,16 +47,6 @@ import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Writers.Shared
import Text.Pandoc.XML
--- | Convert list of authors to a docbook <author> section
-authorToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m B.Inlines
-authorToTEI opts name' = do
- name <- render Nothing <$> inlinesToTEI opts name'
- let colwidth = if writerWrapText opts == WrapAuto
- then Just $ writerColumns opts
- else Nothing
- return $ B.rawInline "tei" $ render colwidth $
- inTagsSimple "author" (text $ escapeStringForXML name)
-
-- | Convert Pandoc document to string in Docbook format.
writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeTEI opts (Pandoc meta blocks) = do
@@ -72,13 +61,11 @@ writeTEI opts (Pandoc meta blocks) = do
TopLevelChapter -> 0
TopLevelSection -> 1
TopLevelDefault -> 1
- auths' <- mapM (authorToTEI opts) $ docAuthors meta
- let meta' = B.setMeta "author" auths' meta
metadata <- metaToJSON opts
(fmap (render' . vcat) .
mapM (elementToTEI opts startLvl) . hierarchicalize)
(fmap render' . inlinesToTEI opts)
- meta'
+ meta
main <- (render' . vcat) <$> mapM (elementToTEI opts startLvl) elements
let context = defField "body" main
$
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 305b41206..21d1f4eca 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -56,8 +56,6 @@ import Text.Printf (printf)
data WriterState =
WriterState { stStrikeout :: Bool -- document contains strikeout
- , stSuperscript :: Bool -- document contains superscript
- , stSubscript :: Bool -- document contains subscript
, stEscapeComma :: Bool -- in a context where we need @comma
, stIdentifiers :: Set.Set String -- header ids used already
, stOptions :: WriterOptions -- writer options
@@ -74,8 +72,7 @@ type TI m = StateT WriterState m
writeTexinfo :: PandocMonad m => WriterOptions -> Pandoc -> m Text
writeTexinfo options document =
evalStateT (pandocToTexinfo options $ wrapTop document)
- WriterState { stStrikeout = False, stSuperscript = False,
- stEscapeComma = False, stSubscript = False,
+ WriterState { stStrikeout = False, stEscapeComma = False,
stIdentifiers = Set.empty, stOptions = options}
-- | Add a "Top" node around the document, needed by Texinfo.
@@ -102,8 +99,6 @@ pandocToTexinfo options (Pandoc meta blocks) = do
let context = defField "body" body
$ defField "toc" (writerTableOfContents options)
$ defField "titlepage" titlePage
- $ defField "subscript" (stSubscript st)
- $ defField "superscript" (stSuperscript st)
$
defField "strikeout" (stStrikeout st) metadata
case writerTemplate options of
@@ -351,12 +346,9 @@ collectNodes :: Int -> [Block] -> [Block]
collectNodes _ [] = []
collectNodes level (x:xs) =
case x of
- (Header hl _ _) ->
- if hl < level
- then []
- else if hl == level
- then x : collectNodes level xs
- else collectNodes level xs
+ (Header hl _ _) | hl < level -> []
+ | hl == level -> x : collectNodes level xs
+ | otherwise -> collectNodes level xs
_ ->
collectNodes level xs
@@ -394,7 +386,7 @@ defListItemToTexinfo (term, defs) = do
inlineListToTexinfo :: PandocMonad m
=> [Inline] -- ^ Inlines to convert
-> TI m Doc
-inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat
+inlineListToTexinfo lst = hcat <$> mapM inlineToTexinfo lst
-- | Convert list of inline elements to Texinfo acceptable for a node name.
inlineListForNode :: PandocMonad m
@@ -416,10 +408,10 @@ inlineToTexinfo (Span _ lst) =
inlineListToTexinfo lst
inlineToTexinfo (Emph lst) =
- inlineListToTexinfo lst >>= return . inCmd "emph"
+ inCmd "emph" <$> inlineListToTexinfo lst
inlineToTexinfo (Strong lst) =
- inlineListToTexinfo lst >>= return . inCmd "strong"
+ inCmd "strong" <$> inlineListToTexinfo lst
inlineToTexinfo (Strikeout lst) = do
modify $ \st -> st{ stStrikeout = True }
@@ -427,17 +419,15 @@ inlineToTexinfo (Strikeout lst) = do
return $ text "@textstrikeout{" <> contents <> text "}"
inlineToTexinfo (Superscript lst) = do
- modify $ \st -> st{ stSuperscript = True }
contents <- inlineListToTexinfo lst
- return $ text "@textsuperscript{" <> contents <> char '}'
+ return $ text "@sup{" <> contents <> char '}'
inlineToTexinfo (Subscript lst) = do
- modify $ \st -> st{ stSubscript = True }
contents <- inlineListToTexinfo lst
- return $ text "@textsubscript{" <> contents <> char '}'
+ return $ text "@sub{" <> contents <> char '}'
inlineToTexinfo (SmallCaps lst) =
- inlineListToTexinfo lst >>= return . inCmd "sc"
+ inCmd "sc" <$> inlineListToTexinfo lst
inlineToTexinfo (Code _ str) =
return $ text $ "@code{" ++ stringToTexinfo str ++ "}"
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 0ed79d2df..c7d96454a 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -73,7 +73,7 @@ pandocToTextile opts (Pandoc meta blocks) = do
(inlineListToTextile opts) meta
body <- blockListToTextile opts blocks
notes <- gets $ unlines . reverse . stNotes
- let main = pack $ body ++ if null notes then "" else ("\n\n" ++ notes)
+ let main = pack $ body ++ if null notes then "" else "\n\n" ++ notes
let context = defField "body" main metadata
case writerTemplate opts of
Nothing -> return main
@@ -154,7 +154,7 @@ blockToTextile _ HorizontalRule = return "<hr />\n"
blockToTextile opts (Header level (ident,classes,keyvals) inlines) = do
contents <- inlineListToTextile opts inlines
- let identAttr = if null ident then "" else ('#':ident)
+ let identAttr = if null ident then "" else '#':ident
let attribs = if null identAttr && null classes
then ""
else "(" ++ unwords classes ++ identAttr ++ ")"
@@ -382,13 +382,13 @@ blockListToTextile :: PandocMonad m
-> [Block] -- ^ List of block elements
-> TW m String
blockListToTextile opts blocks =
- mapM (blockToTextile opts) blocks >>= return . vcat
+ vcat <$> mapM (blockToTextile opts) blocks
-- | Convert list of Pandoc inline elements to Textile.
inlineListToTextile :: PandocMonad m
=> WriterOptions -> [Inline] -> TW m String
inlineListToTextile opts lst =
- mapM (inlineToTextile opts) lst >>= return . concat
+ concat <$> mapM (inlineToTextile opts) lst
-- | Convert Pandoc inline element to Textile.
inlineToTextile :: PandocMonad m => WriterOptions -> Inline -> TW m String
@@ -463,15 +463,15 @@ inlineToTextile _ SoftBreak = return " "
inlineToTextile _ Space = return " "
inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do
- let classes = if null cls
- then ""
- else "(" ++ unwords cls ++ ")"
label <- case txt of
[Code _ s]
| s == src -> return "$"
[Str s]
| s == src -> return "$"
_ -> inlineListToTextile opts txt
+ let classes = if null cls || cls == ["uri"] && label == "$"
+ then ""
+ else "(" ++ unwords cls ++ ")"
return $ "\"" ++ classes ++ label ++ "\":" ++ src
inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do