aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/Custom.hs83
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs57
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs88
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fields.hs25
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs289
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse/Styles.hs25
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs16
-rw-r--r--src/Text/Pandoc/Readers/HTML/Parsing.hs17
-rw-r--r--src/Text/Pandoc/Readers/HTML/Table.hs13
-rw-r--r--src/Text/Pandoc/Readers/Ipynb.hs44
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs15
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs47
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Inline.hs10
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Macro.hs117
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Math.hs14
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs79
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/SIunitx.hs6
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Table.hs4
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Types.hs6
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs108
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs9
-rw-r--r--src/Text/Pandoc/Readers/Metadata.hs167
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs24
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs12
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs314
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs4
-rw-r--r--src/Text/Pandoc/Readers/RST.hs38
-rw-r--r--src/Text/Pandoc/Readers/RTF.hs1351
28 files changed, 2351 insertions, 631 deletions
diff --git a/src/Text/Pandoc/Readers/Custom.hs b/src/Text/Pandoc/Readers/Custom.hs
new file mode 100644
index 000000000..9252a9e45
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Custom.hs
@@ -0,0 +1,83 @@
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Readers.Custom
+ Copyright : Copyright (C) 2021 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Supports custom parsers written in Lua which produce a Pandoc AST.
+-}
+module Text.Pandoc.Readers.Custom ( readCustom ) where
+import Control.Exception
+import Control.Monad (when)
+import HsLua as Lua hiding (Operation (Div), render)
+import Control.Monad.IO.Class (MonadIO)
+import Text.Pandoc.Definition
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Logging
+import Text.Pandoc.Lua (Global (..), runLua, setGlobals)
+import Text.Pandoc.Lua.PandocLua
+import Text.Pandoc.Lua.Marshal.Pandoc (peekPandoc)
+import Text.Pandoc.Lua.Util (dofileWithTraceback, callWithTraceback,
+ pcallWithTraceback)
+import Text.Pandoc.Options
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import qualified Data.Text as T
+
+-- | Convert custom markup to Pandoc.
+readCustom :: (PandocMonad m, MonadIO m, ToSources s)
+ => FilePath -> ReaderOptions -> s -> m Pandoc
+readCustom luaFile opts srcs = do
+ let globals = [ PANDOC_SCRIPT_FILE luaFile ]
+ res <- runLua $ do
+ setGlobals globals
+ stat <- dofileWithTraceback luaFile
+ -- check for error in lua script (later we'll change the return type
+ -- to handle this more gracefully):
+ when (stat /= Lua.OK)
+ Lua.throwErrorAsException
+ parseCustom
+ case res of
+ Left msg -> throw msg
+ Right doc -> return doc
+ where
+ parseCustom = do
+ let input = toSources srcs
+ getglobal "Reader"
+ push input
+ push opts
+ pcallWithTraceback 2 1 >>= \case
+ OK -> forcePeek $ peekPandoc top
+ ErrRun -> do
+ -- Caught a runtime error. Check if parsing might work if we
+ -- pass a string instead of a Sources list, then retry.
+ runPeek (peekText top) >>= \case
+ Failure {} ->
+ -- not a string error object. Bail!
+ throwErrorAsException
+ Success errmsg -> do
+ if "string expected, got pandoc Sources" `T.isInfixOf` errmsg
+ then do
+ pop 1
+ _ <- unPandocLua $ do
+ report $ Deprecated "old Reader function signature" $
+ T.unlines
+ [ "Reader functions should accept a sources list; "
+ , "functions expecting `string` input are deprecated. "
+ , "Use `tostring` to convert the first argument to a "
+ , "string."
+ ]
+ getglobal "Reader"
+ push $ sourcesToText input -- push sources as string
+ push opts
+ callWithTraceback 2 1
+ forcePeek $ peekPandoc top
+ else
+ -- nothing we can do here
+ throwErrorAsException
+ _ -> -- not a runtime error, we won't be able to recover from that
+ throwErrorAsException
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index c49b82ccf..be90eb23e 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -19,7 +19,7 @@ import Data.Foldable (asum)
import Data.Generics
import Data.List (intersperse,elemIndex)
import Data.List.NonEmpty (nonEmpty)
-import Data.Maybe (fromMaybe,mapMaybe)
+import Data.Maybe (catMaybes,fromMaybe,mapMaybe,maybeToList)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
@@ -316,7 +316,7 @@ List of all DocBook tags, with [x] indicating implemented,
[ ] postcode - A postal code in an address
[x] preface - Introductory matter preceding the first chapter of a book
[ ] prefaceinfo - Meta-information for a Preface
-[ ] primary - The primary word or phrase under which an index term should be
+[x] primary - The primary word or phrase under which an index term should be
sorted
[ ] primaryie - A primary term in an index entry, not in the text
[ ] printhistory - The printing history of a document
@@ -385,7 +385,7 @@ List of all DocBook tags, with [x] indicating implemented,
[o] screeninfo - Information about how a screen shot was produced
[ ] screenshot - A representation of what the user sees or might see on a
computer screen
-[ ] secondary - A secondary word or phrase in an index term
+[x] secondary - A secondary word or phrase in an index term
[ ] secondaryie - A secondary term in an index entry, rather than in the text
[x] sect1 - A top-level section of document
[x] sect1info - Meta-information for a Sect1
@@ -461,7 +461,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] td - A table entry in an HTML table
[x] term - The word or phrase being defined or described in a variable list
[ ] termdef - An inline term definition
-[ ] tertiary - A tertiary word or phrase in an index term
+[x] tertiary - A tertiary word or phrase in an index term
[ ] tertiaryie - A tertiary term in an index entry, rather than in the text
[ ] textdata - Pointer to external text data
[ ] textobject - A wrapper for a text description of an object and its
@@ -829,7 +829,7 @@ parseBlock (Elem e) =
"section" -> gets dbSectionLevel >>= sect . (+1)
"simplesect" ->
gets dbSectionLevel >>=
- sectWith (attrValue "id" e,["unnumbered"],[]) . (+1)
+ sectWith(attrValue "id" e) ["unnumbered"] [] . (+1)
"refsect1" -> sect 1
"refsect2" -> sect 2
"refsect3" -> sect 3
@@ -907,6 +907,7 @@ parseBlock (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
x -> [x]
+ ++ ["numberLines" | attrValue "linenumbering" e == "numbered"]
return $ codeBlockWith (attrValue "id" e, classes', [])
$ trimNl $ strContentRecursive e
parseBlockquote = do
@@ -993,8 +994,8 @@ parseBlock (Elem e) =
(TableHead nullAttr $ toHeaderRow headrows)
[TableBody nullAttr 0 [] $ map toRow bodyrows]
(TableFoot nullAttr [])
- sect n = sectWith (attrValue "id" e,[],[]) n
- sectWith attr n = do
+ sect n = sectWith(attrValue "id" e) [] [] n
+ sectWith elId classes attrs n = do
isbook <- gets dbBook
let n' = if isbook || n == 0 then n + 1 else n
headerText <- case filterChild (named "title") e `mplus`
@@ -1005,7 +1006,14 @@ parseBlock (Elem e) =
modify $ \st -> st{ dbSectionLevel = n }
b <- getBlocks e
modify $ \st -> st{ dbSectionLevel = n - 1 }
- return $ headerWith attr n' headerText <> b
+ return $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr++attrs) n' headerText <> b
+ titleabbrevElAsAttr = do
+ txt <- case filterChild (named "titleabbrev") e `mplus`
+ (filterChild (named "info") e >>=
+ filterChild (named "titleabbrev")) of
+ Just t -> Just ("titleabbrev", strContentRecursive t)
+ Nothing -> Nothing
+ return txt
lineItems = mapM getInlines $ filterChildren (named "line") e
-- | Admonitions are parsed into a div. Following other Docbook tools that output HTML,
-- we parse the optional title as a div with the @title@ class, and give the
@@ -1079,6 +1087,17 @@ elementToStr :: Content -> Content
elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
+childElTextAsAttr :: Text -> Element -> Maybe (Text, Text)
+childElTextAsAttr n e = case findChild q e of
+ Nothing -> Nothing
+ Just childEl -> Just (n, strContentRecursive childEl)
+ where q = QName n (Just "http://docbook.org/ns/docbook") Nothing
+
+attrValueAsOptionalAttr :: Text -> Element -> Maybe (Text, Text)
+attrValueAsOptionalAttr n e = case attrValue n e of
+ "" -> Nothing
+ _ -> Just (n, attrValue n e)
+
parseInline :: PandocMonad m => Content -> DB m Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
@@ -1093,6 +1112,28 @@ parseInline (Elem e) =
if ident /= "" || classes /= []
then innerInlines (spanWith (ident,classes,[]))
else innerInlines id
+ "indexterm" -> do
+ let ident = attrValue "id" e
+ let classes = T.words $ attrValue "role" e
+ let attrs =
+ -- In DocBook, <primary>, <secondary>, <tertiary>, <see>, and <seealso>
+ -- have mixed content models. However, because we're representing these
+ -- elements in Pandoc's AST as attributes of a phrase, we flatten all
+ -- the descendant content of these elements.
+ [ childElTextAsAttr "primary" e
+ , childElTextAsAttr "secondary" e
+ , childElTextAsAttr "tertiary" e
+ , childElTextAsAttr "see" e
+ , childElTextAsAttr "seealso" e
+ , attrValueAsOptionalAttr "significance" e
+ , attrValueAsOptionalAttr "startref" e
+ , attrValueAsOptionalAttr "scope" e
+ , attrValueAsOptionalAttr "class" e
+ -- We don't do anything with the "pagenum" attribute, because these only
+ -- occur within literal <index> sections, which is not supported by Pandoc,
+ -- because Pandoc has no concept of pages.
+ ]
+ return $ spanWith (ident, ("indexterm" : classes), (catMaybes attrs)) mempty
"equation" -> equation e displayMath
"informalequation" -> equation e displayMath
"inlineequation" -> equation e math
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index c06adf7e3..5c8f20c18 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -246,8 +246,8 @@ runToText _ = ""
parPartToText :: ParPart -> T.Text
parPartToText (PlainRun run) = runToText run
-parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs
-parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs
+parPartToText (InternalHyperLink _ children) = T.concat $ map parPartToText children
+parPartToText (ExternalHyperLink _ children) = T.concat $ map parPartToText children
parPartToText _ = ""
blacklistedCharStyles :: [CharStyleName]
@@ -322,6 +322,7 @@ runToInlines (InlineDrawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
+runToInlines InlineDiagram = return $ spanWith ("", ["diagram"], []) $ text "[DIAGRAM]"
extentToAttr :: Extent -> Attr
extentToAttr (Just (w, h)) =
@@ -434,18 +435,21 @@ parPartToInlines' (Drawing fp title alt bs ext) = do
return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
parPartToInlines' Chart =
return $ spanWith ("", ["chart"], []) $ text "[CHART]"
-parPartToInlines' (InternalHyperLink anchor runs) = do
- ils <- smushInlines <$> mapM runToInlines runs
+parPartToInlines' Diagram =
+ return $ spanWith ("", ["diagram"], []) $ text "[DIAGRAM]"
+parPartToInlines' (InternalHyperLink anchor children) = do
+ ils <- smushInlines <$> mapM parPartToInlines' children
return $ link ("#" <> anchor) "" ils
-parPartToInlines' (ExternalHyperLink target runs) = do
- ils <- smushInlines <$> mapM runToInlines runs
+parPartToInlines' (ExternalHyperLink target children) = do
+ ils <- smushInlines <$> mapM parPartToInlines' children
return $ link target "" ils
parPartToInlines' (PlainOMath exps) =
return $ math $ writeTeX exps
-parPartToInlines' (Field info runs) =
+parPartToInlines' (Field info children) =
case info of
- HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs
- UnknownField -> smushInlines <$> mapM runToInlines runs
+ HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children
+ PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children
+ _ -> smushInlines <$> mapM parPartToInlines' children
parPartToInlines' NullParPart = return mempty
isAnchorSpan :: Inline -> Bool
@@ -532,34 +536,36 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
extraAttr :: (Eq (StyleName a), HasStyleName a) => a -> Attr
extraAttr s = ("", [], [("custom-style", fromStyleName $ getStyleName s)])
-parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
-parStyleToTransform pPr = case pStyle pPr of
- c@(getStyleName -> styleName):cs
- | styleName `elem` divsToKeep -> do
- let pPr' = pPr { pStyle = cs }
- transform <- parStyleToTransform pPr'
- return $ divWith ("", [normalizeToClassName styleName], []) . transform
- | styleName `elem` listParagraphStyles -> do
- let pPr' = pPr { pStyle = cs, indentation = Nothing}
- transform <- parStyleToTransform pPr'
- return $ divWith ("", [normalizeToClassName styleName], []) . transform
- | otherwise -> do
- let pPr' = pPr { pStyle = cs }
- transform <- parStyleToTransform pPr'
- styles <- asks (isEnabled Ext_styles . docxOptions)
- return $
- (if styles then divWith (extraAttr c) else id)
- . (if isBlockQuote c then blockQuote else id)
- . transform
- []
- | Just left <- indentation pPr >>= leftParIndent -> do
- let pPr' = pPr { indentation = Nothing }
- hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent
- transform <- parStyleToTransform pPr'
- return $ if (left - hang) > 0
- then blockQuote . transform
- else transform
- | otherwise -> return id
+paragraphStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
+paragraphStyleToTransform pPr =
+ let stylenames = map getStyleName (pStyle pPr)
+ transform = if (`elem` listParagraphStyles) `any` stylenames || relativeIndent pPr <= 0
+ then id
+ else blockQuote
+ in do
+ extStylesEnabled <- asks (isEnabled Ext_styles . docxOptions)
+ return $ foldr (\parStyle transform' ->
+ (parStyleToTransform extStylesEnabled parStyle) . transform'
+ ) transform (pStyle pPr)
+
+parStyleToTransform :: Bool -> ParStyle -> Blocks -> Blocks
+parStyleToTransform extStylesEnabled parStyle@(getStyleName -> styleName)
+ | (styleName `elem` divsToKeep) || (styleName `elem` listParagraphStyles) =
+ divWith ("", [normalizeToClassName styleName], [])
+ | otherwise =
+ (if extStylesEnabled then divWith (extraAttr parStyle) else id)
+ . (if isBlockQuote parStyle then blockQuote else id)
+
+-- The relative indent is the indentation minus the indentation of the parent style.
+-- This tells us whether this paragraph in particular was indented more and thus
+-- should be considered a block quote.
+relativeIndent :: ParagraphStyle -> Integer
+relativeIndent pPr =
+ let pStyleLeft = fromMaybe 0 $ pStyleIndentation pPr >>= leftParIndent
+ pStyleHang = fromMaybe 0 $ pStyleIndentation pPr >>= hangingParIndent
+ left = fromMaybe pStyleLeft $ indentation pPr >>= leftParIndent
+ hang = fromMaybe pStyleHang $ indentation pPr >>= hangingParIndent
+ in (left - hang) - (pStyleLeft - pStyleHang)
normalizeToClassName :: (FromStyleName a) => a -> T.Text
normalizeToClassName = T.map go . fromStyleName
@@ -578,7 +584,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
local (\s -> s{ docxInBidi = True })
(bodyPartToBlocks (Paragraph pPr' parparts))
| isCodeDiv pPr = do
- transform <- parStyleToTransform pPr
+ transform <- paragraphStyleToTransform pPr
return $
transform $
codeBlock $
@@ -605,7 +611,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
else prevParaIls <> space) <> ils'
handleInsertion = do
modify $ \s -> s {docxPrevPara = mempty}
- transform <- parStyleToTransform pPr'
+ transform <- paragraphStyleToTransform pPr'
return $ transform $ paraOrPlain ils''
opts <- asks docxOptions
case (pChange pPr', readerTrackChanges opts) of
@@ -620,7 +626,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
, AllChanges) -> do
let attr = ("", ["paragraph-insertion"], addAuthorAndDate cAuthor cDate)
insertMark = spanWith attr mempty
- transform <- parStyleToTransform pPr'
+ transform <- paragraphStyleToTransform pPr'
return $ transform $
paraOrPlain $ ils'' <> insertMark
(Just (TrackedChange Deletion _), AcceptChanges) -> do
@@ -632,7 +638,7 @@ bodyPartToBlocks (Paragraph pPr parparts)
, AllChanges) -> do
let attr = ("", ["paragraph-deletion"], addAuthorAndDate cAuthor cDate)
insertMark = spanWith attr mempty
- transform <- parStyleToTransform pPr'
+ transform <- paragraphStyleToTransform pPr'
return $ transform $
paraOrPlain $ ils'' <> insertMark
_ -> handleInsertion
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
index 442bc3466..5f090b6be 100644
--- a/src/Text/Pandoc/Readers/Docx/Fields.hs
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -21,8 +21,11 @@ import Text.Parsec
import Text.Parsec.Text (Parser)
type URL = T.Text
+type Anchor = T.Text
data FieldInfo = HyperlinkField URL
+ -- The boolean indicates whether the field is a hyperlink.
+ | PagerefField Anchor Bool
| UnknownField
deriving (Show)
@@ -33,6 +36,8 @@ fieldInfo :: Parser FieldInfo
fieldInfo =
try (HyperlinkField <$> hyperlink)
<|>
+ try ((uncurry PagerefField) <$> pageref)
+ <|>
return UnknownField
escapedQuote :: Parser T.Text
@@ -72,3 +77,23 @@ hyperlink = do
("\\l", s) : _ -> farg <> "#" <> s
_ -> farg
return url
+
+-- See ยง17.16.5.45
+pagerefSwitch :: Parser (T.Text, T.Text)
+pagerefSwitch = do
+ sw <- string "\\h"
+ spaces
+ farg <- fieldArgument
+ return (T.pack sw, farg)
+
+pageref :: Parser (Anchor, Bool)
+pageref = do
+ many space
+ string "PAGEREF"
+ spaces
+ farg <- fieldArgument
+ switches <- spaces *> many pagerefSwitch
+ let isLink = case switches of
+ ("\\h", _) : _ -> True
+ _ -> False
+ return (farg, isLink)
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index dbb16a821..87a3aebef 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, archiveToDocxWithWarnings
, getStyleNames
, pHeading
+ , pStyleIndentation
, constructBogusParStyleData
, leftBiasedMergeRunStyle
, rowsToRowspans
@@ -92,14 +93,13 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
deriving Show
data ReaderState = ReaderState { stateWarnings :: [T.Text]
- , stateFldCharState :: FldCharState
+ , stateFldCharState :: [FldCharState]
}
deriving Show
data FldCharState = FldCharOpen
| FldCharFieldInfo FieldInfo
- | FldCharContent FieldInfo [Run]
- | FldCharClosed
+ | FldCharContent FieldInfo [ParPart]
deriving (Show)
data DocxError = DocxError
@@ -194,11 +194,6 @@ data Notes = Notes NameSpaces
data Comments = Comments NameSpaces (M.Map T.Text Element)
deriving Show
-data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
- , rightParIndent :: Maybe Integer
- , hangingParIndent :: Maybe Integer}
- deriving Show
-
data ChangeType = Insertion | Deletion
deriving Show
@@ -318,12 +313,13 @@ data ParPart = PlainRun Run
| CommentStart CommentId Author (Maybe CommentDate) [BodyPart]
| CommentEnd CommentId
| BookMark BookMarkId Anchor
- | InternalHyperLink Anchor [Run]
- | ExternalHyperLink URL [Run]
+ | InternalHyperLink Anchor [ParPart]
+ | ExternalHyperLink URL [ParPart]
| Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| Chart -- placeholder for now
+ | Diagram -- placeholder for now
| PlainOMath [Exp]
- | Field FieldInfo [Run]
+ | Field FieldInfo [ParPart]
| NullParPart -- when we need to return nothing, but
-- not because of an error.
deriving Show
@@ -333,6 +329,7 @@ data Run = Run RunStyle [RunElem]
| Endnote [BodyPart]
| InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| InlineChart -- placeholder
+ | InlineDiagram -- placeholder
deriving Show
data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen
@@ -375,7 +372,7 @@ archiveToDocxWithWarnings archive = do
, envDocXmlPath = docXmlPath
}
rState = ReaderState { stateWarnings = []
- , stateFldCharState = FldCharClosed
+ , stateFldCharState = []
}
(eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
case eitherDoc of
@@ -437,6 +434,7 @@ getStyleNames = fmap getStyleName
constructBogusParStyleData :: ParaStyleName -> ParStyle
constructBogusParStyleData stName = ParStyle
{ headingLev = Nothing
+ , indent = Nothing
, numInfo = Nothing
, psParentStyle = Nothing
, pStyleName = stName
@@ -507,9 +505,7 @@ archiveToRelationships archive docXmlPath =
filePathIsMedia :: FilePath -> Bool
filePathIsMedia fp =
- let (dir, _) = splitFileName fp
- in
- (dir == "word/media/")
+ "media" `elem` splitDirectories (takeDirectory fp)
lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
@@ -673,20 +669,6 @@ elemToCell ns element | isElem ns "w" "tc" element =
return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents
elemToCell _ _ = throwError WrongElem
-elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
-elemToParIndentation ns element | isElem ns "w" "ind" element =
- Just ParIndentation {
- leftParIndent =
- findAttrByName ns "w" "left" element >>=
- stringToInteger
- , rightParIndent =
- findAttrByName ns "w" "right" element >>=
- stringToInteger
- , hangingParIndent =
- findAttrByName ns "w" "hanging" element >>=
- stringToInteger }
-elemToParIndentation _ _ = Nothing
-
testBitMask :: Text -> Int -> Bool
testBitMask bitMaskS n =
case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of
@@ -699,6 +681,9 @@ pHeading = getParStyleField headingLev . pStyle
pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text)
pNumInfo = getParStyleField numInfo . pStyle
+pStyleIndentation :: ParagraphStyle -> Maybe ParIndentation
+pStyleIndentation style = (getParStyleField indent . pStyle) style
+
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
elemToBodyPart ns element
| isElem ns "w" "p" element
@@ -715,28 +700,31 @@ elemToBodyPart ns element
elemToBodyPart ns element
| isElem ns "w" "p" element = do
parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
- parparts <- mapD (elemToParPart ns) (elChildren element)
+ parparts' <- mapD (elemToParPart ns) (elChildren element)
+ fldCharState <- gets stateFldCharState
+ modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState}
-- Word uses list enumeration for numbered headings, so we only
-- want to infer a list from the styles if it is NOT a heading.
- case pHeading parstyle of
- Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
- levelInfo <- lookupLevel numId lvl <$> asks envNumbering
- return $ ListItem parstyle numId lvl levelInfo parparts
- _ -> let
- hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
-
- hasSimpleTableField = fromMaybe False $ do
- fldSimple <- findChildByName ns "w" "fldSimple" element
- instr <- findAttrByName ns "w" "instr" fldSimple
- pure ("Table" `elem` T.words instr)
-
- hasComplexTableField = fromMaybe False $ do
- instrText <- findElementByName ns "w" "instrText" element
- pure ("Table" `elem` T.words (strContent instrText))
-
- in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
- then return $ TblCaption parstyle parparts
- else return $ Paragraph parstyle parparts
+ let parparts = parparts' ++ (openFldCharsToParParts fldCharState) in
+ case pHeading parstyle of
+ Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
+ levelInfo <- lookupLevel numId lvl <$> asks envNumbering
+ return $ ListItem parstyle numId lvl levelInfo parparts
+ _ -> let
+ hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
+
+ hasSimpleTableField = fromMaybe False $ do
+ fldSimple <- findChildByName ns "w" "fldSimple" element
+ instr <- findAttrByName ns "w" "instr" fldSimple
+ pure ("Table" `elem` T.words instr)
+
+ hasComplexTableField = fromMaybe False $ do
+ instrText <- findElementByName ns "w" "instrText" element
+ pure ("Table" `elem` T.words (strContent instrText))
+
+ in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
+ then return $ TblCaption parstyle parparts
+ else return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
@@ -768,14 +756,30 @@ lookupRelationship docLocation relid rels =
where
pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
+openFldCharsToParParts :: [FldCharState] -> [ParPart]
+openFldCharsToParParts [] = []
+openFldCharsToParParts (FldCharContent info children : ancestors) = case openFldCharsToParParts ancestors of
+ Field parentInfo siblings : _ -> [Field parentInfo $ siblings ++ [Field info $ reverse children]]
+ _ -> [Field info $ reverse children]
+openFldCharsToParParts (_ : ancestors) = openFldCharsToParParts ancestors
+
+emptyFldCharContents :: [FldCharState] -> [FldCharState]
+emptyFldCharContents = map
+ (\x -> case x of
+ FldCharContent info _ -> FldCharContent info []
+ _ -> x)
+
expandDrawingId :: T.Text -> D (FilePath, B.ByteString)
expandDrawingId s = do
location <- asks envLocation
target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
case target of
Just filepath -> do
- bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
- case bytes of
+ media <- asks envMedia
+ let filepath' = case filepath of
+ ('/':rest) -> rest
+ _ -> "word/" ++ filepath
+ case lookup filepath' media of
Just bs -> return (filepath, bs)
Nothing -> throwError DocxError
Nothing -> throwError DocxError
@@ -789,44 +793,6 @@ getTitleAndAlt ns element =
in (title, alt)
elemToParPart :: NameSpaces -> Element -> D ParPart
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just drawingElem <- findChildByName ns "w" "drawing" element
- , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
- , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
- = let (title, alt) = getTitleAndAlt ns drawingElem
- a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
- drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrByName ns "r" "embed"
- in
- case drawing of
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
- Nothing -> throwError WrongElem
--- The two cases below are an attempt to deal with images in deprecated vml format.
--- Todo: check out title and attr for deprecated format.
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just _ <- findChildByName ns "w" "pict" element =
- let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttrByName ns "r" "id"
- in
- case drawing of
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
- Nothing -> throwError WrongElem
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just objectElem <- findChildByName ns "w" "object" element
- , Just shapeElem <- findChildByName ns "v" "shape" objectElem
- , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
- , Just drawingId <- findAttrByName ns "r" "id" imagedataElem
- = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
--- Chart
-elemToParPart ns element
- | isElem ns "w" "r" element
- , Just drawingElem <- findChildByName ns "w" "drawing" element
- , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
- , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
- = return Chart
{-
The next one is a bit complicated. fldChar fields work by first
having a <w:fldChar fldCharType="begin"> in a run, then a run with
@@ -858,8 +824,13 @@ example (omissions and my comments in brackets):
So we do this in a number of steps. If we encounter the fldchar begin
tag, we start open a fldchar state variable (see state above). We add
the instrtext to it as FieldInfo. Then we close that and start adding
-the runs when we get to separate. Then when we get to end, we produce
-the Field type with appropriate FieldInfo and Runs.
+the children when we get to separate. Then when we get to end, we produce
+the Field type with appropriate FieldInfo and ParParts.
+
+Since there can be nested fields, the fldchar state needs to be a stack,
+so we can have multiple fldchars open at the same time. When a fldchar is
+closed, we either add the resulting field to its parent or we return it if
+there is no parent.
-}
elemToParPart ns element
| isElem ns "w" "r" element
@@ -867,78 +838,142 @@ elemToParPart ns element
, Just fldCharType <- findAttrByName ns "w" "fldCharType" fldChar = do
fldCharState <- gets stateFldCharState
case fldCharState of
- FldCharClosed | fldCharType == "begin" -> do
- modify $ \st -> st {stateFldCharState = FldCharOpen}
+ _ | fldCharType == "begin" -> do
+ modify $ \st -> st {stateFldCharState = FldCharOpen : fldCharState}
+ return NullParPart
+ FldCharFieldInfo info : ancestors | fldCharType == "separate" -> do
+ modify $ \st -> st {stateFldCharState = FldCharContent info [] : ancestors}
return NullParPart
- FldCharFieldInfo info | fldCharType == "separate" -> do
- modify $ \st -> st {stateFldCharState = FldCharContent info []}
+ -- Some fields have no content, since Pandoc doesn't understand any of those fields, we can just close it.
+ FldCharFieldInfo _ : ancestors | fldCharType == "end" -> do
+ modify $ \st -> st {stateFldCharState = ancestors}
return NullParPart
- FldCharContent info runs | fldCharType == "end" -> do
- modify $ \st -> st {stateFldCharState = FldCharClosed}
- return $ Field info $ reverse runs
+ [FldCharContent info children] | fldCharType == "end" -> do
+ modify $ \st -> st {stateFldCharState = []}
+ return $ Field info $ reverse children
+ FldCharContent info children : FldCharContent parentInfo siblings : ancestors | fldCharType == "end" ->
+ let parent = FldCharContent parentInfo $ (Field info (reverse children)) : siblings in do
+ modify $ \st -> st {stateFldCharState = parent : ancestors}
+ return NullParPart
_ -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "r" element
, Just instrText <- findChildByName ns "w" "instrText" element = do
fldCharState <- gets stateFldCharState
case fldCharState of
- FldCharOpen -> do
+ FldCharOpen : ancestors -> do
info <- eitherToD $ parseFieldInfo $ strContent instrText
- modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
+ modify $ \st -> st {stateFldCharState = FldCharFieldInfo info : ancestors}
return NullParPart
_ -> return NullParPart
-elemToParPart ns element
+{-
+There is an open fldchar, so we calculate the element and add it to the
+children. For this we need to first change the fldchar state to an empty
+stack to avoid descendants of children simply being added to the state instead
+of to their direct parent element. This would happen in the case of a
+w:hyperlink element for example.
+-}
+elemToParPart ns element = do
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharContent info children : ancestors -> do
+ modify $ \st -> st {stateFldCharState = []}
+ parPart <- elemToParPart' ns element `catchError` \_ -> return NullParPart
+ modify $ \st -> st{stateFldCharState = FldCharContent info (parPart : children) : ancestors}
+ return NullParPart
+ _ -> elemToParPart' ns element
+
+elemToParPart' :: NameSpaces -> Element -> D ParPart
+elemToParPart' ns element
+ | isElem ns "w" "r" element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element
+ , pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
+ , Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
+ = let (title, alt) = getTitleAndAlt ns drawingElem
+ a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+ drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
+ >>= findAttrByName ns "r" "embed"
+ in
+ case drawing of
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
+ Nothing -> throwError WrongElem
+-- The two cases below are an attempt to deal with images in deprecated vml format.
+-- Todo: check out title and attr for deprecated format.
+elemToParPart' ns element
+ | isElem ns "w" "r" element
+ , Just _ <- findChildByName ns "w" "pict" element =
+ let drawing = findElement (elemName ns "v" "imagedata") element
+ >>= findAttrByName ns "r" "id"
+ in
+ case drawing of
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
+ Nothing -> throwError WrongElem
+elemToParPart' ns element
+ | isElem ns "w" "r" element
+ , Just objectElem <- findChildByName ns "w" "object" element
+ , Just shapeElem <- findChildByName ns "v" "shape" objectElem
+ , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
+ , Just drawingId <- findAttrByName ns "r" "id" imagedataElem
+ = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
+-- Diagram
+elemToParPart' ns element
+ | isElem ns "w" "r" element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element
+ , d_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram"
+ , Just _ <- findElement (QName "relIds" (Just d_ns) (Just "dgm")) drawingElem
+ = return Diagram
+-- Chart
+elemToParPart' ns element
+ | isElem ns "w" "r" element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element
+ , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
+ , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
+ = return Chart
+elemToParPart' ns element
| isElem ns "w" "r" element = do
run <- elemToRun ns element
- -- we check to see if we have an open FldChar in state that we're
- -- recording.
- fldCharState <- gets stateFldCharState
- case fldCharState of
- FldCharContent info runs -> do
- modify $ \st -> st{stateFldCharState = FldCharContent info (run : runs)}
- return NullParPart
- _ -> return $ PlainRun run
-elemToParPart ns element
+ return $ PlainRun run
+elemToParPart' ns element
| Just change <- getTrackedChange ns element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ ChangedRuns change runs
-elemToParPart ns element
+elemToParPart' ns element
| isElem ns "w" "bookmarkStart" element
, Just bmId <- findAttrByName ns "w" "id" element
, Just bmName <- findAttrByName ns "w" "name" element =
return $ BookMark bmId bmName
-elemToParPart ns element
+elemToParPart' ns element
| isElem ns "w" "hyperlink" element
, Just relId <- findAttrByName ns "r" "id" element = do
location <- asks envLocation
- runs <- mapD (elemToRun ns) (elChildren element)
+ children <- mapD (elemToParPart ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
Just target ->
case findAttrByName ns "w" "anchor" element of
- Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
- Nothing -> return $ ExternalHyperLink target runs
- Nothing -> return $ ExternalHyperLink "" runs
-elemToParPart ns element
+ Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) children
+ Nothing -> return $ ExternalHyperLink target children
+ Nothing -> return $ ExternalHyperLink "" children
+elemToParPart' ns element
| isElem ns "w" "hyperlink" element
, Just anchor <- findAttrByName ns "w" "anchor" element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ InternalHyperLink anchor runs
-elemToParPart ns element
+ children <- mapD (elemToParPart ns) (elChildren element)
+ return $ InternalHyperLink anchor children
+elemToParPart' ns element
| isElem ns "w" "commentRangeStart" element
, Just cmtId <- findAttrByName ns "w" "id" element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
Nothing -> throwError WrongElem
-elemToParPart ns element
+elemToParPart' ns element
| isElem ns "w" "commentRangeEnd" element
, Just cmtId <- findAttrByName ns "w" "id" element =
return $ CommentEnd cmtId
-elemToParPart ns element
+elemToParPart' ns element
| isElem ns "m" "oMath" element =
fmap PlainOMath (eitherToD $ readOMML $ showElement element)
-elemToParPart _ _ = throwError WrongElem
+elemToParPart' _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
@@ -987,6 +1022,11 @@ childElemToRun ns element
, Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) element
= return InlineChart
childElemToRun ns element
+ | isElem ns "w" "drawing" element
+ , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram"
+ , Just _ <- findElement (QName "relIds" (Just c_ns) (Just "dgm")) element
+ = return InlineDiagram
+childElemToRun ns element
| isElem ns "w" "footnoteReference" element
, Just fnId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes
@@ -1071,8 +1111,7 @@ elemToParagraphStyle ns element sty
in ParagraphStyle
{pStyle = mapMaybe (`M.lookup` sty) style
, indentation =
- findChildByName ns "w" "ind" pPr >>=
- elemToParIndentation ns
+ getIndentation ns element
, dropCap =
case
findChildByName ns "w" "framePr" pPr >>=
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index 0d7271d6a..df942579a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Docx.Parse.Styles (
, CharStyle
, ParaStyleId(..)
, ParStyle(..)
+ , ParIndentation(..)
, RunStyle(..)
, HasStyleName
, StyleName
@@ -37,6 +38,7 @@ module Text.Pandoc.Readers.Docx.Parse.Styles (
, fromStyleName
, fromStyleId
, stringToInteger
+ , getIndentation
, getNumInfo
, elemToRunStyle
, defaultRunStyle
@@ -115,7 +117,13 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
}
deriving Show
+data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
+ , rightParIndent :: Maybe Integer
+ , hangingParIndent :: Maybe Integer}
+ deriving Show
+
data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
+ , indent :: Maybe ParIndentation
, numInfo :: Maybe (T.Text, T.Text)
, psParentStyle :: Maybe ParStyle
, pStyleName :: ParaStyleName
@@ -290,6 +298,22 @@ getHeaderLevel ns element
, n > 0 = Just (styleName, fromInteger n)
getHeaderLevel _ _ = Nothing
+getIndentation :: NameSpaces -> Element -> Maybe ParIndentation
+getIndentation ns el = do
+ indElement <- findChildByName ns "w" "pPr" el >>=
+ findChildByName ns "w" "ind"
+ return $ ParIndentation
+ {
+ leftParIndent = findAttrByName ns "w" "left" indElement <|>
+ findAttrByName ns "w" "start" indElement >>=
+ stringToInteger
+ , rightParIndent = findAttrByName ns "w" "right" indElement <|>
+ findAttrByName ns "w" "end" indElement >>=
+ stringToInteger
+ , hangingParIndent = findAttrByName ns "w" "hanging" indElement >>=
+ stringToInteger
+ }
+
getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
getElementStyleName ns el = coerce <$>
((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
@@ -314,6 +338,7 @@ elemToParStyleData ns element parentStyle
= Just $ ParStyle
{
headingLev = getHeaderLevel ns element
+ , indent = getIndentation ns element
, numInfo = getNumInfo ns element
, psParentStyle = parentStyle
, pStyleName = styleName
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fdf4f28e0..8aa2646b2 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -551,7 +551,7 @@ pFigure = try $ do
let caption = fromMaybe mempty mbcap
case B.toList <$> mbimg of
Just [Image attr _ (url, tit)] ->
- return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption
+ return $ B.simpleFigureWith attr caption url tit
_ -> mzero
pCodeBlock :: PandocMonad m => TagParser m Blocks
@@ -643,7 +643,7 @@ pQ = do
case lookup "cite" attrs of
Just url -> do
let uid = fromMaybe mempty $
- lookup "name" attrs <> lookup "id" attrs
+ lookup "name" attrs <|> lookup "id" attrs
let cls = maybe [] T.words $ lookup "class" attrs
url' <- canonicalizeUrl url
makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')])
@@ -705,20 +705,18 @@ pLineBreak = do
pLink :: PandocMonad m => TagParser m Inlines
pLink = try $ do
- tag <- pSatisfy $ tagOpenLit "a" (const True)
+ tag@(TagOpen _ attr') <- pSatisfy $ tagOpenLit "a" (const True)
let title = fromAttrib "title" tag
- -- take id from id attribute if present, otherwise name
- let uid = fromMaybe (fromAttrib "name" tag) $
- maybeFromAttrib "id" tag
- let cls = T.words $ fromAttrib "class" tag
+ let attr = toAttr $ filter (\(k,_) -> k /= "title" && k /= "href") attr'
lab <- mconcat <$> manyTill inline (pCloses "a")
-- check for href; if href, then a link, otherwise a span
case maybeFromAttrib "href" tag of
Nothing ->
- return $ extractSpaces (B.spanWith (uid, cls, [])) lab
+ return $ extractSpaces (B.spanWith attr) lab
Just url' -> do
url <- canonicalizeUrl url'
- return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab
+ return $ extractSpaces
+ (B.linkWith attr (escapeURI url) title) lab
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index bd8d7c96c..a8cdf1de2 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -30,11 +30,11 @@ module Text.Pandoc.Readers.HTML.Parsing
)
where
-import Control.Monad (guard, void, mzero)
+import Control.Monad (void, mzero, mplus)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Text.HTML.TagSoup
- ( Attribute, Tag (..), isTagText, isTagPosition, isTagOpen, isTagClose, (~==) )
+ ( Attribute, Tag (..), isTagPosition, isTagOpen, isTagClose, (~==) )
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Definition (Attr)
import Text.Pandoc.Parsing
@@ -118,9 +118,11 @@ pCloses tagtype = try $ do
_ -> mzero
pBlank :: PandocMonad m => TagParser m ()
-pBlank = try $ do
- (TagText str) <- pSatisfy isTagText
- guard $ T.all isSpace str
+pBlank = void $ pSatisfy isBlank
+ where
+ isBlank (TagText t) = T.all isSpace t
+ isBlank (TagComment _) = True
+ isBlank _ = False
pLocation :: PandocMonad m => TagParser m ()
pLocation = do
@@ -218,9 +220,10 @@ maybeFromAttrib _ _ = Nothing
mkAttr :: [(Text, Text)] -> Attr
mkAttr attr = (attribsId, attribsClasses, attribsKV)
- where attribsId = fromMaybe "" $ lookup "id" attr
+ where attribsId = fromMaybe "" $ lookup "id" attr `mplus` lookup "name" attr
attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes
- attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+ attribsKV = filter (\(k,_) -> k /= "class" && k /= "id" && k /= "name")
+ attr
epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr
toAttr :: [(Text, Text)] -> Attr
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 6e62e12f5..b23a2abc8 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -16,7 +16,7 @@ HTML table parser.
module Text.Pandoc.Readers.HTML.Table (pTable) where
import Control.Applicative ((<|>))
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, isJust)
import Data.Either (lefts, rights)
import Data.List.NonEmpty (nonEmpty)
import Data.Text (Text)
@@ -27,12 +27,13 @@ import Text.Pandoc.Definition
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Parsing
( eof, lookAhead, many, many1, manyTill, option, optional
- , optionMaybe, skipMany, try)
+ , optionMaybe, skipMany, try )
import Text.Pandoc.Readers.HTML.Parsing
import Text.Pandoc.Readers.HTML.Types (TagParser)
import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
+import Control.Monad (guard)
-- | Parses a @<col>@ element, returning the column's width.
-- An Either value is used: Left i means a "relative length" with
@@ -183,11 +184,13 @@ pTableBody :: PandocMonad m
-> TagParser m TableBody
pTableBody block = try $ do
skipMany pBlank
- attribs <- option [] $ getAttribs <$> pSatisfy (matchTagOpen "tbody" [])
- <* skipMany pBlank
+ mbattribs <- option Nothing $ Just . getAttribs <$>
+ pSatisfy (matchTagOpen "tbody" []) <* skipMany pBlank
bodyheads <- many (pHeaderRow block)
- (rowheads, rows) <- unzip <$> many1 (pRow block <* skipMany pBlank)
+ (rowheads, rows) <- unzip <$> many (pRow block <* skipMany pBlank)
optional $ pSatisfy (matchTagClose "tbody")
+ guard $ isJust mbattribs || not (null bodyheads && null rows)
+ let attribs = fromMaybe [] mbattribs
return $ TableBody (toAttr attribs) (foldr max 0 rowheads) bodyheads rows
where
getAttribs (TagOpen _ attribs) = attribs
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index cd1093109..8e742a888 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -19,6 +19,7 @@ import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import Data.Digest.Pure.SHA (sha1, showDigest)
import Text.Pandoc.Options
+import Control.Applicative ((<|>))
import qualified Data.Scientific as Scientific
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Logging
@@ -76,7 +77,10 @@ cellToBlocks opts lang c = do
let Source ts = cellSource c
let source = mconcat ts
let kvs = jsonMetaToPairs (cellMetadata c)
- let attachments = maybe mempty M.toList $ cellAttachments c
+ let attachments = case cellAttachments c of
+ Nothing -> mempty
+ Just (MimeAttachments m) -> M.toList m
+ let ident = fromMaybe mempty $ cellId c
mapM_ addAttachment attachments
case cellType c of
Ipynb.Markdown -> do
@@ -85,29 +89,34 @@ cellToBlocks opts lang c = do
else do
Pandoc _ bs <- walk fixImage <$> readMarkdown opts source
return bs
- return $ B.divWith ("",["cell","markdown"],kvs)
+ return $ B.divWith (ident,["cell","markdown"],kvs)
$ B.fromList bs
Ipynb.Heading lev -> do
Pandoc _ bs <- readMarkdown opts
(T.replicate lev "#" <> " " <> source)
- return $ B.divWith ("",["cell","markdown"],kvs)
+ return $ B.divWith (ident,["cell","markdown"],kvs)
$ B.fromList bs
Ipynb.Raw -> do
-- we use ipynb to indicate no format given (a wildcard in nbformat)
- let format = fromMaybe "ipynb" $ lookup "format" kvs
+ let format = fromMaybe "ipynb" $ lookup "raw_mimetype" kvs <|> lookup "format" kvs
let format' =
case format of
- "text/html" -> "html"
- "text/latex" -> "latex"
- "application/pdf" -> "latex"
- "text/markdown" -> "markdown"
- "text/x-rsrt" -> "rst"
- _ -> format
- return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source
+ "text/html" -> "html"
+ "slides" -> "html"
+ "text/latex" -> "latex"
+ "application/pdf" -> "latex"
+ "pdf" -> "latex"
+ "text/markdown" -> "markdown"
+ "text/x-rst" -> "rst"
+ "text/restructuredtext" -> "rst"
+ "text/asciidoc" -> "asciidoc"
+ _ -> format
+ return $ B.divWith (ident,["cell","raw"],kvs)
+ $ B.rawBlock format' source
Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do
outputBlocks <- mconcat <$> mapM outputToBlock outputs
let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec
- return $ B.divWith ("",["cell","code"],kvs') $
+ return $ B.divWith (ident,["cell","code"],kvs') $
B.codeBlockWith ("",[lang],[]) source
<> outputBlocks
@@ -156,7 +165,7 @@ outputToBlock Err{ errName = ename,
-- the output format.
handleData :: PandocMonad m
=> JSONMeta -> MimeBundle -> m B.Blocks
-handleData metadata (MimeBundle mb) =
+handleData (JSONMeta metadata) (MimeBundle mb) =
mconcat <$> mapM dataBlock (M.toList mb)
where
@@ -192,6 +201,9 @@ handleData metadata (MimeBundle mb) =
dataBlock ("text/latex", TextualData t)
= return $ B.rawBlock "latex" t
+ dataBlock ("text/markdown", TextualData t)
+ = return $ B.rawBlock "markdown" t
+
dataBlock ("text/plain", TextualData t) =
return $ B.codeBlock t
@@ -201,7 +213,7 @@ handleData metadata (MimeBundle mb) =
dataBlock _ = return mempty
jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue
-jsonMetaToMeta = M.map valueToMetaValue
+jsonMetaToMeta (JSONMeta m) = M.map valueToMetaValue m
where
valueToMetaValue :: Value -> MetaValue
valueToMetaValue x@Object{} =
@@ -220,11 +232,11 @@ jsonMetaToMeta = M.map valueToMetaValue
valueToMetaValue Aeson.Null = MetaString ""
jsonMetaToPairs :: JSONMeta -> [(Text, Text)]
-jsonMetaToPairs = M.toList . M.map
+jsonMetaToPairs (JSONMeta m) = M.toList . M.map
(\case
String t
| not (T.all isDigit t)
, t /= "true"
, t /= "false"
-> t
- x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x)
+ x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) $ m
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 9cdbf1611..37e0d13bc 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -35,6 +35,7 @@ import Text.Pandoc.XML.Light
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import qualified Data.Foldable as DF
type JATS m = StateT JATSState m
@@ -226,9 +227,19 @@ parseBlock (Elem e) =
mapM getInlines
(filterChildren (const True) t)
Nothing -> return mempty
- img <- getGraphic (Just (capt, attrValue "id" e)) g
- return $ para img
+
+ let figAttributes = DF.toList $
+ ("alt", ) . strContent <$>
+ filterChild (named "alt-text") e
+
+ return $ simpleFigureWith
+ (attrValue "id" e, [], figAttributes)
+ capt
+ (attrValue "href" g)
+ (attrValue "title" g)
+
_ -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e
+
parseTable = do
let isCaption x = named "title" x || named "caption" x
capt <- case filterChild isCaption e of
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 27c018e73..20a2db76b 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -390,8 +390,8 @@ inlineCommands = M.unions
unescapeURL .
removeDoubleQuotes $ untokenize src)
-- hyperref
- , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
- bracedUrl)
+ , ("url", (\url -> linkWith ("",["uri"],[]) url "" (str url))
+ . unescapeURL . untokenize <$> bracedUrl)
, ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
, ("href", do url <- bracedUrl
sp
@@ -893,7 +893,7 @@ blockCommands = M.fromList
addMeta "bibliography" . splitBibs . untokenize))
, ("addbibresource", mempty <$ (skipopts *> braced >>=
addMeta "bibliography" . splitBibs . untokenize))
- , ("endinput", mempty <$ skipMany anyTok)
+ , ("endinput", mempty <$ skipSameFileToks)
-- includes
, ("lstinputlisting", inputListing)
, ("inputminted", inputMinted)
@@ -924,6 +924,10 @@ blockCommands = M.fromList
, ("epigraph", epigraph)
]
+skipSameFileToks :: PandocMonad m => LP m ()
+skipSameFileToks = do
+ pos <- getPosition
+ skipMany $ infile (sourceName pos)
environments :: PandocMonad m => M.Map Text (LP m Blocks)
environments = M.union (tableEnvironments blocks inline) $
@@ -970,6 +974,7 @@ environments = M.union (tableEnvironments blocks inline) $
, ("toggletrue", braced >>= setToggle True)
, ("togglefalse", braced >>= setToggle False)
, ("iftoggle", try $ ifToggle >> block)
+ , ("CSLReferences", braced >> braced >> env "CSLReferences" blocks)
]
filecontents :: PandocMonad m => LP m Blocks
@@ -1109,24 +1114,28 @@ figure = try $ do
addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
addImageCaption = walkM go
- where go (Image attr@(_, cls, kvs) alt (src,tit))
+ where go p@(Para [Image attr@(_, cls, kvs) _ (src, tit)])
| not ("fig:" `T.isPrefixOf` tit) = do
st <- getState
- let (alt', tit') = case sCaption st of
- Just ils -> (toList ils, "fig:" <> tit)
- Nothing -> (alt, tit)
- attr' = case sLastLabel st of
- Just lab -> (lab, cls, kvs)
- Nothing -> attr
- case attr' of
- ("", _, _) -> return ()
- (ident, _, _) -> do
- num <- getNextNumber sLastFigureNum
- setState
- st{ sLastFigureNum = num
- , sLabels = M.insert ident
- [Str (renderDottedNum num)] (sLabels st) }
- return $ Image attr' alt' (src, tit')
+ case sCaption st of
+ Nothing -> return p
+ Just figureCaption -> do
+ let mblabel = sLastLabel st
+ let attr' = case mblabel of
+ Just lab -> (lab, cls, kvs)
+ Nothing -> attr
+ case attr' of
+ ("", _, _) -> return ()
+ (ident, _, _) -> do
+ num <- getNextNumber sLastFigureNum
+ setState
+ st{ sLastFigureNum = num
+ , sLabels = M.insert ident
+ [Str (renderDottedNum num)] (sLabels st) }
+
+ return $ SimpleFigure attr'
+ (maybe id removeLabel mblabel (B.toList figureCaption))
+ (src, tit)
go x = return x
coloredBlock :: PandocMonad m => Text -> LP m Blocks
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 7b8bca4af..5938096fd 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Readers.LaTeX.Parsing
import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
manyTill, getInput, setInput, incSourceColumn,
- option, many1, try)
+ option, many1)
import Data.Char (isDigit)
import Text.Pandoc.Highlighting (fromListingsLanguage,)
import Data.Maybe (maybeToList, fromMaybe)
@@ -56,8 +56,7 @@ dolabel = do
let refstr = untokenize v
updateState $ \st ->
st{ sLastLabel = Just refstr }
- return $ spanWith (refstr,[],[("label", refstr)])
- $ inBrackets $ str $ untokenize v
+ return $ spanWith (refstr,[],[("label", refstr)]) mempty
doref :: PandocMonad m => Text -> LP m Inlines
doref cls = do
@@ -160,8 +159,8 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
accentWith :: PandocMonad m
=> LP m Inlines -> Char -> Maybe Char -> LP m Inlines
-accentWith tok combiningAccent fallBack = try $ do
- ils <- tok
+accentWith tok combiningAccent fallBack = do
+ ils <- option mempty tok
case toList ils of
(Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-- try to normalize to the combined character:
@@ -339,6 +338,7 @@ refCommands = M.fromList
, ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty
, ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty
, ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty
+ , ("autoref", rawInlineOr "autoref" $ doref "autoref") -- from hyperref.sty
]
acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
index 5495a8e74..d40277eb5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -15,6 +15,8 @@ import Control.Applicative ((<|>), optional)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
+import qualified Data.List.NonEmpty as NonEmpty
+import Data.List.NonEmpty (NonEmpty(..))
macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
macroDef constructor = do
@@ -23,51 +25,91 @@ macroDef constructor = do
guardDisabled Ext_latex_macros)
<|> return mempty
where commandDef = do
- nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
+ nameMacroPairs <- newcommand <|>
+ checkGlobal (letmacro <|> edefmacro <|> defmacro <|> newif)
guardDisabled Ext_latex_macros <|>
- mapM_ (\(name, macro') ->
- updateState (\s -> s{ sMacros = M.insert name macro'
- (sMacros s) })) nameMacroPairs
+ mapM_ insertMacro nameMacroPairs
environmentDef = do
mbenv <- newenvironment
case mbenv of
Nothing -> return ()
Just (name, macro1, macro2) ->
guardDisabled Ext_latex_macros <|>
- do updateState $ \s -> s{ sMacros =
- M.insert name macro1 (sMacros s) }
- updateState $ \s -> s{ sMacros =
- M.insert ("end" <> name) macro2 (sMacros s) }
+ do insertMacro (name, macro1)
+ insertMacro ("end" <> name, macro2)
-- @\newenvironment{envname}[n-args][default]{begin}{end}@
-- is equivalent to
-- @\newcommand{\envname}[n-args][default]{begin}@
-- @\newcommand{\endenvname}@
+insertMacro :: PandocMonad m => (Text, Macro) -> LP m ()
+insertMacro (name, macro'@(Macro GlobalScope _ _ _ _)) =
+ updateState $ \s ->
+ s{ sMacros = NonEmpty.map (M.insert name macro') (sMacros s) }
+insertMacro (name, macro'@(Macro GroupScope _ _ _ _)) =
+ updateState $ \s ->
+ s{ sMacros = M.insert name macro' (NonEmpty.head (sMacros s)) :|
+ NonEmpty.tail (sMacros s) }
+
+lookupMacro :: PandocMonad m => Text -> LP m Macro
+lookupMacro name = do
+ macros :| _ <- sMacros <$> getState
+ case M.lookup name macros of
+ Just m -> return m
+ Nothing -> fail "Macro not found"
+
letmacro :: PandocMonad m => LP m [(Text, Macro)]
letmacro = do
controlSeq "let"
- (name, contents) <- withVerbatimMode $ do
+ withVerbatimMode $ do
Tok _ (CtrlSeq name) _ <- anyControlSeq
optional $ symbol '='
spaces
-- we first parse in verbatim mode, and then expand macros,
-- because we don't want \let\foo\bar to turn into
-- \let\foo hello if we have previously \def\bar{hello}
+ target <- anyControlSeq <|> singleChar
+ case target of
+ (Tok _ (CtrlSeq name') _) ->
+ (do m <- lookupMacro name'
+ pure [(name, m)])
+ <|> pure [(name,
+ Macro GroupScope ExpandWhenDefined [] Nothing [target])]
+ _ -> pure [(name, Macro GroupScope ExpandWhenDefined [] Nothing [target])]
+
+checkGlobal :: PandocMonad m => LP m [(Text, Macro)] -> LP m [(Text, Macro)]
+checkGlobal p =
+ (controlSeq "global" *>
+ (map (\(n, Macro _ expand arg optarg contents) ->
+ (n, Macro GlobalScope expand arg optarg contents)) <$> p))
+ <|> p
+
+edefmacro :: PandocMonad m => LP m [(Text, Macro)]
+edefmacro = do
+ scope <- (GroupScope <$ controlSeq "edef")
+ <|> (GlobalScope <$ controlSeq "xdef")
+ (name, contents) <- withVerbatimMode $ do
+ Tok _ (CtrlSeq name) _ <- anyControlSeq
+ -- we first parse in verbatim mode, and then expand macros,
+ -- because we don't want \let\foo\bar to turn into
+ -- \let\foo hello if we have previously \def\bar{hello}
contents <- bracedOrToken
return (name, contents)
- contents' <- doMacros' 0 contents
- return [(name, Macro ExpandWhenDefined [] Nothing contents')]
+ -- expand macros
+ contents' <- parseFromToks (many anyTok) contents
+ return [(name, Macro scope ExpandWhenDefined [] Nothing contents')]
defmacro :: PandocMonad m => LP m [(Text, Macro)]
defmacro = do
-- we use withVerbatimMode, because macros are to be expanded
-- at point of use, not point of definition
- controlSeq "def"
+ scope <- (GroupScope <$ controlSeq "def")
+ <|> (GlobalScope <$ controlSeq "gdef")
withVerbatimMode $ do
Tok _ (CtrlSeq name) _ <- anyControlSeq
argspecs <- many (argspecArg <|> argspecPattern)
contents <- bracedOrToken
- return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+ return [(name, Macro scope ExpandWhenUsed argspecs Nothing contents)]
-- \newif\iffoo' defines:
-- \iffoo to be \iffalse
@@ -82,16 +124,16 @@ newif = do
-- \def\footrue{\def\iffoo\iftrue}
-- \def\foofalse{\def\iffoo\iffalse}
let base = T.drop 2 name
- return [ (name, Macro ExpandWhenUsed [] Nothing
+ return [ (name, Macro GroupScope ExpandWhenUsed [] Nothing
[Tok pos (CtrlSeq "iffalse") "\\iffalse"])
, (base <> "true",
- Macro ExpandWhenUsed [] Nothing
+ Macro GroupScope ExpandWhenUsed [] Nothing
[ Tok pos (CtrlSeq "def") "\\def"
, Tok pos (CtrlSeq name) ("\\" <> name)
, Tok pos (CtrlSeq "iftrue") "\\iftrue"
])
, (base <> "false",
- Macro ExpandWhenUsed [] Nothing
+ Macro GroupScope ExpandWhenUsed [] Nothing
[ Tok pos (CtrlSeq "def") "\\def"
, Tok pos (CtrlSeq name) ("\\" <> name)
, Tok pos (CtrlSeq "iffalse") "\\iffalse"
@@ -138,14 +180,13 @@ newcommand = do
: (contents' ++
[ Tok pos Symbol "}", Tok pos Symbol "}" ])
_ -> contents'
- macros <- sMacros <$> getState
- case M.lookup name macros of
- Just macro
- | mtype == "newcommand" -> do
- report $ MacroAlreadyDefined txt pos
- return [(name, macro)]
- | mtype == "providecommand" -> return [(name, macro)]
- _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
+ let macro = Macro GroupScope ExpandWhenUsed argspecs optarg contents
+ (do lookupMacro name
+ case mtype of
+ "providecommand" -> return []
+ "renewcommand" -> return [(name, macro)]
+ _ -> [] <$ report (MacroAlreadyDefined txt pos))
+ <|> pure [(name, macro)]
newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
newenvironment = do
@@ -164,17 +205,23 @@ newenvironment = do
let argspecs = map (\i -> ArgNum i) [1..numargs]
startcontents <- spaces >> bracedOrToken
endcontents <- spaces >> bracedOrToken
- macros <- sMacros <$> getState
- case M.lookup name macros of
- Just _
- | mtype == "newenvironment" -> do
- report $ MacroAlreadyDefined name pos
- return Nothing
- | mtype == "provideenvironment" ->
- return Nothing
- _ -> return $ Just (name,
- Macro ExpandWhenUsed argspecs optarg startcontents,
- Macro ExpandWhenUsed [] Nothing endcontents)
+ -- we need the environment to be in a group so macros defined
+ -- inside behave correctly:
+ let bg = Tok pos (CtrlSeq "bgroup") "\\bgroup "
+ let eg = Tok pos (CtrlSeq "egroup") "\\egroup "
+ let result = (name,
+ Macro GroupScope ExpandWhenUsed argspecs optarg
+ (bg:startcontents),
+ Macro GroupScope ExpandWhenUsed [] Nothing
+ (endcontents ++ [eg]))
+ (do lookupMacro name
+ case mtype of
+ "provideenvironment" -> return Nothing
+ "renewenvironment" -> return (Just result)
+ _ -> do
+ report $ MacroAlreadyDefined name pos
+ return Nothing)
+ <|> return (Just result)
bracketedNum :: PandocMonad m => LP m Int
bracketedNum = do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs
index 5b49a0376..01edce7ed 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Math.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs
@@ -142,14 +142,15 @@ newtheorem inline = do
theoremEnvironment :: PandocMonad m
=> LP m Blocks -> LP m Inlines -> Text -> LP m Blocks
theoremEnvironment blocks opt name = do
+ resetCaption
tmap <- sTheoremMap <$> getState
case M.lookup name tmap of
Nothing -> mzero
Just tspec -> do
optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
- mblabel <- option Nothing $ Just . untokenize <$>
- try (spaces >> controlSeq "label" >> spaces >> braced)
bs <- env name blocks
+ mblabel <- sLastLabel <$> getState
+
number <-
if theoremNumber tspec
then do
@@ -169,9 +170,7 @@ theoremEnvironment blocks opt name = do
Just ident ->
updateState $ \s ->
s{ sLabels = M.insert ident
- (B.toList $
- theoremName tspec <> "\160" <>
- str (renderDottedNum num)) (sLabels s) }
+ (B.toList $ str (renderDottedNum num)) (sLabels s) }
Nothing -> return ()
return $ space <> B.text (renderDottedNum num)
else return mempty
@@ -181,13 +180,14 @@ theoremEnvironment blocks opt name = do
RemarkStyle -> B.emph
let title = titleEmph (theoremName tspec <> number)
<> optTitle <> "." <> space
- return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
+ return $ divWith (fromMaybe "" mblabel, [name], [])
+ $ addTitle title
+ $ maybe id removeLabel mblabel
$ case theoremStyle tspec of
PlainStyle -> walk italicize bs
_ -> bs
-
proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks
proof blocks opt = do
title <- option (B.text "Proof") opt
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 9dac4d6ef..9eb4a0cbc 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -45,6 +45,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, isNewlineTok
, isWordTok
, isArgTok
+ , infile
, spaces
, spaces1
, tokTypeIn
@@ -89,6 +90,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, resetCaption
, env
, addMeta
+ , removeLabel
) where
import Control.Applicative (many, (<|>))
@@ -102,6 +104,9 @@ import qualified Data.IntMap as IntMap
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import Data.List.NonEmpty (NonEmpty(..))
+import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Text as T
import Text.Pandoc.Builder
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
@@ -115,6 +120,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
ArgSpec (..), Tok (..), TokType (..))
import Text.Pandoc.Shared
import Text.Parsec.Pos
+import Text.Pandoc.Walk
newtype DottedNum = DottedNum [Int]
deriving (Show, Eq)
@@ -146,7 +152,7 @@ data TheoremSpec =
data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sMeta :: Meta
, sQuoteContext :: QuoteContext
- , sMacros :: M.Map Text Macro
+ , sMacros :: NonEmpty (M.Map Text Macro)
, sContainers :: [Text]
, sLogMessages :: [LogMessage]
, sIdentifiers :: Set.Set Text
@@ -173,7 +179,7 @@ defaultLaTeXState :: LaTeXState
defaultLaTeXState = LaTeXState{ sOptions = def
, sMeta = nullMeta
, sQuoteContext = NoQuote
- , sMacros = M.empty
+ , sMacros = M.empty :| []
, sContainers = []
, sLogMessages = []
, sIdentifiers = Set.empty
@@ -220,8 +226,9 @@ instance HasIncludeFiles LaTeXState where
dropLatestIncludeFile s = s { sContainers = drop 1 $ sContainers s }
instance HasMacros LaTeXState where
- extractMacros st = sMacros st
- updateMacros f st = st{ sMacros = f (sMacros st) }
+ extractMacros st = NonEmpty.head $ sMacros st
+ updateMacros f st = st{ sMacros = f (NonEmpty.head (sMacros st))
+ :| NonEmpty.tail (sMacros st) }
instance HasReaderOptions LaTeXState where
extractReaderOptions = sOptions
@@ -254,7 +261,7 @@ rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
rawLaTeXParser toks retokenize parser valParser = do
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
- let lstate' = lstate { sMacros = extractMacros pstate }
+ let lstate' = lstate { sMacros = extractMacros pstate :| [] }
let setStartPos = case toks of
Tok pos _ _ : _ -> setPosition pos
_ -> return ()
@@ -267,14 +274,14 @@ rawLaTeXParser toks retokenize parser valParser = do
Right (endpos, toks') -> do
res <- lift $ runParserT (do when retokenize $ do
-- retokenize, applying macros
- ts <- many (satisfyTok (const True))
+ ts <- many anyTok
setInput ts
rawparser)
lstate' "chunk" toks'
case res of
Left _ -> mzero
Right ((val, raw), st) -> do
- updateState (updateMacros (sMacros st <>))
+ updateState (updateMacros ((NonEmpty.head (sMacros st)) <>))
let skipTilPos stopPos = do
anyChar
pos <- getPosition
@@ -296,10 +303,10 @@ rawLaTeXParser toks retokenize parser valParser = do
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> Text -> ParserT Sources s m Text
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
- do let retokenize = untokenize <$> many (satisfyTok (const True))
+ do let retokenize = untokenize <$> many anyTok
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate
- , sMacros = extractMacros pstate }
+ , sMacros = extractMacros pstate :| [] }
res <- runParserT retokenize lstate "math" (tokenize "math" s)
case res of
Left e -> Prelude.fail (show e)
@@ -552,10 +559,10 @@ doMacros' n inp =
handleMacros n' spos name ts = do
when (n' > 20) -- detect macro expansion loops
$ throwError $ PandocMacroLoop name
- macros <- sMacros <$> getState
+ (macros :| _ ) <- sMacros <$> getState
case M.lookup name macros of
Nothing -> trySpecialMacro name ts
- Just (Macro expansionPoint argspecs optarg newtoks) -> do
+ Just (Macro _scope expansionPoint argspecs optarg newtoks) -> do
let getargs' = do
args <-
(case expansionPoint of
@@ -642,6 +649,9 @@ isArgTok :: Tok -> Bool
isArgTok (Tok _ (Arg _) _) = True
isArgTok _ = False
+infile :: PandocMonad m => SourceName -> LP m Tok
+infile reference = satisfyTok (\(Tok source _ _) -> (sourceName source) == reference)
+
spaces :: PandocMonad m => LP m ()
spaces = skipMany (satisfyTok (tokTypeIn [Comment, Spaces, Newline]))
@@ -745,10 +755,22 @@ primEscape = do
bgroup :: PandocMonad m => LP m Tok
bgroup = try $ do
optional sp
- symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+ t <- symbol '{' <|> controlSeq "bgroup" <|> controlSeq "begingroup"
+ -- Add a copy of the macro table to the top of the macro stack,
+ -- private for this group. We inherit all the macros defined in
+ -- the parent group.
+ updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s))
+ (sMacros s) }
+ return t
+
egroup :: PandocMonad m => LP m Tok
-egroup = symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+egroup = do
+ t <- symbol '}' <|> controlSeq "egroup" <|> controlSeq "endgroup"
+ -- remove the group's macro table from the stack
+ updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $
+ NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) }
+ return t
grouped :: (PandocMonad m, Monoid a) => LP m a -> LP m a
grouped parser = try $ do
@@ -921,6 +943,9 @@ getRawCommand name txt = do
void $ count 4 braced
"def" ->
void $ manyTill anyTok braced
+ "vadjust" ->
+ void (manyTill anyTok braced) <|>
+ void (satisfyTok isPreTok) -- see #7531
_ | isFontSizeCommand name -> return ()
| otherwise -> do
skipopts
@@ -928,6 +953,10 @@ getRawCommand name txt = do
void $ many braced
return $ txt <> untokenize rawargs
+isPreTok :: Tok -> Bool
+isPreTok (Tok _ Word "pre") = True
+isPreTok _ = False
+
isDigitTok :: Tok -> Bool
isDigitTok (Tok _ Word t) = T.all isDigit t
isDigitTok _ = False
@@ -1017,7 +1046,16 @@ resetCaption = updateState $ \st -> st{ sCaption = Nothing
, sLastLabel = Nothing }
env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
+env name p = do
+ -- environments are groups as far as macros are concerned,
+ -- so we need a local copy of the macro table (see above, bgroup, egroup):
+ updateState $ \s -> s{ sMacros = NonEmpty.cons (NonEmpty.head (sMacros s))
+ (sMacros s) }
+ result <- p
+ updateState $ \s -> s{ sMacros = fromMaybe (sMacros s) $
+ NonEmpty.nonEmpty (NonEmpty.tail (sMacros s)) }
+ end_ name
+ return result
tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
tokWith inlineParser = try $ spaces >>
@@ -1031,3 +1069,16 @@ tokWith inlineParser = try $ spaces >>
addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
addMeta field val = updateState $ \st ->
st{ sMeta = addMetaField field val $ sMeta st }
+
+-- remove label spans to avoid duplicated identifier
+removeLabel :: Walkable [Inline] a => Text -> a -> a
+removeLabel lbl = walk go
+ where
+ go (Span (_,_,kvs) _ : rest)
+ | Just lbl' <- lookup "label" kvs
+ , lbl' == lbl = go (dropWhile isSpaceOrSoftBreak rest)
+ go (x:xs) = x : go xs
+ go [] = []
+ isSpaceOrSoftBreak Space = True
+ isSpaceOrSoftBreak SoftBreak = True
+ isSpaceOrSoftBreak _ = False
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index b8bf0ce7f..e4738a763 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -23,11 +23,15 @@ siunitxCommands :: PandocMonad m
=> LP m Inlines -> M.Map Text (LP m Inlines)
siunitxCommands tok = M.fromList
[ ("si", dosi tok)
+ , ("unit", dosi tok) -- v3 version of si
, ("SI", doSI tok)
+ , ("qty", doSI tok) -- v3 version of SI
, ("SIrange", doSIrange True tok)
+ , ("qtyrange", doSIrange True tok) -- v3 version of SIrange
+ , ("SIlist", doSIlist tok)
+ , ("qtylist", doSIlist tok) -- v3 version of SIlist
, ("numrange", doSIrange False tok)
, ("numlist", doSInumlist)
- , ("SIlist", doSIlist tok)
, ("num", doSInum)
, ("ang", doSIang)
]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
index f56728fe1..7d5c4f265 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -368,7 +368,9 @@ addTableCaption = walkM go
((_,classes,kvs), Just ident) ->
(ident,classes,kvs)
_ -> attr
- return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+ return $ addAttrDiv attr'
+ $ maybe id removeLabel mblabel
+ $ Table nullAttr capt spec th tb tf
go x = return x
-- TODO: For now we add a Div to contain table attributes, since
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index c20b72bc5..a4eae56db 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -15,6 +15,7 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
, Macro(..)
, ArgSpec(..)
, ExpansionPoint(..)
+ , MacroScope(..)
, SourcePos
)
where
@@ -43,7 +44,10 @@ tokToText (Tok _ _ t) = t
data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
deriving (Eq, Ord, Show)
-data Macro = Macro ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok]
+data MacroScope = GlobalScope | GroupScope
+ deriving (Eq, Ord, Show)
+
+data Macro = Macro MacroScope ExpansionPoint [ArgSpec] (Maybe [Tok]) [Tok]
deriving Show
data ArgSpec = ArgNum Int | Pattern [Tok]
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2dc7ddf52..b5017a433 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
@@ -21,14 +22,14 @@ module Text.Pandoc.Readers.Markdown (
import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isPunctuation, isSpace)
+import Text.DocLayout (realLength)
import Data.List (transpose, elemIndex, sortOn, foldl')
-import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.Map as M
import Data.Maybe
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
-import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as BS
import System.FilePath (addExtension, takeExtension, takeDirectory)
import qualified System.FilePath.Windows as Windows
import qualified System.FilePath.Posix as Posix
@@ -39,6 +40,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), report)
import Text.Pandoc.Definition as Pandoc
import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Error
+import Safe.Foldable (maximumBounded)
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Walk (walk)
@@ -72,14 +74,12 @@ readMarkdown opts s = do
yamlToMeta :: PandocMonad m
=> ReaderOptions
-> Maybe FilePath
- -> BL.ByteString
+ -> BS.ByteString
-> m Meta
yamlToMeta opts mbfp bstr = do
let parser = do
oldPos <- getPosition
- case mbfp of
- Nothing -> return ()
- Just fp -> setPosition $ initialPos fp
+ setPosition $ initialPos (fromMaybe "" mbfp)
meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
setPosition oldPos
return $ runF meta defaultParserState
@@ -95,7 +95,7 @@ yamlToRefs :: PandocMonad m
=> (Text -> Bool)
-> ReaderOptions
-> Maybe FilePath
- -> BL.ByteString
+ -> BS.ByteString
-> m [MetaValue]
yamlToRefs idpred opts mbfp bstr = do
let parser = do
@@ -198,6 +198,7 @@ inlinesInBalancedBrackets =
go openBrackets =
(() <$ (escapedChar <|>
code <|>
+ math <|>
rawHtmlInline <|>
rawLaTeXInline') >> go openBrackets)
<|>
@@ -326,6 +327,7 @@ referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
referenceKey = try $ do
pos <- getPosition
skipNonindentSpaces
+ notFollowedBy (void cite)
(_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
@@ -829,7 +831,7 @@ listLineCommon :: PandocMonad m => MarkdownParser m Text
listLineCommon = T.concat <$> manyTill
( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
<|> fmap snd (withRaw code)
- <|> fmap snd (htmlTag isCommentTag)
+ <|> fmap (renderTags . (:[]) . fst) (htmlTag isCommentTag)
<|> countChar 1 anyChar
) newline
@@ -1013,19 +1015,18 @@ normalDefinitionList = do
para :: PandocMonad m => MarkdownParser m (F Blocks)
para = try $ do
exts <- getOption readerExtensions
- let implicitFigures x
- | extensionEnabled Ext_implicit_figures exts = do
- x' <- x
- case B.toList x' of
- [Image attr alt (src,tit)]
- | not (null alt) ->
- -- the fig: at beginning of title indicates a figure
- return $ B.singleton
- $ Image attr alt (src, "fig:" <> tit)
- _ -> return x'
- | otherwise = x
- result <- implicitFigures . trimInlinesF <$> inlines1
- option (B.plain <$> result)
+
+ result <- trimInlinesF <$> inlines1
+ let figureOr constr inlns =
+ case B.toList inlns of
+ [Image attr figCaption (src, tit)]
+ | extensionEnabled Ext_implicit_figures exts
+ , not (null figCaption) -> do
+ B.simpleFigureWith attr (B.fromList figCaption) src tit
+
+ _ -> constr inlns
+
+ option (figureOr B.plain <$> result)
$ try $ do
newline
(mempty <$ blanklines)
@@ -1047,7 +1048,7 @@ para = try $ do
if divLevel > 0
then lookAhead divFenceEnd
else mzero
- return $ B.para <$> result
+ return $ figureOr B.para <$> result
plain :: PandocMonad m => MarkdownParser m (F Blocks)
plain = fmap B.plain . trimInlinesF <$> inlines1
@@ -1124,7 +1125,12 @@ rawHtmlBlocks = do
let selfClosing = "/>" `T.isSuffixOf` raw
-- we don't want '<td> text' to be a code block:
skipMany spaceChar
- indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
+ tabStop <- getOption readerTabStop
+ indentlevel <- option 0 $
+ do blankline
+ sum <$> many ( (1 <$ char ' ')
+ <|>
+ (tabStop <$ char '\t') )
-- try to find closing tag
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
@@ -1355,26 +1361,30 @@ pipeTable = try $ do
nonindentSpaces
lookAhead nonspaceChar
(heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
- let heads' = take (length aligns) <$> heads
+ let cellContents = parseFromString' pipeTableCell . trim
+ let numcols = length aligns
+ let heads' = take numcols heads
lines' <- many pipeTableRow
- let lines'' = map (take (length aligns) <$>) lines'
- let maxlength = maximum $
- fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'')
- numColumns <- getOption readerColumns
- let widths = if maxlength > numColumns
+ let lines'' = map (take numcols) lines'
+ let lineWidths = map (sum . map realLength) (heads' : lines'')
+ columns <- getOption readerColumns
+ -- add numcols + 1 for the pipes themselves
+ let widths = if maximumBounded (sum seplengths : lineWidths) + (numcols + 1) > columns
then map (\len ->
fromIntegral len / fromIntegral (sum seplengths))
seplengths
else replicate (length aligns) 0.0
- return (aligns, widths, toHeaderRow <$> heads', map toRow <$> sequence lines'')
+ (headCells :: F [Blocks]) <- sequence <$> mapM cellContents heads'
+ (rows :: F [[Blocks]]) <- sequence <$> mapM (fmap sequence . mapM cellContents) lines''
+ return (aligns, widths, toHeaderRow <$> headCells, map toRow <$> rows)
sepPipe :: PandocMonad m => MarkdownParser m ()
sepPipe = try $ do
char '|' <|> char '+'
notFollowedBy blankline
--- parse a row, also returning probable alignments for org-table cells
-pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
+-- parse a row, returning raw cell contents
+pipeTableRow :: PandocMonad m => MarkdownParser m [Text]
pipeTableRow = try $ do
scanForPipe
skipMany spaceChar
@@ -1382,13 +1392,11 @@ pipeTableRow = try $ do
-- split into cells
let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
<|> void (noneOf "|\n\r")
- let cellContents = withRaw (many chunk) >>=
- parseFromString' pipeTableCell . trim . snd
- cells <- cellContents `sepEndBy1` char '|'
+ cells <- (snd <$> withRaw (many chunk)) `sepEndBy1` char '|'
-- surrounding pipes needed for a one-column table:
guard $ not (length cells == 1 && not openPipe)
blankline
- return $ sequence cells
+ return cells
pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
pipeTableCell =
@@ -1692,21 +1700,29 @@ strikeout = fmap B.strikeout <$>
superscript :: PandocMonad m => MarkdownParser m (F Inlines)
superscript = do
- guardEnabled Ext_superscript
fmap B.superscript <$> try (do
char '^'
- mconcat <$> many1Till (do notFollowedBy spaceChar
- notFollowedBy newline
- inline) (char '^'))
+ mconcat <$> (try regularSuperscript <|> try mmdShortSuperscript))
+ where regularSuperscript = many1Till (do guardEnabled Ext_superscript
+ notFollowedBy spaceChar
+ notFollowedBy newline
+ inline) (char '^')
+ mmdShortSuperscript = do guardEnabled Ext_short_subsuperscripts
+ result <- T.pack <$> many1 alphaNum
+ return $ return $ return $ B.str result
subscript :: PandocMonad m => MarkdownParser m (F Inlines)
subscript = do
- guardEnabled Ext_subscript
fmap B.subscript <$> try (do
char '~'
- mconcat <$> many1Till (do notFollowedBy spaceChar
- notFollowedBy newline
- inline) (char '~'))
+ mconcat <$> (try regularSubscript <|> mmdShortSubscript))
+ where regularSubscript = many1Till (do guardEnabled Ext_subscript
+ notFollowedBy spaceChar
+ notFollowedBy newline
+ inline) (char '~')
+ mmdShortSubscript = do guardEnabled Ext_short_subsuperscripts
+ result <- T.pack <$> many1 alphaNum
+ return $ return $ return $ B.str result
whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
@@ -1768,7 +1784,6 @@ endline = try $ do
reference :: PandocMonad m => MarkdownParser m (F Inlines, Text)
reference = do
guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^")
- guardDisabled Ext_citations <|> notFollowedBy' (string "[@")
withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
parenthesizedChars :: PandocMonad m => MarkdownParser m Text
@@ -2187,6 +2202,7 @@ normalCite = try $ do
citations <- citeList
spnl
char ']'
+ notFollowedBy (oneOf "{([") -- not a link or a bracketed span
return citations
suffix :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -2200,7 +2216,7 @@ suffix = try $ do
prefix :: PandocMonad m => MarkdownParser m (F Inlines)
prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']'
+ manyTill (notFollowedBy (char ';') >> inline) (char ']'
<|> lookAhead
(try $ do optional (try (char ';' >> spnl))
citeKey True
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 825e4a2eb..9348a8053 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -201,7 +201,12 @@ para = do
contents <- trimInlines . mconcat <$> many1 inline
if F.all (==Space) contents
then return mempty
- else return $ B.para contents
+ else case B.toList contents of
+ -- For the MediaWiki format all images are considered figures
+ [Image attr figureCaption (src, title)] ->
+ return $ B.simpleFigureWith
+ attr (B.fromList figureCaption) src title
+ _ -> return $ B.para contents
table :: PandocMonad m => MWParser m Blocks
table = do
@@ -631,7 +636,7 @@ image = try $ do
let attr = ("", [], kvs)
caption <- (B.str fname <$ sym "]]")
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
- return $ B.imageWith attr fname ("fig:" <> stringify caption) caption
+ return $ B.imageWith attr fname (stringify caption) caption
imageOption :: PandocMonad m => MWParser m Text
imageOption = try $ char '|' *> opt
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index cbc523b25..7991dca5c 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -17,102 +17,62 @@ module Text.Pandoc.Readers.Metadata (
yamlMetaBlock,
yamlMap ) where
-import Control.Monad
+
import Control.Monad.Except (throwError)
-import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as B
import qualified Data.Map as M
-import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
-import qualified Data.YAML as YAML
-import qualified Data.YAML.Event as YE
+import qualified Data.Yaml as Yaml
+import Data.Aeson (Value(..), Object, Result(..), fromJSON, (.:?), withObject)
+import Data.Aeson.Types (parse)
+import Text.Pandoc.Shared (tshow)
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
-import Text.Pandoc.Definition
+import Text.Pandoc.Definition hiding (Null)
import Text.Pandoc.Error
-import Text.Pandoc.Parsing hiding (tableWith)
-import Text.Pandoc.Shared
-import qualified Data.Text.Lazy as TL
+import Text.Pandoc.Parsing hiding (tableWith, parse)
+
+
import qualified Text.Pandoc.UTF8 as UTF8
yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
- -> BL.ByteString
+ -> B.ByteString
-> ParserT Sources st m (Future st Meta)
yamlBsToMeta pMetaValue bstr = do
- case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
- Right (YAML.Doc (YAML.Mapping _ _ o):_)
- -> fmap Meta <$> yamlMap pMetaValue o
+ case Yaml.decodeAllEither' bstr of
+ Right (Object o:_) -> fmap Meta <$> yamlMap pMetaValue o
Right [] -> return . return $ mempty
- Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
- -> return . return $ mempty
- -- the following is what we get from a comment:
- Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))]
- -> return . return $ mempty
+ Right [Null] -> return . return $ mempty
Right _ -> Prelude.fail "expected YAML object"
- Left (yamlpos, err')
- -> do pos <- getPosition
- setPosition $ incSourceLine
- (setSourceColumn pos (YE.posColumn yamlpos))
- (YE.posLine yamlpos - 1)
- Prelude.fail err'
-
-fakePos :: YAML.Pos
-fakePos = YAML.Pos (-1) (-1) 1 0
-
-lookupYAML :: Text
- -> YAML.Node YE.Pos
- -> Maybe (YAML.Node YE.Pos)
-lookupYAML t (YAML.Mapping _ _ m) =
- M.lookup (YAML.Scalar fakePos (YAML.SUnknown YE.untagged t)) m
- `mplus`
- M.lookup (YAML.Scalar fakePos (YAML.SStr t)) m
-lookupYAML _ _ = Nothing
+ Left err' -> do
+ throwError $ PandocParseError
+ $ T.pack $ Yaml.prettyPrintParseException err'
-- Returns filtered list of references.
yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
-> (Text -> Bool) -- ^ Filter for id
- -> BL.ByteString
+ -> B.ByteString
-> ParserT Sources st m (Future st [MetaValue])
yamlBsToRefs pMetaValue idpred bstr =
- case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
- Right (YAML.Doc o@YAML.Mapping{}:_)
- -> case lookupYAML "references" o of
- Just (YAML.Sequence _ _ ns) -> do
- let g n = case lookupYAML "id" n of
- Just n' ->
- case nodeToKey n' of
- Nothing -> False
- Just t -> idpred t ||
- case lookupYAML "other-ids" n of
- Just (YAML.Sequence _ _ ns') ->
- let ts' = mapMaybe nodeToKey ns'
- in any idpred ts'
- _ -> False
- Nothing -> False
- sequence <$>
- mapM (yamlToMetaValue pMetaValue) (filter g ns)
- Just _ ->
- Prelude.fail "expecting sequence in 'references' field"
- Nothing ->
- Prelude.fail "expecting 'references' field"
-
- Right [] -> return . return $ mempty
- Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
- -> return . return $ mempty
- Right _ -> Prelude.fail "expecting YAML object"
- Left (yamlpos, err')
- -> do pos <- getPosition
- setPosition $ incSourceLine
- (setSourceColumn pos (YE.posColumn yamlpos))
- (YE.posLine yamlpos - 1)
- Prelude.fail err'
-
-
-nodeToKey :: YAML.Node YE.Pos -> Maybe Text
-nodeToKey (YAML.Scalar _ (YAML.SStr t)) = Just t
-nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
-nodeToKey _ = Nothing
+ case Yaml.decodeAllEither' bstr of
+ Right (Object m : _) -> do
+ let isSelected (String t) = idpred t
+ isSelected _ = False
+ let hasSelectedId (Object o) =
+ case parse (withObject "ref" (.:? "id")) (Object o) of
+ Success (Just id') -> isSelected id'
+ _ -> False
+ hasSelectedId _ = False
+ case parse (withObject "metadata" (.:? "references")) (Object m) of
+ Success (Just refs) -> sequence <$>
+ mapM (yamlToMetaValue pMetaValue) (filter hasSelectedId refs)
+ _ -> return $ return []
+ Right _ -> return . return $ []
+ Left err' -> do
+ throwError $ PandocParseError
+ $ T.pack $ Yaml.prettyPrintParseException err'
normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
@@ -133,47 +93,36 @@ normalizeMetaValue pMetaValue x =
isSpaceChar '\t' = True
isSpaceChar _ = False
-checkBoolean :: Text -> Maybe Bool
-checkBoolean t
- | t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE" = Just True
- | t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False
- | otherwise = Nothing
-
yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
- -> YAML.Node YE.Pos
+ -> Value
-> ParserT Sources st m (Future st MetaValue)
-yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
- case x of
- YAML.SStr t -> normalizeMetaValue pMetaValue t
- YAML.SBool b -> return $ return $ MetaBool b
- YAML.SFloat d -> return $ return $ MetaString $ tshow d
- YAML.SInt i -> return $ return $ MetaString $ tshow i
- YAML.SUnknown _ t ->
- case checkBoolean t of
- Just b -> return $ return $ MetaBool b
- Nothing -> normalizeMetaValue pMetaValue t
- YAML.SNull -> return $ return $ MetaString ""
-
-yamlToMetaValue pMetaValue (YAML.Sequence _ _ xs) =
- fmap MetaList . sequence
- <$> mapM (yamlToMetaValue pMetaValue) xs
-yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
- fmap MetaMap <$> yamlMap pMetaValue o
-yamlToMetaValue _ _ = return $ return $ MetaString ""
+yamlToMetaValue pMetaValue v =
+ case v of
+ String t -> normalizeMetaValue pMetaValue t
+ Bool b -> return $ return $ MetaBool b
+ Number d -> normalizeMetaValue pMetaValue $
+ case fromJSON v of
+ Success (x :: Int) -> tshow x
+ _ -> tshow d
+ Null -> return $ return $ MetaString ""
+ Array{} -> do
+ case fromJSON v of
+ Error err' -> throwError $ PandocParseError $ T.pack err'
+ Success xs -> fmap MetaList . sequence <$>
+ mapM (yamlToMetaValue pMetaValue) xs
+ Object o -> fmap MetaMap <$> yamlMap pMetaValue o
yamlMap :: (PandocMonad m, HasLastStrPosition st)
=> ParserT Sources st m (Future st MetaValue)
- -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
+ -> Object
-> ParserT Sources st m (Future st (M.Map Text MetaValue))
yamlMap pMetaValue o = do
- kvs <- forM (M.toList o) $ \(key, v) -> do
- k <- maybe (throwError $ PandocParseError
- "Non-string key in YAML mapping")
- return $ nodeToKey key
- return (k, v)
- let kvs' = filter (not . ignorable . fst) kvs
- fmap M.fromList . sequence <$> mapM toMeta kvs'
+ case fromJSON (Object o) of
+ Error err' -> throwError $ PandocParseError $ T.pack err'
+ Success (m' :: M.Map Text Value) -> do
+ let kvs = filter (not . ignorable . fst) $ M.toList m'
+ fmap M.fromList . sequence <$> mapM toMeta kvs
where
ignorable t = "_" `T.isSuffixOf` t
toMeta (k, v) = do
@@ -194,7 +143,7 @@ yamlMetaBlock parser = try $ do
-- by including --- and ..., we allow yaml blocks with just comments:
let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
- yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+ yamlBsToMeta parser $ UTF8.fromText rawYaml
stopLine :: Monad m => ParserT Sources st m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index f18d2f9a7..9a689b0e8 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -474,15 +474,16 @@ figure = try $ do
figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
figKeyVals = blockAttrKeyValues figAttrs
attr = (figLabel, mempty, figKeyVals)
- figTitle = (if isFigure then withFigPrefix else id) figName
- in
- B.para . B.imageWith attr imgSrc figTitle <$> figCaption
-
- withFigPrefix :: Text -> Text
- withFigPrefix cs =
- if "fig:" `T.isPrefixOf` cs
- then cs
- else "fig:" <> cs
+ in if isFigure
+ then (\c ->
+ B.simpleFigureWith
+ attr c imgSrc (unstackFig figName)) <$> figCaption
+ else B.para . B.imageWith attr imgSrc figName <$> figCaption
+ unstackFig :: Text -> Text
+ unstackFig figName =
+ if "fig:" `T.isPrefixOf` figName
+ then T.drop 4 figName
+ else figName
-- | Succeeds if looking at the end of the current paragraph
endOfParagraph :: Monad m => OrgParser m ()
@@ -889,7 +890,10 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
rest <- T.concat <$> many (listContinuation markerLength)
- contents <- parseFromString blocks $ firstLine <> blank <> rest
+ contents <- parseFromString (do initial <- paraOrPlain <|> pure mempty
+ subsequent <- blocks
+ return $ initial <> subsequent)
+ (firstLine <> blank <> rest)
return (maybe id (prependInlines . checkboxToInlines) box <$> contents)
-- | Prepend inlines to blocks, adding them to the first paragraph or
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index 2dcbecb1d..1c4f253cc 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -41,6 +41,7 @@ documentTree :: PandocMonad m
-> OrgParser m (F Inlines)
-> OrgParser m (F Headline)
documentTree blocks inline = do
+ properties <- option mempty propertiesDrawer
initialBlocks <- blocks
headlines <- sequence <$> manyTill (headline blocks inline 1) eof
title <- fmap docTitle . orgStateMeta <$> getState
@@ -54,7 +55,7 @@ documentTree blocks inline = do
, headlineText = B.fromList title'
, headlineTags = mempty
, headlinePlanning = emptyPlanning
- , headlineProperties = mempty
+ , headlineProperties = properties
, headlineContents = initialBlocks'
, headlineChildren = headlines'
}
@@ -163,8 +164,15 @@ unprunedHeadlineToBlocks hdln st =
in if not usingSelectedTags ||
any (`Set.member` orgStateSelectTags st) (headlineTags rootNode')
then do headlineBlocks <- headlineToBlocks rootNode'
+ -- add metadata from root node :PROPERTIES:
+ updateState $ \s ->
+ s{ orgStateMeta = foldr
+ (\(PropertyKey k, PropertyValue v) m ->
+ B.setMeta k v <$> m)
+ (orgStateMeta s)
+ (headlineProperties rootNode') }
-- ignore first headline, it's the document's title
- return . drop 1 . B.toList $ headlineBlocks
+ return $ drop 1 $ B.toList headlineBlocks
else do headlineBlocks <- mconcat <$> mapM headlineToBlocks
(headlineChildren rootNode')
return . B.toList $ headlineBlocks
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 6862dd71e..617f98a10 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -31,11 +31,10 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
import Text.Pandoc.Sources (ToSources(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-
-import Control.Monad (guard, mplus, mzero, unless, void, when)
+import Safe (lastMay)
+import Control.Monad (guard, mplus, mzero, unless, when, void)
import Control.Monad.Trans (lift)
import Data.Char (isAlphaNum, isSpace)
-import Data.List (intersperse)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
@@ -148,31 +147,177 @@ endline = try $ do
-- Citations
--
--- The state of citations is a bit confusing due to the lack of an official
--- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the
--- first to be implemented here and is almost identical to Markdown's citation
--- syntax. The org-ref package is in wide use to handle citations, but the
--- syntax is a bit limiting and not quite as simple to write. The
--- semi-official Org-mode citation syntax is based on John MacFarlane's Pandoc
--- sytax and Org-oriented enhancements contributed by Richard Lawrence and
--- others. It's dubbed Berkeley syntax due the place of activity of its main
--- contributors. All this should be consolidated once an official Org-mode
--- citation syntax has emerged.
+-- We first try to parse official org-cite citations, then fall
+-- back to org-ref citations (which are still in wide use).
+
+-- | A citation in org-cite style
+orgCite :: PandocMonad m => OrgParser m (F [Citation])
+orgCite = try $ do
+ string "[cite"
+ (sty, _variants) <- citeStyle
+ char ':'
+ spnl
+ globalPref <- option mempty (try (citePrefix <* char ';'))
+ items <- citeItems
+ globalSuff <- option mempty (try (char ';' *> citeSuffix))
+ spnl
+ char ']'
+ return $ adjustCiteStyle sty .
+ addPrefixToFirstItem globalPref .
+ addSuffixToLastItem globalSuff $ items
+
+adjustCiteStyle :: CiteStyle -> (F [Citation]) -> (F [Citation])
+adjustCiteStyle sty cs = do
+ cs' <- cs
+ case cs' of
+ [] -> return []
+ (d:ds) -- TODO needs refinement
+ -> case sty of
+ TextStyle -> return $ d{ citationMode = AuthorInText
+ , citationSuffix = dropWhile (== Space)
+ (citationSuffix d)} : ds
+ NoAuthorStyle -> return $ d{ citationMode = SuppressAuthor } : ds
+ _ -> return (d:ds)
+
+addPrefixToFirstItem :: (F Inlines) -> (F [Citation]) -> (F [Citation])
+addPrefixToFirstItem aff cs = do
+ cs' <- cs
+ aff' <- aff
+ case cs' of
+ [] -> return []
+ (d:ds) -> return (d{ citationPrefix =
+ B.toList aff' <> citationPrefix d }:ds)
+
+addSuffixToLastItem :: (F Inlines) -> (F [Citation]) -> (F [Citation])
+addSuffixToLastItem aff cs = do
+ cs' <- cs
+ aff' <- aff
+ case lastMay cs' of
+ Nothing -> return cs'
+ Just d ->
+ return (init cs' ++ [d{ citationSuffix =
+ citationSuffix d <> B.toList aff' }])
+
+citeItems :: PandocMonad m => OrgParser m (F [Citation])
+citeItems = sequence <$> citeItem `sepBy1` (char ';')
+
+citeItem :: PandocMonad m => OrgParser m (F Citation)
+citeItem = do
+ pref <- citePrefix
+ itemKey <- orgCiteKey
+ suff <- citeSuffix
+ return $ do
+ pre' <- pref
+ suf' <- suff
+ return Citation
+ { citationId = itemKey
+ , citationPrefix = B.toList pre'
+ , citationSuffix = B.toList suf'
+ , citationMode = NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+orgCiteKey :: PandocMonad m => OrgParser m Text
+orgCiteKey = do
+ char '@'
+ T.pack <$> many1 (satisfy orgCiteKeyChar)
+
+orgCiteKeyChar :: Char -> Bool
+orgCiteKeyChar c =
+ isAlphaNum c || c `elem` ['.',':','?','!','`','\'','/','*','@','+','|',
+ '(',')','{','}','<','>','&','_','^','$','#',
+ '%','~','-']
+
+rawAffix :: PandocMonad m => Bool -> OrgParser m Text
+rawAffix isPrefix = snd <$> withRaw
+ (many
+ (affixChar
+ <|>
+ try (void (char '[' >> rawAffix isPrefix >> char ']'))))
+ where
+ affixChar = void $ satisfy $ \c ->
+ not (c == '^' || c == ';' || c == '[' || c == ']') &&
+ (not isPrefix || c /= '@')
+
+citePrefix :: PandocMonad m => OrgParser m (F Inlines)
+citePrefix =
+ rawAffix True >>= parseFromString (trimInlinesF . mconcat <$> many inline)
+
+citeSuffix :: PandocMonad m => OrgParser m (F Inlines)
+citeSuffix =
+ rawAffix False >>= parseFromString parseSuffix
+ where
+ parseSuffix = do
+ hasSpace <- option False
+ (True <$ try (spaceChar >> skipSpaces >> lookAhead nonspaceChar))
+ ils <- trimInlinesF . mconcat <$> many inline
+ return $ if hasSpace
+ then (B.space <>) <$> ils
+ else ils
+
+citeStyle :: PandocMonad m => OrgParser m (CiteStyle, [CiteVariant])
+citeStyle = option (DefStyle, []) $ do
+ sty <- option DefStyle $ try $ char '/' *> orgCiteStyle
+ variants <- option [] $ try $ char '/' *> orgCiteVariants
+ return (sty, variants)
+
+orgCiteStyle :: PandocMonad m => OrgParser m CiteStyle
+orgCiteStyle = choice $ map try
+ [ NoAuthorStyle <$ string "noauthor"
+ , NoAuthorStyle <$ string "na"
+ , LocatorsStyle <$ string "locators"
+ , LocatorsStyle <$ char 'l'
+ , NociteStyle <$ string "nocite"
+ , NociteStyle <$ char 'n'
+ , TextStyle <$ string "text"
+ , TextStyle <$ char 't'
+ ]
+
+orgCiteVariants :: PandocMonad m => OrgParser m [CiteVariant]
+orgCiteVariants =
+ (fullnameVariant `sepBy1` (char '-')) <|> (many1 onecharVariant)
+ where
+ fullnameVariant = choice $ map try
+ [ Bare <$ string "bare"
+ , Caps <$ string "caps"
+ , Full <$ string "full"
+ ]
+ onecharVariant = choice
+ [ Bare <$ char 'b'
+ , Caps <$ char 'c'
+ , Full <$ char 'f'
+ ]
+
+data CiteStyle =
+ NoAuthorStyle
+ | LocatorsStyle
+ | NociteStyle
+ | TextStyle
+ | DefStyle
+ deriving Show
+
+data CiteVariant =
+ Caps
+ | Bare
+ | Full
+ deriving Show
+
+
+spnl :: PandocMonad m => OrgParser m ()
+spnl =
+ skipSpaces *> optional (newline *> notFollowedBy blankline *> skipSpaces)
cite :: PandocMonad m => OrgParser m (F Inlines)
-cite = try $ berkeleyCite <|> do
+cite = do
guardEnabled Ext_citations
- (cs, raw) <- withRaw $ choice
- [ pandocOrgCite
+ (cs, raw) <- withRaw $ try $ choice
+ [ orgCite
, orgRefCite
- , berkeleyTextualCite
]
return $ flip B.cite (B.text raw) <$> cs
--- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@).
-pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation])
-pandocOrgCite = try $
- char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']'
+-- org-ref
orgRefCite :: PandocMonad m => OrgParser m (F [Citation])
orgRefCite = try $ choice
@@ -201,100 +346,6 @@ normalOrgRefCite = try $ do
, citationHash = 0
}
--- | Read an Berkeley-style Org-mode citation. Berkeley citation style was
--- develop and adjusted to Org-mode style by John MacFarlane and Richard
--- Lawrence, respectively, both philosophers at UC Berkeley.
-berkeleyCite :: PandocMonad m => OrgParser m (F Inlines)
-berkeleyCite = try $ do
- bcl <- berkeleyCitationList
- return $ do
- parens <- berkeleyCiteParens <$> bcl
- prefix <- berkeleyCiteCommonPrefix <$> bcl
- suffix <- berkeleyCiteCommonSuffix <$> bcl
- citationList <- berkeleyCiteCitations <$> bcl
- return $
- if parens
- then toCite
- . maybe id (alterFirst . prependPrefix) prefix
- . maybe id (alterLast . appendSuffix) suffix
- $ citationList
- else maybe mempty (<> " ") prefix
- <> toListOfCites (map toInTextMode citationList)
- <> maybe mempty (", " <>) suffix
- where
- toCite :: [Citation] -> Inlines
- toCite cs = B.cite cs mempty
-
- toListOfCites :: [Citation] -> Inlines
- toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty)
-
- toInTextMode :: Citation -> Citation
- toInTextMode c = c { citationMode = AuthorInText }
-
- alterFirst, alterLast :: (a -> a) -> [a] -> [a]
- alterFirst _ [] = []
- alterFirst f (c:cs) = f c : cs
- alterLast f = reverse . alterFirst f . reverse
-
- prependPrefix, appendSuffix :: Inlines -> Citation -> Citation
- prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c }
- appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf }
-
-data BerkeleyCitationList = BerkeleyCitationList
- { berkeleyCiteParens :: Bool
- , berkeleyCiteCommonPrefix :: Maybe Inlines
- , berkeleyCiteCommonSuffix :: Maybe Inlines
- , berkeleyCiteCitations :: [Citation]
- }
-berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList)
-berkeleyCitationList = try $ do
- char '['
- parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ]
- char ':'
- skipSpaces
- commonPrefix <- optionMaybe (try $ citationListPart <* char ';')
- citations <- citeList
- commonSuffix <- optionMaybe (try citationListPart)
- char ']'
- return (BerkeleyCitationList parens
- <$> sequence commonPrefix
- <*> sequence commonSuffix
- <*> citations)
- where
- citationListPart :: PandocMonad m => OrgParser m (F Inlines)
- citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
- notFollowedBy' $ citeKey False
- notFollowedBy (oneOf ";]")
- inline
-
-berkeleyBareTag :: PandocMonad m => OrgParser m ()
-berkeleyBareTag = try $ void berkeleyBareTag'
-
-berkeleyParensTag :: PandocMonad m => OrgParser m ()
-berkeleyParensTag = try . void $ enclosedByPair1 '(' ')' berkeleyBareTag'
-
-berkeleyBareTag' :: PandocMonad m => OrgParser m ()
-berkeleyBareTag' = try $ void (string "cite")
-
-berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
-berkeleyTextualCite = try $ do
- (suppressAuthor, key) <- citeKey False
- returnF . return $ Citation
- { citationId = key
- , citationPrefix = mempty
- , citationSuffix = mempty
- , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText
- , citationNoteNum = 0
- , citationHash = 0
- }
-
--- The following is what a Berkeley-style bracketed textual citation parser
--- would look like. However, as these citations are a subset of Pandoc's Org
--- citation style, this isn't used.
--- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation])
--- berkeleyBracketedTextualCite = try . (fmap head) $
--- enclosedByPair1 '[' ']' berkeleyTextualCite
-
-- | Read a link-like org-ref style citation. The citation includes pre and
-- post text. However, multiple citations are not possible due to limitations
-- in the syntax.
@@ -345,39 +396,6 @@ orgRefCiteMode =
, ("citeyear", SuppressAuthor)
]
-citeList :: PandocMonad m => OrgParser m (F [Citation])
-citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
-
-citation :: PandocMonad m => OrgParser m (F Citation)
-citation = try $ do
- pref <- prefix
- (suppress_author, key) <- citeKey False
- suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return Citation
- { citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
- where
- prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
- suffix = try $ do
- hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
- skipSpaces
- rest <- trimInlinesF . mconcat <$>
- many (notFollowedBy (oneOf ";]") *> inline)
- return $ if hasSpace
- then (B.space <>) <$> rest
- else rest
-
footnote :: PandocMonad m => OrgParser m (F Inlines)
footnote = try $ do
note <- inlineNote <|> referencedNote
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index a1b21046a..ccb6744e7 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -27,13 +27,13 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Shared (blocksToInlines, safeRead)
+import Text.Pandoc.Network.HTTP (urlEncode)
import Control.Monad (mzero, void)
import Data.List (intercalate, intersperse)
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
-import Network.HTTP (urlEncode)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as T
@@ -188,7 +188,7 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
-- inefficient
replacePlain = try $ (\x -> T.concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
- replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack)
+ replaceUrl = try $ (\x -> T.concat . flip intersperse x . urlEncode)
<$> sequence [tillSpecifier 'h', rest]
justAppend = try $ (<>) <$> rest
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 3990f0cb5..88471eb0a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -466,14 +466,11 @@ includeDirective top fields body = do
let classes = maybe [] T.words (lookup "class" fields)
let ident = maybe "" trimr $ lookup "name" fields
let parser =
- case lookup "code" fields of
+ case lookup "code" fields `mplus` lookup "literal" fields of
Just lang ->
(codeblock ident classes fields (trimr lang) False
. sourcesToText) <$> getInput
- Nothing ->
- case lookup "literal" fields of
- Just _ -> B.rawBlock "rst" . sourcesToText <$> getInput
- Nothing -> parseBlocks
+ Nothing -> parseBlocks
let isLiteral = isJust (lookup "code" fields `mplus` lookup "literal" fields)
let selectLines =
(case trim <$> lookup "end-before" fields of
@@ -728,8 +725,8 @@ directive' = do
"figure" -> do
(caption, legend) <- parseFromString' extractCaption body'
let src = escapeURI $ trim top
- return $ B.para (B.imageWith (imgAttr "figclass") src "fig:"
- caption) <> legend
+ return $ B.simpleFigureWith
+ (imgAttr "figclass") caption src "" <> legend
"image" -> do
let src = escapeURI $ trim top
let alt = B.str $ maybe "image" trim $ lookup "alt" fields
@@ -922,14 +919,22 @@ addNewRole roleText fields = do
(baseRole, baseFmt, baseAttr) =
getBaseRole (parentRole, Nothing, nullAttr) customRoles
fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
- annotate :: [Text] -> [Text]
- annotate = maybe id (:) $
- if baseRole == "code"
- then lookup "language" fields
- else Nothing
- attr = let (ident, classes, keyValues) = baseAttr
- -- nub in case role name & language class are the same
- in (ident, nub . (role :) . annotate $ classes, keyValues)
+
+ updateClasses :: [Text] -> [Text]
+ updateClasses oldClasses = let
+
+ codeLanguageClass = if baseRole == "code"
+ then maybeToList (lookup "language" fields)
+ else []
+
+ -- if no ":class:" field is given, the default is the role name
+ classFieldClasses = maybe [role] T.words (lookup "class" fields)
+
+ -- nub in case role name & language class are the same
+ in nub (classFieldClasses ++ codeLanguageClass ++ oldClasses)
+
+ attr = let (ident, baseClasses, keyValues) = baseAttr
+ in (ident, updateClasses baseClasses, keyValues)
-- warn about syntax we ignore
forM_ fields $ \(key, _) -> case key of
@@ -1158,10 +1163,11 @@ referenceNames = do
let rn = try $ do
string ".. _"
ref <- quotedReferenceName
- <|> manyChar ( noneOf ":\n"
+ <|> manyChar ( noneOf "\\:\n"
<|> try (char '\n' <*
string " " <*
notFollowedBy blankline)
+ <|> try (char '\\' *> char ':')
<|> try (char ':' <* lookAhead alphaNum)
)
char ':'
diff --git a/src/Text/Pandoc/Readers/RTF.hs b/src/Text/Pandoc/Readers/RTF.hs
new file mode 100644
index 000000000..3938681f4
--- /dev/null
+++ b/src/Text/Pandoc/Readers/RTF.hs
@@ -0,0 +1,1351 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Readers.RTF
+ Copyright : Copyright (C) 2021 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane (<jgm@berkeley.edu>)
+ Stability : alpha
+ Portability : portable
+
+Conversion of RTF documents 'Pandoc' document.
+We target version 1.5 of the RTF spec.
+-}
+module Text.Pandoc.Readers.RTF (readRTF) where
+
+import qualified Data.IntMap as IntMap
+import qualified Data.Sequence as Seq
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Data.List (find, foldl')
+import Data.Word (Word8, Word16)
+import Data.Default
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Read as TR
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), insertMedia)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing
+import Text.Pandoc.Shared (safeRead, tshow)
+import Data.Char (isAlphaNum, chr, isAscii, isLetter, isSpace, ord)
+import qualified Data.ByteString.Lazy as BL
+import Data.Digest.Pure.SHA (sha1, showDigest)
+import Data.Maybe (mapMaybe, fromMaybe)
+import Safe (lastMay, initSafe, headDef)
+-- import Debug.Trace
+
+-- TODO:
+-- [ ] more complex table features
+--
+
+-- | Read RTF from an input string and return a Pandoc document.
+readRTF :: (PandocMonad m, ToSources a)
+ => ReaderOptions
+ -> a
+ -> m Pandoc
+readRTF opts s = do
+ let sources = toSources s
+ parsed <- readWithM parseRTF def{ sOptions = opts } sources
+ case parsed of
+ Left e -> throwError e
+ Right d -> return d
+
+data CharSet = ANSI | Mac | Pc | Pca
+ deriving (Show, Eq)
+
+-- first index is the list (or override) id, second is the list level
+type ListTable = IntMap.IntMap ListLevelTable
+type ListLevelTable = IntMap.IntMap ListType
+
+data RTFState = RTFState { sOptions :: ReaderOptions
+ , sCharSet :: CharSet
+ , sGroupStack :: [Properties]
+ , sListStack :: [List]
+ , sCurrentCell :: Blocks
+ , sTableRows :: [TableRow] -- reverse order
+ , sTextContent :: [(Properties, Text)]
+ , sMetadata :: [(Text, Inlines)]
+ , sFontTable :: FontTable
+ , sStylesheet :: Stylesheet
+ , sListTable :: ListTable
+ , sListOverrideTable :: ListTable
+ , sEatChars :: Int
+ } deriving (Show)
+
+instance Default RTFState where
+ def = RTFState { sOptions = def
+ , sCharSet = ANSI
+ , sGroupStack = []
+ , sListStack = []
+ , sCurrentCell = mempty
+ , sTableRows = []
+ , sTextContent = []
+ , sMetadata = []
+ , sFontTable = mempty
+ , sStylesheet = mempty
+ , sListTable = mempty
+ , sListOverrideTable = mempty
+ , sEatChars = 0
+ }
+
+type FontTable = IntMap.IntMap FontFamily
+
+data FontFamily =
+ Roman | Swiss | Modern | Script | Decor | Tech | Bidi
+ deriving (Show, Eq)
+
+data StyleType = ParagraphStyle | SectionStyle | CharStyle | TableStyle
+ deriving (Show, Eq)
+
+data Style =
+ Style { styleNum :: Int
+ , styleType :: StyleType
+ , styleBasedOn :: Maybe Int
+ , styleName :: Text
+ , styleFormatting :: [Tok]
+ } deriving (Show, Eq)
+
+type Stylesheet = IntMap.IntMap Style
+
+data PictType =
+ Emfblip | Pngblip | Jpegblip
+ deriving (Show, Eq)
+
+data Pict =
+ Pict { picType :: Maybe PictType
+ , picWidth :: Maybe Int
+ , picHeight :: Maybe Int
+ , picWidthGoal :: Maybe Int
+ , picHeightGoal :: Maybe Int
+ , picBinary :: Bool
+ , picData :: Text
+ , picName :: Text
+ , picBytes :: BL.ByteString
+ } deriving (Show, Eq)
+
+instance Default Pict where
+ def = Pict { picType = Nothing
+ , picWidth = Nothing
+ , picHeight = Nothing
+ , picWidthGoal = Nothing
+ , picHeightGoal = Nothing
+ , picBinary = False
+ , picData = mempty
+ , picName = mempty
+ , picBytes = mempty }
+
+data Properties =
+ Properties
+ { gBold :: Bool
+ , gItalic :: Bool
+ , gCaps :: Bool
+ , gDeleted :: Bool
+ , gSub :: Bool
+ , gSuper :: Bool
+ , gSmallCaps :: Bool
+ , gUnderline :: Bool
+ , gHyperlink :: Maybe Text
+ , gAnchor :: Maybe Text
+ , gImage :: Maybe Pict
+ , gFontFamily :: Maybe FontFamily
+ , gHidden :: Bool
+ , gUC :: Int -- number of ansi chars to skip after unicode char
+ , gFootnote :: Maybe Blocks
+ , gOutlineLevel :: Maybe ListLevel
+ , gListOverride :: Maybe Override
+ , gListLevel :: Maybe Int
+ , gInTable :: Bool
+ } deriving (Show, Eq)
+
+instance Default Properties where
+ def = Properties { gBold = False
+ , gItalic = False
+ , gCaps = False
+ , gDeleted = False
+ , gSub = False
+ , gSuper = False
+ , gSmallCaps = False
+ , gUnderline = False
+ , gHyperlink = Nothing
+ , gAnchor = Nothing
+ , gImage = Nothing
+ , gFontFamily = Nothing
+ , gHidden = False
+ , gUC = 1
+ , gFootnote = Nothing
+ , gOutlineLevel = Nothing
+ , gListOverride = Nothing
+ , gListLevel = Nothing
+ , gInTable = False
+ }
+
+type RTFParser m = ParserT Sources RTFState m
+
+data ListType = Bullet | Ordered ListAttributes
+ deriving (Show, Eq)
+
+type Override = Int
+
+type ListLevel = Int
+
+data List =
+ List Override ListLevel ListType [Blocks] -- items in reverse order
+ deriving (Show, Eq)
+
+newtype TableRow = TableRow [Blocks] -- cells in reverse order
+ deriving (Show, Eq)
+
+parseRTF :: PandocMonad m => RTFParser m Pandoc
+parseRTF = do
+ skipMany nl
+ toks <- many tok
+ -- return $! traceShowId toks
+ bs <- (case toks of
+ -- if we start with {\rtf1...}, parse that and ignore
+ -- what follows (which in certain cases can be non-RTF content)
+ rtftok@(Tok _ (Grouped (Tok _ (ControlWord "rtf" (Just 1)) : _))) : _
+ -> foldM processTok mempty [rtftok]
+ _ -> foldM processTok mempty toks)
+ >>= emitBlocks
+ unclosed <- closeContainers
+ let doc = B.doc $ bs <> unclosed
+ kvs <- sMetadata <$> getState
+ pure $ foldr (uncurry B.setMeta) doc kvs
+
+data Tok = Tok SourcePos TokContents
+ deriving (Show, Eq)
+
+data TokContents =
+ ControlWord Text (Maybe Int)
+ | ControlSymbol Char
+ | UnformattedText Text
+ | BinData BL.ByteString
+ | HexVal Word8
+ | Grouped [Tok]
+ deriving (Show, Eq)
+
+tok :: PandocMonad m => RTFParser m Tok
+tok = do
+ pos <- getPosition
+ Tok pos <$> ((controlThing <|> unformattedText <|> grouped) <* skipMany nl)
+ where
+ controlThing = do
+ char '\\' *>
+ ( binData
+ <|> (ControlWord <$> letterSequence <*> (parameter <* optional delimChar))
+ <|> (HexVal <$> hexVal)
+ <|> (ControlSymbol <$> anyChar) )
+ binData = try $ do
+ string "bin" <* notFollowedBy letter
+ n <- fromMaybe 0 <$> parameter
+ spaces
+ -- NOTE: We assume here that if the document contains binary
+ -- data, it will not be valid UTF-8 and hence it will have been
+ -- read as latin1, so we can recover the data in the following
+ -- way. This is probably not completely reliable, but I don't
+ -- know if we can do better without making this reader take
+ -- a ByteString input.
+ dat <- BL.pack . map (fromIntegral . ord) <$> count n anyChar
+ return $ BinData dat
+ parameter = do
+ hyph <- string "-" <|> pure ""
+ rest <- many digit
+ let pstr = T.pack $ hyph <> rest
+ return $ safeRead pstr
+ hexVal = do
+ char '\''
+ x <- hexDigit
+ y <- hexDigit
+ return $ hexToWord (T.pack [x,y])
+ letterSequence = T.pack <$> many1 (satisfy (\c -> isAscii c && isLetter c))
+ unformattedText =
+ UnformattedText . T.pack . mconcat <$>
+ many1 ( many1 (satisfy (not . isSpecial))
+ <|> ("" <$ nl))
+ grouped = Grouped <$> (char '{' *> skipMany nl *> manyTill tok (char '}'))
+
+nl :: PandocMonad m => RTFParser m ()
+nl = void (char '\n' <|> char '\r')
+
+isSpecial :: Char -> Bool
+isSpecial '{' = True
+isSpecial '}' = True
+isSpecial '\\' = True
+isSpecial '\n' = True
+isSpecial _ = False
+
+delimChar :: PandocMonad m => RTFParser m Char
+delimChar = satisfy (\c -> not (isAlphaNum c || isSpecial c))
+
+modifyGroup :: PandocMonad m
+ => (Properties -> Properties)
+ -> RTFParser m ()
+modifyGroup f =
+ updateState $ \st ->
+ st{ sGroupStack =
+ case sGroupStack st of
+ [] -> []
+ (x:xs) -> f x : xs }
+
+addFormatting :: (Properties, Text) -> Inlines
+addFormatting (_, "\n") = B.linebreak
+addFormatting (props, _) | gHidden props = mempty
+addFormatting (props, _) | Just bs <- gFootnote props = B.note bs
+addFormatting (props, txt) =
+ (if gBold props then B.strong else id) .
+ (if gItalic props then B.emph else id) .
+ (if gDeleted props then B.strikeout else id) .
+ (if gSub props then B.subscript else id) .
+ (if gSuper props then B.superscript else id) .
+ (if gSmallCaps props then B.smallcaps else id) .
+ (if gUnderline props then B.underline else id) .
+ (case gHyperlink props of
+ Nothing -> id
+ Just linkdest -> B.link linkdest mempty) .
+ (case gAnchor props of
+ Nothing -> id
+ Just ident -> B.spanWith (ident,[],[])) .
+ (case gFontFamily props of
+ Just Modern -> B.code
+ _ -> case gImage props of
+ Just pict ->
+ let attr = ("",[],
+ (case picWidthGoal pict of
+ Nothing -> []
+ Just w -> [("width", tshow (fromIntegral w / 1440
+ :: Double)
+ <> "in")]) ++
+ (case picHeightGoal pict of
+ Nothing -> []
+ Just h -> [("height", tshow (fromIntegral h / 1440
+ :: Double)
+ <> "in")]))
+ in B.imageWith attr (picName pict) "" . B.text
+ Nothing -> B.text) .
+ (if gCaps props then T.toUpper else id)
+ $ txt
+
+addText :: PandocMonad m => Text -> RTFParser m ()
+addText t = do
+ gs <- sGroupStack <$> getState
+ let props = case gs of
+ (x:_) -> x
+ _ -> def
+ updateState (\s -> s{ sTextContent = (props, t) : sTextContent s })
+
+inGroup :: PandocMonad m => RTFParser m a -> RTFParser m a
+inGroup p = do
+ updateState $ \st ->
+ st{ sGroupStack =
+ case sGroupStack st of
+ [] -> [def]
+ (x:xs) -> (x:x:xs) } -- inherit current group's properties
+ result <- p
+ updateState $ \st ->
+ st{ sGroupStack =
+ case sGroupStack st of
+ [] -> [] -- should not happen
+ (_:xs) -> xs }
+ return result
+
+getStyleFormatting :: PandocMonad m => Int -> RTFParser m [Tok]
+getStyleFormatting stynum = do
+ stylesheet <- sStylesheet <$> getState
+ case IntMap.lookup stynum stylesheet of
+ Nothing -> return []
+ Just sty ->
+ case styleBasedOn sty of
+ Just i -> (<> styleFormatting sty) <$> getStyleFormatting i
+ Nothing -> return $ styleFormatting sty
+
+isMetadataField :: Text -> Bool
+isMetadataField "title" = True
+isMetadataField "subject" = True
+isMetadataField "author" = True
+isMetadataField "manager" = True
+isMetadataField "company" = True
+isMetadataField "operator" = True
+isMetadataField "category" = True
+isMetadataField "keywords" = True
+isMetadataField "comment" = True
+isMetadataField "doccomm" = True
+isMetadataField "hlinkbase" = True
+isMetadataField "generator" = True
+isMetadataField _ = False
+
+isHeaderFooter :: Text -> Bool
+isHeaderFooter "header" = True
+isHeaderFooter "headerl" = True
+isHeaderFooter "headerr" = True
+isHeaderFooter "headerf" = True
+isHeaderFooter "footer" = True
+isHeaderFooter "footerl" = True
+isHeaderFooter "footerr" = True
+isHeaderFooter "footerf" = True
+isHeaderFooter _ = False
+
+boolParam :: Maybe Int -> Bool
+boolParam (Just 0) = False
+boolParam _ = True
+
+isUnderline :: Text -> Bool
+isUnderline "ul" = True
+isUnderline "uld" = True
+isUnderline "uldash" = True
+isUnderline "uldashd" = True
+isUnderline "uldashdd" = True
+isUnderline "uldb" = True
+isUnderline "ulth" = True
+isUnderline "ulthd" = True
+isUnderline "ulthdash" = True
+isUnderline "ulw" = True
+isUnderline "ulwave" = True
+isUnderline _ = False
+
+processTok :: PandocMonad m => Blocks -> Tok -> RTFParser m Blocks
+processTok bs (Tok pos tok') = do
+ setPosition pos
+ case tok' of
+ HexVal{} -> return ()
+ UnformattedText{} -> return ()
+ _ -> updateState $ \s -> s{ sEatChars = 0 }
+ case tok' of
+ Grouped (Tok _ (ControlSymbol '*') : toks) ->
+ bs <$ (do oldTextContent <- sTextContent <$> getState
+ processTok mempty (Tok pos (Grouped toks))
+ updateState $ \st -> st{ sTextContent = oldTextContent })
+ Grouped (Tok _ (ControlWord "fonttbl" _) : toks) -> inGroup $ do
+ updateState $ \s -> s{ sFontTable = processFontTable toks }
+ pure bs
+ Grouped (Tok _ (ControlWord "field" _) : toks) ->
+ inGroup $ handleField bs toks
+ Grouped (Tok _ (ControlWord "pict" _) : toks) ->
+ bs <$ inGroup (handlePict toks)
+ Grouped (Tok _ (ControlWord "stylesheet" _) : toks) ->
+ bs <$ inGroup (handleStylesheet toks)
+ Grouped (Tok _ (ControlWord "listtext" _) : _) -> do
+ -- eject any previous list items...sometimes TextEdit
+ -- doesn't put in a \par
+ emitBlocks bs
+ Grouped (Tok _ (ControlWord "pgdsc" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "colortbl" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "listtable" _) : toks) ->
+ bs <$ inGroup (handleListTable toks)
+ Grouped (Tok _ (ControlWord "listoverridetable" _) : toks) ->
+ bs <$ inGroup (handleListOverrideTable toks)
+ Grouped (Tok _ (ControlWord "wgrffmtfilter" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "themedata" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "colorschememapping" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "datastore" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "latentstyles" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "pntxta" _) : _) -> pure bs -- TODO
+ Grouped (Tok _ (ControlWord "pntxtb" _) : _) -> pure bs -- TODO
+ Grouped (Tok _ (ControlWord "xmlnstbl" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "filetbl" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "expandedcolortbl" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "listtables" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "revtbl" _) : _) -> pure bs
+ Grouped (Tok _ (ControlWord "bkmkstart" _)
+ : Tok _ (UnformattedText t) : _) -> do
+ -- TODO ideally we'd put the span around bkmkstart/end, but this
+ -- is good for now:
+ modifyGroup (\g -> g{ gAnchor = Just $ T.strip t })
+ pure bs
+ Grouped (Tok _ (ControlWord "bkmkend" _) : _) -> do
+ modifyGroup (\g -> g{ gAnchor = Nothing })
+ pure bs
+ Grouped (Tok _ (ControlWord f _) : _) | isHeaderFooter f -> pure bs
+ Grouped (Tok _ (ControlWord "footnote" _) : toks) -> do
+ noteBs <- inGroup $ processDestinationToks toks
+ modifyGroup (\g -> g{ gFootnote = Just noteBs })
+ addText "*"
+ modifyGroup (\g -> g{ gFootnote = Nothing })
+ return bs
+ Grouped (Tok _ (ControlWord "info" _) : toks) ->
+ bs <$ inGroup (processDestinationToks toks)
+ Grouped (Tok _ (ControlWord f _) : toks) | isMetadataField f -> inGroup $ do
+ foldM_ processTok mempty toks
+ annotatedToks <- reverse . sTextContent <$> getState
+ updateState $ \s -> s{ sTextContent = [] }
+ let ils = B.trimInlines . mconcat $ map addFormatting annotatedToks
+ updateState $ \s -> s{ sMetadata = (f, ils) : sMetadata s }
+ pure bs
+ Grouped toks -> inGroup (foldM processTok bs toks)
+ UnformattedText t -> bs <$ do
+ -- return $! traceShowId $! (pos, t)
+ eatChars <- sEatChars <$> getState
+ case eatChars of
+ 0 -> addText t
+ n | n < T.length t -> do
+ updateState $ \s -> s{ sEatChars = 0 }
+ addText (T.drop n t)
+ | otherwise -> do
+ updateState $ \s -> s{ sEatChars = n - T.length t }
+ HexVal n -> bs <$ do
+ eatChars <- sEatChars <$> getState
+ if eatChars == 0
+ then do
+ charset <- sCharSet <$> getState
+ case charset of
+ ANSI -> addText (T.singleton $ ansiToChar n)
+ Mac -> addText (T.singleton $ macToChar n)
+ Pc -> addText (T.singleton $ pcToChar n)
+ Pca -> addText (T.singleton $ pcaToChar n)
+ else updateState $ \s -> s{ sEatChars = eatChars - 1 }
+ ControlWord "ansi" _ -> bs <$
+ updateState (\s -> s{ sCharSet = ANSI })
+ ControlWord "mac" _ -> bs <$
+ updateState (\s -> s{ sCharSet = Mac })
+ ControlWord "pc" _ -> bs <$
+ updateState (\s -> s{ sCharSet = Pc })
+ ControlWord "pca" _ -> bs <$
+ updateState (\s -> s{ sCharSet = Pca })
+ ControlWord "outlinelevel" mbp -> bs <$
+ modifyGroup (\g -> g{ gOutlineLevel = mbp })
+ ControlWord "ls" mbp -> bs <$
+ modifyGroup (\g -> g{ gListOverride = mbp })
+ ControlWord "ilvl" mbp -> bs <$
+ modifyGroup (\g -> g{ gListLevel = mbp })
+ ControlSymbol '\\' -> bs <$ addText "\\"
+ ControlSymbol '{' -> bs <$ addText "{"
+ ControlSymbol '}' -> bs <$ addText "}"
+ ControlSymbol '~' -> bs <$ addText "\x00a0"
+ ControlSymbol '-' -> bs <$ addText "\x00ad"
+ ControlSymbol '_' -> bs <$ addText "\x2011"
+ ControlWord "trowd" _ -> bs <$ do -- add new row
+ updateState $ \s -> s{ sTableRows = TableRow [] : sTableRows s
+ , sCurrentCell = mempty }
+ ControlWord "cell" _ -> bs <$ do
+ new <- emitBlocks mempty
+ curCell <- (<> new) . sCurrentCell <$> getState
+ updateState $ \s -> s{ sTableRows =
+ case sTableRows s of
+ TableRow cs : rs ->
+ TableRow (curCell : cs) : rs
+ [] -> [TableRow [curCell]] -- shouldn't happen
+ , sCurrentCell = mempty }
+ ControlWord "intbl" _ -> bs <$ modifyGroup (\g -> g{ gInTable = True })
+ ControlWord "plain" _ -> bs <$ modifyGroup (const def)
+ ControlWord "lquote" _ -> bs <$ addText "\x2018"
+ ControlWord "rquote" _ -> bs <$ addText "\x2019"
+ ControlWord "ldblquote" _ -> bs <$ addText "\x201C"
+ ControlWord "rdblquote" _ -> bs <$ addText "\x201D"
+ ControlWord "emdash" _ -> bs <$ addText "\x2014"
+ ControlWord "emspace" _ -> bs <$ addText "\x2003"
+ ControlWord "enspace" _ -> bs <$ addText "\x2002"
+ ControlWord "endash" _ -> bs <$ addText "\x2013"
+ ControlWord "bullet" _ -> bs <$ addText "\x2022"
+ ControlWord "tab" _ -> bs <$ addText "\t"
+ ControlWord "line" _ -> bs <$ addText "\n"
+ ControlSymbol '\n' -> bs <$ addText "\n"
+ ControlSymbol '\r' -> bs <$ addText "\n"
+ ControlWord "uc" (Just i) -> bs <$ modifyGroup (\g -> g{ gUC = i })
+ ControlWord "cs" (Just n) -> do
+ getStyleFormatting n >>= foldM processTok bs
+ ControlWord "s" (Just n) -> do
+ getStyleFormatting n >>= foldM processTok bs
+ ControlWord "ds" (Just n) -> do
+ getStyleFormatting n >>= foldM processTok bs
+ ControlWord "f" (Just i) -> bs <$ do
+ fontTable <- sFontTable <$> getState
+ modifyGroup (\g -> g{ gFontFamily = IntMap.lookup i fontTable })
+ ControlWord "u" (Just i) -> bs <$ do
+ st <- getState
+ let curgroup = case sGroupStack st of
+ [] -> def
+ (x:_) -> x
+ updateState $ \s -> s{ sEatChars = gUC curgroup }
+ -- "RTF control words generally accept signed 16-bit numbers as
+ -- arguments. For this reason, Unicode values greater than 32767
+ -- must be expressed as negative numbers."
+ let codepoint :: Word16
+ codepoint = fromIntegral i
+ addText (T.singleton (chr $ fromIntegral codepoint))
+ ControlWord "caps" mbp -> bs <$
+ modifyGroup (\g -> g{ gCaps = boolParam mbp })
+ ControlWord "deleted" mbp -> bs <$
+ modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+ ControlWord "b" mbp -> bs <$
+ modifyGroup (\g -> g{ gBold = boolParam mbp })
+ ControlWord "i" mbp -> bs <$
+ modifyGroup (\g -> g{ gItalic = boolParam mbp })
+ ControlWord "sub" mbp -> bs <$
+ modifyGroup (\g -> g{ gSub = boolParam mbp })
+ ControlWord "super" mbp -> bs <$
+ modifyGroup (\g -> g{ gSuper = boolParam mbp })
+ ControlWord "up" mbp -> bs <$
+ modifyGroup (\g -> g{ gSuper = boolParam mbp })
+ ControlWord "strike" mbp -> bs <$
+ modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+ ControlWord "strikedl" mbp -> bs <$
+ modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+ ControlWord "striked" mbp -> bs <$
+ modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+ ControlWord "scaps" mbp -> bs <$
+ modifyGroup (\g -> g{ gSmallCaps = boolParam mbp })
+ ControlWord "v" mbp -> bs <$
+ modifyGroup (\g -> g{ gHidden = boolParam mbp })
+ ControlWord x mbp | isUnderline x -> bs <$
+ modifyGroup (\g -> g{ gUnderline = boolParam mbp })
+ ControlWord "ulnone" _ -> bs <$
+ modifyGroup (\g -> g{ gUnderline = False })
+ ControlWord "pard" _ -> bs <$ do
+ modifyGroup (const def)
+ getStyleFormatting 0 >>= foldM processTok bs
+ ControlWord "par" _ -> emitBlocks bs
+ _ -> pure bs
+
+processDestinationToks :: PandocMonad m => [Tok] -> RTFParser m Blocks
+processDestinationToks toks = do
+ textContent <- sTextContent <$> getState
+ liststack <- sListStack <$> getState
+ updateState $ \s -> s{ sTextContent = mempty
+ , sListStack = [] }
+ result <- inGroup $
+ foldM processTok mempty toks >>= emitBlocks
+ unclosed <- closeContainers
+ updateState $ \s -> s{ sTextContent = textContent
+ , sListStack = liststack }
+ return $ result <> unclosed
+
+-- close lists >= level
+closeLists :: PandocMonad m => Int -> RTFParser m Blocks
+closeLists lvl = do
+ lists <- sListStack <$> getState
+ case lists of
+ (List _ lvl' lt items : rest) | lvl' >= lvl -> do
+ let newlist = (case lt of
+ Bullet -> B.bulletList
+ Ordered listAttr -> B.orderedListWith listAttr)
+ (reverse items)
+ updateState $ \s -> s{ sListStack = rest }
+ case rest of
+ [] -> do
+ updateState $ \s -> s{ sListStack = rest }
+ pure newlist
+ (List lo lvl'' lt' [] : rest') -> do -- should not happen
+ updateState $ \s -> s{ sListStack =
+ List lo lvl'' lt' [newlist] : rest' }
+ closeLists lvl
+ (List lo lvl'' lt' (i:is) : rest') -> do
+ updateState $ \s -> s{ sListStack =
+ List lo lvl'' lt' (i <> newlist : is) : rest' }
+ closeLists lvl
+ _ -> pure mempty
+
+closeTable :: PandocMonad m => RTFParser m Blocks
+closeTable = do
+ rawrows <- sTableRows <$> getState
+ if null rawrows
+ then return mempty
+ else do
+ let getCells (TableRow cs) = reverse cs
+ let rows = map getCells . reverse $ rawrows
+ updateState $ \s -> s{ sCurrentCell = mempty
+ , sTableRows = [] }
+ return $ B.simpleTable [] rows
+
+closeContainers :: PandocMonad m => RTFParser m Blocks
+closeContainers = do
+ tbl <- closeTable
+ lists <- closeLists 0
+ return $ tbl <> lists
+
+trimFinalLineBreak :: Inlines -> Inlines
+trimFinalLineBreak ils =
+ case Seq.viewr (B.unMany ils) of
+ rest Seq.:> LineBreak -> B.Many rest
+ _ -> ils
+
+emitBlocks :: PandocMonad m => Blocks -> RTFParser m Blocks
+emitBlocks bs = do
+ annotatedToks <- reverse . sTextContent <$> getState
+ updateState $ \s -> s{ sTextContent = [] }
+ let justCode = def{ gFontFamily = Just Modern }
+ let prop = case annotatedToks of
+ [] -> def
+ ((p,_):_) -> p
+ tbl <- if gInTable prop || null annotatedToks
+ then pure mempty
+ else closeTable
+ new <-
+ case annotatedToks of
+ [] -> pure mempty
+ _ | Just lst <- gListOverride prop
+ -> do
+ let level = fromMaybe 0 $ gListLevel prop
+ listOverrideTable <- sListOverrideTable <$> getState
+ let listType = fromMaybe Bullet $
+ IntMap.lookup lst listOverrideTable >>= IntMap.lookup level
+ lists <- sListStack <$> getState
+ -- get para contents of list item
+ let newbs = B.para . B.trimInlines . trimFinalLineBreak . mconcat $
+ map addFormatting annotatedToks
+ case lists of
+ (List lo parentlevel _lt items : cs)
+ | lo == lst
+ , parentlevel == level
+ -- add another item to existing list
+ -> do updateState $ \s ->
+ s{ sListStack =
+ List lo level listType (newbs:items) : cs }
+ pure mempty
+ | lo /= lst || level < parentlevel
+ -- close parent list and add new list
+ -> do new <- closeLists level -- close open lists > level
+ updateState $ \s ->
+ s{ sListStack = List lst level listType [newbs] :
+ sListStack s }
+ pure new
+ _ -> do -- add new list (level > parentlevel)
+ updateState $ \s ->
+ s{ sListStack = List lst level listType [newbs] :
+ sListStack s }
+ pure mempty
+ | Just lvl <- gOutlineLevel prop
+ -> do
+ lists <- closeLists 0
+ pure $ lists <>
+ B.header (lvl + 1)
+ (B.trimInlines . mconcat $ map addFormatting
+ $ removeCommonFormatting
+ annotatedToks)
+ | all ((== justCode) . fst) annotatedToks
+ -> do
+ lists <- closeLists 0
+ pure $ lists <>
+ B.codeBlock (mconcat $ map snd annotatedToks)
+ | all (T.all isSpace . snd) annotatedToks
+ -> closeLists 0
+ | otherwise -> do
+ lists <- closeLists 0
+ pure $ lists <>
+ B.para (B.trimInlines . trimFinalLineBreak . mconcat
+ $ map addFormatting annotatedToks)
+ if gInTable prop
+ then do
+ updateState $ \s -> s{ sCurrentCell = sCurrentCell s <> new }
+ pure bs
+ else do
+ pure $ bs <> tbl <> new
+
+-- Headers often have a style applied. We usually want to remove
+-- this, because headers will have their own styling in the target
+-- format.
+removeCommonFormatting :: [(Properties, Text)] -> [(Properties, Text)]
+removeCommonFormatting =
+ (\ts ->
+ if all (gBold . fst) ts
+ then map (\(p,t) -> (p{ gBold = False }, t)) ts
+ else ts) .
+ (\ts ->
+ if all (gItalic . fst) ts
+ then map (\(p,t) -> (p{ gItalic = False }, t)) ts
+ else ts)
+
+
+-- {\field{\*\fldinst{HYPERLINK "http://pandoc.org"}}{\fldrslt foo}}
+handleField :: PandocMonad m => Blocks -> [Tok] -> RTFParser m Blocks
+handleField bs
+ (Tok _
+ (Grouped
+ (Tok _ (ControlSymbol '*')
+ :Tok _ (ControlWord "fldinst" Nothing)
+ :Tok _ (Grouped (Tok _ (UnformattedText insttext):rest))
+ :_))
+ :linktoks)
+ | Just linkdest <- getHyperlink insttext
+ = do let linkdest' = case rest of
+ (Tok _ (ControlSymbol '\\')
+ : Tok _ (UnformattedText t)
+ : _) | Just bkmrk <- T.stripPrefix "l" t
+ -> "#" <> unquote bkmrk
+ _ -> linkdest
+ modifyGroup $ \g -> g{ gHyperlink = Just linkdest' }
+ result <- foldM processTok bs linktoks
+ modifyGroup $ \g -> g{ gHyperlink = Nothing }
+ return result
+handleField bs _ = pure bs
+
+unquote :: Text -> Text
+unquote = T.dropWhile (=='"') . T.dropWhileEnd (=='"') . T.strip
+
+handleListTable :: PandocMonad m => [Tok] -> RTFParser m ()
+handleListTable toks = do
+ mapM_ handleList toks
+
+handleList :: PandocMonad m => Tok -> RTFParser m ()
+handleList (Tok _ (Grouped (Tok _ (ControlWord "list" _) : toks))) = do
+ let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks]
+ let levels = [ts | Tok _ (Grouped (Tok _ (ControlWord "listlevel" _) : ts))
+ <- toks]
+ tbl <- foldM handleListLevel mempty (zip [0..] levels)
+ updateState $ \s -> s{ sListTable = IntMap.insert listid tbl $ sListTable s }
+handleList _ = return ()
+
+handleListLevel :: PandocMonad m
+ => ListLevelTable
+ -> (Int, [Tok])
+ -> RTFParser m ListLevelTable
+handleListLevel levelTable (lvl, toks) = do
+ let start = headDef 1
+ [n | Tok _ (ControlWord "levelstartat" (Just n)) <- toks]
+ let mbNumberStyle =
+ case [n | Tok _ (ControlWord "levelnfc" (Just n)) <- toks] of
+ [] -> Nothing
+ (0:_) -> Just Decimal
+ (1:_) -> Just UpperRoman
+ (2:_) -> Just LowerRoman
+ (3:_) -> Just UpperAlpha
+ (4:_) -> Just LowerAlpha
+ (23:_) -> Nothing
+ (255:_) -> Nothing
+ _ -> Just DefaultStyle
+ let listType = case mbNumberStyle of
+ Nothing -> Bullet
+ Just numStyle -> Ordered (start,numStyle,Period)
+ return $ IntMap.insert lvl listType levelTable
+
+handleListOverrideTable :: PandocMonad m => [Tok] -> RTFParser m ()
+handleListOverrideTable toks = mapM_ handleListOverride toks
+
+handleListOverride :: PandocMonad m => Tok -> RTFParser m ()
+handleListOverride
+ (Tok _ (Grouped (Tok _ (ControlWord "listoverride" _) : toks))) = do
+ let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks]
+ let lsn = headDef 0 [n | Tok _ (ControlWord "ls" (Just n)) <- toks]
+ -- TODO override stuff, esp. start num -- for now we just handle indirection
+ listTable <- sListTable <$> getState
+ case IntMap.lookup listid listTable of
+ Nothing -> return ()
+ Just tbl -> updateState $ \s ->
+ s{ sListOverrideTable = IntMap.insert lsn tbl $
+ sListOverrideTable s }
+handleListOverride _ = return ()
+
+handleStylesheet :: PandocMonad m => [Tok] -> RTFParser m ()
+handleStylesheet toks = do
+ let styles = mapMaybe parseStyle toks
+ updateState $ \s -> s{ sStylesheet = IntMap.fromList
+ $ zip (map styleNum styles) styles }
+
+parseStyle :: Tok -> Maybe Style
+parseStyle (Tok _ (Grouped toks)) = do
+ let (styType, styNum, rest) =
+ case toks of
+ Tok _ (ControlWord "s" (Just n)) : ts -> (ParagraphStyle, n, ts)
+ Tok _ (ControlWord "ds" (Just n)) : ts -> (SectionStyle, n, ts)
+ Tok _ (ControlWord "cs" (Just n)) : ts -> (CharStyle, n, ts)
+ Tok _ (ControlWord "ts" (Just n)) : ts -> (TableStyle, n, ts)
+ _ -> (ParagraphStyle, 0, toks)
+ let styName = case lastMay rest of
+ Just (Tok _ (UnformattedText t)) -> T.dropWhileEnd (==';') t
+ _ -> mempty
+ let isBasedOn (Tok _ (ControlWord "sbasedon" (Just _))) = True
+ isBasedOn _ = False
+ let styBasedOn = case find isBasedOn toks of
+ Just (Tok _ (ControlWord "sbasedon" (Just i))) -> Just i
+ _ -> Nothing
+ let isStyleControl (Tok _ (ControlWord x _)) =
+ x `elem` ["cs", "s", "ds", "additive", "sbasedon", "snext",
+ "sautoupd", "shidden", "keycode", "alt", "shift",
+ "ctrl", "fn"]
+ isStyleControl _ = False
+ let styFormatting = filter (not . isStyleControl) (initSafe rest)
+ return $ Style{ styleNum = styNum
+ , styleType = styType
+ , styleBasedOn = styBasedOn
+ , styleName = styName
+ , styleFormatting = styFormatting
+ }
+parseStyle _ = Nothing
+
+hexToWord :: Text -> Word8
+hexToWord t = case TR.hexadecimal t of
+ Left _ -> 0
+ Right (x,_) -> x
+
+
+handlePict :: PandocMonad m => [Tok] -> RTFParser m ()
+handlePict toks = do
+ let pict = foldl' getPictData def toks
+ let altText = "image"
+ let bytes =
+ if picBinary pict
+ then picBytes pict
+ else BL.pack $ map hexToWord $ T.chunksOf 2 $ picData pict
+ let (mimetype, ext) =
+ case picType pict of
+ Just Emfblip -> (Just "image/x-emf", ".emf")
+ Just Pngblip -> (Just "image/png", ".png")
+ Just Jpegblip -> (Just "image/jpeg", ".jpg")
+ Nothing -> (Nothing, "")
+ case mimetype of
+ Just mt -> do
+ let pictname = showDigest (sha1 bytes) <> ext
+ insertMedia pictname (Just mt) bytes
+ modifyGroup $ \g -> g{ gImage = Just pict{ picName = T.pack pictname,
+ picBytes = bytes } }
+ addText altText
+ modifyGroup $ \g -> g{ gImage = Nothing }
+ _ -> return ()
+ where
+ getPictData :: Pict -> Tok -> Pict
+ getPictData pict (Tok _ tok') =
+ case tok' of
+ ControlWord "emfblip" _-> pict{ picType = Just Emfblip }
+ ControlWord "pngblip" _-> pict{ picType = Just Pngblip }
+ ControlWord "jpegblip" _-> pict{ picType = Just Jpegblip }
+ ControlWord "picw" (Just w) -> pict{ picWidth = Just w }
+ ControlWord "pich" (Just h) -> pict{ picHeight = Just h }
+ ControlWord "picwgoal" (Just w) -> pict{ picWidthGoal = Just w }
+ ControlWord "pichgoal" (Just h) -> pict{ picHeightGoal = Just h }
+ BinData d | not (BL.null d)
+ -> pict{ picBinary = True, picBytes = picBytes pict <> d }
+ UnformattedText t -> pict{ picData = t }
+ _ -> pict
+
+
+getHyperlink :: Text -> Maybe Text
+getHyperlink t =
+ case T.stripPrefix "HYPERLINK" (T.strip t) of
+ Nothing -> Nothing
+ Just rest -> Just $ unquote rest
+
+processFontTable :: [Tok] -> FontTable
+processFontTable = snd . foldl' go (0, mempty)
+ where
+ go (fontnum, tbl) (Tok _ tok') =
+ case tok' of
+ (ControlWord "f" (Just i)) -> (i, tbl)
+ (ControlWord "fnil" _) -> (fontnum, tbl)
+ (ControlWord "froman" _) -> (fontnum, IntMap.insert fontnum Roman tbl)
+ (ControlWord "fswiss" _) -> (fontnum, IntMap.insert fontnum Swiss tbl)
+ (ControlWord "fmodern" _) -> (fontnum, IntMap.insert fontnum Modern tbl)
+ (ControlWord "fscript" _) -> (fontnum, IntMap.insert fontnum Script tbl)
+ (ControlWord "fdecor" _) -> (fontnum, IntMap.insert fontnum Decor tbl)
+ (ControlWord "ftech" _) -> (fontnum, IntMap.insert fontnum Tech tbl)
+ (ControlWord "fbidi" _) -> (fontnum, IntMap.insert fontnum Bidi tbl)
+ (Grouped ts) -> foldl' go (fontnum, tbl) ts
+ _ -> (fontnum, tbl)
+
+
+ansiToChar :: Word8 -> Char
+ansiToChar i = chr $
+ case i of
+ 128 -> 8364
+ 130 -> 8218
+ 131 -> 402
+ 132 -> 8222
+ 133 -> 8230
+ 134 -> 8224
+ 135 -> 8225
+ 136 -> 710
+ 137 -> 8240
+ 138 -> 352
+ 139 -> 8249
+ 140 -> 338
+ 142 -> 381
+ 145 -> 8216
+ 146 -> 8217
+ 147 -> 8220
+ 148 -> 8221
+ 149 -> 8226
+ 150 -> 8211
+ 151 -> 8212
+ 152 -> 732
+ 153 -> 8482
+ 154 -> 353
+ 155 -> 8250
+ 156 -> 339
+ 158 -> 382
+ 159 -> 376
+ 173 -> 0xAD
+ _ -> fromIntegral i
+
+macToChar :: Word8 -> Char
+macToChar i = chr $
+ case i of
+ 0x80 -> 0xC4
+ 0x81 -> 0xC5
+ 0x82 -> 0xC7
+ 0x83 -> 0xC9
+ 0x84 -> 0xD1
+ 0x85 -> 0xD6
+ 0x86 -> 0xDC
+ 0x87 -> 0xE1
+ 0x88 -> 0xE0
+ 0x89 -> 0xE2
+ 0x8A -> 0xE4
+ 0x8B -> 0xE3
+ 0x8C -> 0xE5
+ 0x8D -> 0xE7
+ 0x8E -> 0xE9
+ 0x8F -> 0xE8
+ 0x90 -> 0xEA
+ 0x91 -> 0xEB
+ 0x92 -> 0xED
+ 0x93 -> 0xEC
+ 0x94 -> 0xEE
+ 0x95 -> 0xEF
+ 0x96 -> 0xF1
+ 0x97 -> 0xF3
+ 0x98 -> 0xF2
+ 0x99 -> 0xF4
+ 0x9A -> 0xF6
+ 0x9B -> 0xF5
+ 0x9C -> 0xFA
+ 0x9D -> 0xF9
+ 0x9E -> 0xFB
+ 0x9F -> 0xFC
+ 0xA0 -> 0xDD
+ 0xA1 -> 0xB0
+ 0xA2 -> 0xA2
+ 0xA3 -> 0xA3
+ 0xA4 -> 0xA7
+ 0xA5 -> 0xD7
+ 0xA6 -> 0xB6
+ 0xA7 -> 0xDF
+ 0xA8 -> 0xAE
+ 0xA9 -> 0xA9
+ 0xAA -> 0xB2
+ 0xAB -> 0xB4
+ 0xAC -> 0xA8
+ 0xAD -> 0xB3
+ 0xAE -> 0xC6
+ 0xAF -> 0xD8
+ 0xB0 -> 0xB9
+ 0xB1 -> 0xB1
+ 0xB2 -> 0xBC
+ 0xB3 -> 0xBD
+ 0xB4 -> 0xA5
+ 0xB5 -> 0xB5
+ 0xBA -> 0xBE
+ 0xBB -> 0xAA
+ 0xBC -> 0xBA
+ 0xBE -> 0xE6
+ 0xBF -> 0xF8
+ 0xC0 -> 0xBF
+ 0xC1 -> 0xA1
+ 0xC2 -> 0xAC
+ 0xC3 -> 0x0141
+ 0xC4 -> 0x0192
+ 0xC5 -> 0x02CB
+ 0xC7 -> 0xAB
+ 0xC8 -> 0xBB
+ 0xC9 -> 0xA6
+ 0xCA -> 0xA0
+ 0xCB -> 0xC0
+ 0xCC -> 0xC3
+ 0xCD -> 0xD5
+ 0xCE -> 0x0152
+ 0xCF -> 0x0153
+ 0xD0 -> 0xAD
+ 0xD4 -> 0x0142
+ 0xD6 -> 0xF7
+ 0xD8 -> 0xFF
+ 0xD9 -> 0x0178
+ 0xDB -> 0xA4
+ 0xDC -> 0xD0
+ 0xDD -> 0xF0
+ 0xDE -> 0xDE
+ 0xDF -> 0xFE
+ 0xE0 -> 0xFD
+ 0xE1 -> 0xB7
+ 0xE5 -> 0xC2
+ 0xE6 -> 0xCA
+ 0xE7 -> 0xC1
+ 0xE8 -> 0xCB
+ 0xE9 -> 0xC8
+ 0xEA -> 0xCD
+ 0xEB -> 0xCE
+ 0xEC -> 0xCF
+ 0xED -> 0xCC
+ 0xEE -> 0xD3
+ 0xEF -> 0xD4
+ 0xF1 -> 0xD2
+ 0xF2 -> 0xDA
+ 0xF3 -> 0xDB
+ 0xF4 -> 0xD9
+ 0xF5 -> 0x0131
+ 0xF6 -> 0x02C6
+ 0xF7 -> 0x02DC
+ 0xF8 -> 0xAF
+ 0xF9 -> 0x02D8
+ 0xFA -> 0x02D9
+ 0xFB -> 0x02DA
+ 0xFC -> 0xB8
+ 0xFD -> 0x02DD
+ 0xFE -> 0x02DB
+ 0xFF -> 0x02C7
+ _ -> fromIntegral i
+
+pcToChar :: Word8 -> Char
+pcToChar i = chr $
+ case i of
+ 0x80 -> 0xc7
+ 0x81 -> 0xfc
+ 0x82 -> 0xe9
+ 0x83 -> 0xe2
+ 0x84 -> 0xe4
+ 0x85 -> 0xe0
+ 0x86 -> 0xe5
+ 0x87 -> 0xe7
+ 0x88 -> 0xea
+ 0x89 -> 0xeb
+ 0x8a -> 0xe8
+ 0x8b -> 0xef
+ 0x8c -> 0xee
+ 0x8d -> 0xec
+ 0x8e -> 0xc4
+ 0x8f -> 0xc5
+ 0x90 -> 0xc9
+ 0x91 -> 0xe6
+ 0x92 -> 0xc6
+ 0x93 -> 0xf4
+ 0x94 -> 0xf6
+ 0x95 -> 0xf2
+ 0x96 -> 0xfb
+ 0x97 -> 0xf9
+ 0x98 -> 0xff
+ 0x99 -> 0xd6
+ 0x9a -> 0xdc
+ 0x9b -> 0xa2
+ 0x9c -> 0xa3
+ 0x9d -> 0xa5
+ 0x9e -> 0x20a7
+ 0x9f -> 0x0192
+ 0xa0 -> 0xe1
+ 0xa1 -> 0xed
+ 0xa2 -> 0xf3
+ 0xa3 -> 0xfa
+ 0xa4 -> 0xf1
+ 0xa5 -> 0xd1
+ 0xa6 -> 0xaa
+ 0xa7 -> 0xba
+ 0xa8 -> 0xbf
+ 0xa9 -> 0x2310
+ 0xaa -> 0xac
+ 0xab -> 0xbd
+ 0xac -> 0xbc
+ 0xad -> 0xa1
+ 0xae -> 0xab
+ 0xaf -> 0xbb
+ 0xb0 -> 0x2591
+ 0xb1 -> 0x2592
+ 0xb2 -> 0x2593
+ 0xb3 -> 0x2502
+ 0xb4 -> 0x2524
+ 0xb5 -> 0x2561
+ 0xb6 -> 0x2562
+ 0xb7 -> 0x2556
+ 0xb8 -> 0x2555
+ 0xb9 -> 0x2563
+ 0xba -> 0x2551
+ 0xbb -> 0x2557
+ 0xbc -> 0x255d
+ 0xbd -> 0x255c
+ 0xbe -> 0x255b
+ 0xbf -> 0x2510
+ 0xc0 -> 0x2514
+ 0xc1 -> 0x2534
+ 0xc2 -> 0x252c
+ 0xc3 -> 0x251c
+ 0xc4 -> 0x2500
+ 0xc5 -> 0x253c
+ 0xc6 -> 0x255e
+ 0xc7 -> 0x255f
+ 0xc8 -> 0x255a
+ 0xc9 -> 0x2554
+ 0xca -> 0x2569
+ 0xcb -> 0x2566
+ 0xcc -> 0x2560
+ 0xcd -> 0x2550
+ 0xce -> 0x256c
+ 0xcf -> 0x2567
+ 0xd0 -> 0x2568
+ 0xd1 -> 0x2564
+ 0xd2 -> 0x2565
+ 0xd3 -> 0x2559
+ 0xd4 -> 0x2558
+ 0xd5 -> 0x2552
+ 0xd6 -> 0x2553
+ 0xd7 -> 0x256b
+ 0xd8 -> 0x256a
+ 0xd9 -> 0x2518
+ 0xda -> 0x250c
+ 0xdb -> 0x2588
+ 0xdc -> 0x2584
+ 0xdd -> 0x258c
+ 0xde -> 0x2590
+ 0xdf -> 0x2580
+ 0xe0 -> 0x03b1
+ 0xe1 -> 0xdf
+ 0xe2 -> 0x0393
+ 0xe3 -> 0x03c0
+ 0xe4 -> 0x03a3
+ 0xe5 -> 0x03c3
+ 0xe6 -> 0xb5
+ 0xe7 -> 0x03c4
+ 0xe8 -> 0x03a6
+ 0xe9 -> 0x0398
+ 0xea -> 0x03a9
+ 0xeb -> 0x03b4
+ 0xec -> 0x221e
+ 0xed -> 0x03c6
+ 0xee -> 0x03b5
+ 0xef -> 0x2229
+ 0xf0 -> 0x2261
+ 0xf1 -> 0xb1
+ 0xf2 -> 0x2265
+ 0xf3 -> 0x2264
+ 0xf4 -> 0x2320
+ 0xf5 -> 0x2321
+ 0xf6 -> 0xf7
+ 0xf7 -> 0x2248
+ 0xf8 -> 0xb0
+ 0xf9 -> 0x2219
+ 0xfa -> 0xb7
+ 0xfb -> 0x221a
+ 0xfc -> 0x207f
+ 0xfd -> 0xb2
+ 0xfe -> 0x25a0
+ 0xff -> 0xa0
+ _ -> fromIntegral i
+
+pcaToChar :: Word8 -> Char
+pcaToChar i = chr $
+ case i of
+ 0x80 -> 0x00c7
+ 0x81 -> 0x00fc
+ 0x82 -> 0x00e9
+ 0x83 -> 0x00e2
+ 0x84 -> 0x00e4
+ 0x85 -> 0x00e0
+ 0x86 -> 0x00e5
+ 0x87 -> 0x00e7
+ 0x88 -> 0x00ea
+ 0x89 -> 0x00eb
+ 0x8a -> 0x00e8
+ 0x8b -> 0x00ef
+ 0x8c -> 0x00ee
+ 0x8d -> 0x00ec
+ 0x8e -> 0x00c4
+ 0x8f -> 0x00c5
+ 0x90 -> 0x00c9
+ 0x91 -> 0x00e6
+ 0x92 -> 0x00c6
+ 0x93 -> 0x00f4
+ 0x94 -> 0x00f6
+ 0x95 -> 0x00f2
+ 0x96 -> 0x00fb
+ 0x97 -> 0x00f9
+ 0x98 -> 0x00ff
+ 0x99 -> 0x00d6
+ 0x9a -> 0x00dc
+ 0x9b -> 0x00f8
+ 0x9c -> 0x00a3
+ 0x9d -> 0x00d8
+ 0x9e -> 0x00d7
+ 0x9f -> 0x0192
+ 0xa0 -> 0x00e1
+ 0xa1 -> 0x00ed
+ 0xa2 -> 0x00f3
+ 0xa3 -> 0x00fa
+ 0xa4 -> 0x00f1
+ 0xa5 -> 0x00d1
+ 0xa6 -> 0x00aa
+ 0xa7 -> 0x00ba
+ 0xa8 -> 0x00bf
+ 0xa9 -> 0x00ae
+ 0xaa -> 0x00ac
+ 0xab -> 0x00bd
+ 0xac -> 0x00bc
+ 0xad -> 0x00a1
+ 0xae -> 0x00ab
+ 0xaf -> 0x00bb
+ 0xb0 -> 0x2591
+ 0xb1 -> 0x2592
+ 0xb2 -> 0x2593
+ 0xb3 -> 0x2502
+ 0xb4 -> 0x2524
+ 0xb5 -> 0x00c1
+ 0xb6 -> 0x00c2
+ 0xb7 -> 0x00c0
+ 0xb8 -> 0x00a9
+ 0xb9 -> 0x2563
+ 0xba -> 0x2551
+ 0xbb -> 0x2557
+ 0xbc -> 0x255d
+ 0xbd -> 0x00a2
+ 0xbe -> 0x00a5
+ 0xbf -> 0x2510
+ 0xc0 -> 0x2514
+ 0xc1 -> 0x2534
+ 0xc2 -> 0x252c
+ 0xc3 -> 0x251c
+ 0xc4 -> 0x2500
+ 0xc5 -> 0x253c
+ 0xc6 -> 0x00e3
+ 0xc7 -> 0x00c3
+ 0xc8 -> 0x255a
+ 0xc9 -> 0x2554
+ 0xca -> 0x2569
+ 0xcb -> 0x2566
+ 0xcc -> 0x2560
+ 0xcd -> 0x2550
+ 0xce -> 0x256c
+ 0xcf -> 0x00a4
+ 0xd0 -> 0x00f0
+ 0xd1 -> 0x00d0
+ 0xd2 -> 0x00ca
+ 0xd3 -> 0x00cb
+ 0xd4 -> 0x00c8
+ 0xd5 -> 0x0131
+ 0xd6 -> 0x00cd
+ 0xd7 -> 0x00ce
+ 0xd8 -> 0x00cf
+ 0xd9 -> 0x2518
+ 0xda -> 0x250c
+ 0xdb -> 0x2588
+ 0xdc -> 0x2584
+ 0xdd -> 0x00a6
+ 0xde -> 0x00cc
+ 0xdf -> 0x2580
+ 0xe0 -> 0x00d3
+ 0xe1 -> 0x00df
+ 0xe2 -> 0x00d4
+ 0xe3 -> 0x00d2
+ 0xe4 -> 0x00f5
+ 0xe5 -> 0x00d5
+ 0xe6 -> 0x00b5
+ 0xe7 -> 0x00fe
+ 0xe8 -> 0x00de
+ 0xe9 -> 0x00da
+ 0xea -> 0x00db
+ 0xeb -> 0x00d9
+ 0xec -> 0x00fd
+ 0xed -> 0x00dd
+ 0xee -> 0x00af
+ 0xef -> 0x00b4
+ 0xf0 -> 0x00ad
+ 0xf1 -> 0x00b1
+ 0xf2 -> 0x2017
+ 0xf3 -> 0x00be
+ 0xf4 -> 0x00b6
+ 0xf5 -> 0x00a7
+ 0xf6 -> 0x00f7
+ 0xf7 -> 0x00b8
+ 0xf8 -> 0x00b0
+ 0xf9 -> 0x00a8
+ 0xfa -> 0x00b7
+ 0xfb -> 0x00b9
+ 0xfc -> 0x00b3
+ 0xfd -> 0x00b2
+ 0xfe -> 0x25a0
+ 0xff -> 0x00a0
+ _ -> fromIntegral i