diff options
98 files changed, 238 insertions, 91 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index 5b90d039a..dc3b4ca77 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1464,6 +1464,7 @@ Nonzero exit codes have the following meanings: 24 PandocCiteprocError 31 PandocEpubSubdirectoryError 43 PandocPDFError + 44 PandocXMLError 47 PandocPDFProgramNotFoundError 61 PandocHttpError 62 PandocShouldNeverHappenError diff --git a/pandoc.cabal b/pandoc.cabal index 72e7c2da5..e56456c68 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -493,6 +493,7 @@ library unicode-transforms >= 0.3 && < 0.4, unordered-containers >= 0.2 && < 0.3, xml >= 1.3.12 && < 1.4, + xml-conduit >= 1.7 && < 1.10, zip-archive >= 0.2.3.4 && < 0.5, zlib >= 0.5 && < 0.7 if os(windows) && arch(i386) @@ -686,6 +687,7 @@ library Text.Pandoc.Lua.PandocLua, Text.Pandoc.Lua.Util, Text.Pandoc.Lua.Walk, + Text.Pandoc.XMLParser, Text.Pandoc.CSS, Text.Pandoc.CSV, Text.Pandoc.RoffChar, diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 204cf15ca..831405f42 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -48,6 +48,7 @@ data PandocError = PandocIOError Text IOError | PandocFailOnWarningError | PandocPDFProgramNotFoundError Text | PandocPDFError Text + | PandocXMLError Text Text | PandocFilterError Text Text | PandocLuaError Text | PandocCouldNotFindDataFileError Text @@ -103,6 +104,8 @@ handleError (Left e) = PandocPDFProgramNotFoundError pdfprog -> err 47 $ pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg + PandocXMLError fp logmsg -> err 44 $ "Invalid XML" <> + (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg PandocFilterError filtername msg -> err 83 $ "Error running filter " <> filtername <> ":\n" <> msg PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index e19958f6a..e0a1af8e8 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -45,7 +45,9 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.XML.Light as Xml +import Text.Pandoc.XMLParser (parseXMLElement) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Control.Applicative import qualified Data.Attoparsec.ByteString.Char8 as A @@ -327,7 +329,8 @@ getSize img = svgSize :: WriterOptions -> ByteString -> Maybe ImageSize svgSize opts img = do - doc <- Xml.parseXMLDoc $ UTF8.toString img + doc <- either (const mzero) return $ parseXMLElement + $ TL.fromStrict $ UTF8.toText img let viewboxSize = do vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc [_,_,w,h] <- mapM safeRead (T.words (T.pack vb)) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ada3e98ec..ad0108843 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document. -} module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Control.Monad.State.Strict -import Data.Char (isSpace, toUpper) +import Data.Char (isSpace, toUpper, isLetter) import Data.Default import Data.Either (rights) import Data.Foldable (asum) @@ -21,7 +21,10 @@ import Data.List (intersperse,elemIndex) import Data.Maybe (fromMaybe,mapMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import Control.Monad.Except (throwError) import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Options @@ -29,6 +32,7 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) {- @@ -537,22 +541,25 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp + tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $ + parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp) (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) --- We treat <?asciidoc-br?> specially (issue #1236), converting it --- to <br/>, since xml-light doesn't parse the instruction correctly. --- Other xml instructions are simply removed from the input stream. +-- We treat certain processing instructions by converting them to tags +-- beginning "pi-". handleInstructions :: Text -> Text -handleInstructions = T.pack . handleInstructions' . T.unpack - -handleInstructions' :: String -> String -handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs -handleInstructions' xs = case break (=='<') xs of - (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions' zs - (ys, zs) -> ys ++ handleInstructions' zs +handleInstructions t = + let (x,y) = T.breakOn "<?" t + in if T.null y + then x + else + let (w,z) = T.breakOn "?>" y + in (if T.takeWhile (\c -> isLetter c || c == '-') + (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"] + then x <> "<pi-" <> T.drop 2 w <> "/>" + else x <> w <> T.take 2 z) <> + handleInstructions (T.drop 2 z) getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do @@ -892,7 +899,11 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - lift $ report $ IgnoredElement $ T.pack $ qName (elName e) + let qn = T.pack $ qName $ elName e + let name = if "pi-" `T.isPrefixOf` qn + then "<?" <> qn <> "?>" + else qn + lift $ report $ IgnoredElement name return mempty codeBlockWithLang = do @@ -964,7 +975,7 @@ parseBlock (Elem e) = cs -> map toAlignment cs let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9') || x == '.') s) - let textWidth = case filterChild (named "?dbfo") e of + let textWidth = case filterChild (named "pi-dbfo") e of Just d -> case attrValue "table-width" d of "" -> 1.0 w -> fromMaybe 100.0 (parseWidth w) / 100.0 @@ -1165,12 +1176,15 @@ parseInline (Elem e) = "title" -> return mempty "affiliation" -> skip -- Note: this isn't a real docbook tag; it's what we convert - -- <?asciidor-br?> to in handleInstructions, above. A kludge to - -- work around xml-light's inability to parse an instruction. - "br" -> return linebreak + -- <?asciidor-br?> to in handleInstructions, above. + "pi-asciidoc-br" -> return linebreak _ -> skip >> innerInlines id where skip = do - lift $ report $ IgnoredElement $ T.pack $ qName (elName e) + let qn = T.pack $ qName $ elName e + let name = if "pi-" `T.isPrefixOf` qn + then "<?" <> qn <> "?>" + else qn + lift $ report $ IgnoredElement name return mempty innerInlines f = extractSpaces f . mconcat <$> diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index fdcffcc3f..056dab6c2 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -74,6 +74,7 @@ import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.XML.Light import qualified Text.XML.Light.Cursor as XMLC +import Text.Pandoc.XMLParser (parseXMLElement) data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -343,10 +344,16 @@ archiveToDocxWithWarnings archive = do Right doc -> Right (Docx doc, stateWarnings st) Left e -> Left e +parseXMLFromEntry :: Entry -> Maybe Element +parseXMLFromEntry entry = + case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of + Left _ -> Nothing + Right el -> Just el + getDocumentXmlPath :: Archive -> Maybe FilePath getDocumentXmlPath zf = do entry <- findEntryByPath "_rels/.rels" zf - relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + relsElem <- parseXMLFromEntry entry let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument") @@ -362,7 +369,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do docPath <- asks envDocXmlPath entry <- maybeToD $ findEntryByPath docPath zf - docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + docElem <- maybeToD $ parseXMLFromEntry entry let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem) @@ -401,9 +408,9 @@ constructBogusParStyleData stName = ParStyle archiveToNotes :: Archive -> Notes archiveToNotes zf = let fnElem = findEntryByPath "word/footnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry enElem = findEntryByPath "word/endnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry fn_namespaces = maybe [] elemToNameSpaces fnElem en_namespaces = maybe [] elemToNameSpaces enElem ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces @@ -415,7 +422,7 @@ archiveToNotes zf = archiveToComments :: Archive -> Comments archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry cmts_namespaces = maybe [] elemToNameSpaces cmtsElem cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces) in @@ -445,7 +452,7 @@ filePathToRelationships :: Archive -> FilePath -> FilePath -> [Relationship] filePathToRelationships ar docXmlPath fp | Just relType <- filePathToRelType fp docXmlPath , Just entry <- findEntryByPath fp ar - , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = + , Just relElems <- parseXMLFromEntry entry = mapMaybe (relElemToRelationship relType) $ elChildren relElems filePathToRelationships _ _ _ = [] @@ -527,7 +534,7 @@ archiveToNumbering' zf = case findEntryByPath "word/numbering.xml" zf of Nothing -> Just $ Numbering [] [] [] Just entry -> do - numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + numberingElem <- parseXMLFromEntry entry let namespaces = elemToNameSpaces numberingElem numElems = findChildrenByName namespaces "w" "num" numberingElem absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index 236167187..edade8654 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -53,6 +53,7 @@ import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) @@ -135,19 +136,22 @@ defaultRunStyle = RunStyle { isBold = Nothing , rParentStyle = Nothing } -archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => - (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) +archiveToStyles' + :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) + => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) archiveToStyles' conv1 conv2 zf = - let stylesElem = findEntryByPath "word/styles.xml" zf >>= - (parseXMLDoc . UTF8.toStringLazy . fromEntry) - in - case stylesElem of - Nothing -> (M.empty, M.empty) - Just styElem -> - let namespaces = elemToNameSpaces styElem - in - ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, - M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) + case findEntryByPath "word/styles.xml" zf of + Nothing -> (M.empty, M.empty) + Just entry -> + case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of + Left _ -> (M.empty, M.empty) + Right styElem -> + let namespaces = elemToNameSpaces styElem + in + ( M.fromList $ map (\r -> (conv1 r, r)) $ + buildBasedOnList namespaces styElem Nothing, + M.fromList $ map (\p -> (conv2 p, p)) $ + buildBasedOnList namespaces styElem Nothing) isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool isBasedOnStyle ns element parentStyle diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 5e3326e6d..369c4f0c9 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -17,7 +17,7 @@ module Text.Pandoc.Readers.EPUB (readEPUB) where -import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry, +import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry, toArchiveOrFail) import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) @@ -41,9 +41,10 @@ import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI) -import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) +import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy) import Text.Pandoc.Walk (query, walk) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) type Items = M.Map String (FilePath, MimeType) @@ -181,7 +182,7 @@ renameMeta s = T.pack s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive - docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry + docElem <- parseXMLDocE metaEntry let namespaces = mapMaybe attrToNSPair (elAttribs docElem) ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces) as <- fmap (map attrToPair . elAttribs) @@ -190,7 +191,7 @@ getManifest archive = do let rootdir = dropFileName manifestFile --mime <- lookup "media-type" as manifest <- findEntryByPathE manifestFile archive - (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest) + (rootdir,) <$> parseXMLDocE manifest -- Fixup @@ -284,8 +285,12 @@ findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry findEntryByPathE (normalise . unEscapeString -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: PandocMonad m => String -> m Element -parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc +parseXMLDocE :: PandocMonad m => Entry -> m Element +parseXMLDocE entry = + either (throwError . PandocXMLError fp) return $ parseXMLElement doc + where + doc = UTF8.toTextLazy . fromEntry $ entry + fp = T.pack $ eRelativePath entry findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index b0d2f092b..b804eab4f 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -32,6 +32,7 @@ import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Data.Default import Data.Maybe import Text.HTML.TagSoup.Entity (lookupEntity) @@ -42,6 +43,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) type FB2 m = StateT FB2State m @@ -64,10 +66,10 @@ instance HasMeta FB2State where readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readFB2 _ inp = - case parseXMLDoc $ crFilter inp of - Nothing -> throwError $ PandocParseError "Not an XML document" - Just e -> do - (bs, st) <- runStateT (parseRootElement e) def + case parseXMLElement $ TL.fromStrict $ crFilter inp of + Left msg -> throwError $ PandocXMLError "" msg + Right el -> do + (bs, st) <- runStateT (parseRootElement el) def let authors = if null $ fb2Authors st then id else setMeta "author" (map text $ reverse $ fb2Authors st) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index c638da519..dfd343b7a 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -14,6 +14,8 @@ Conversion of JATS XML to 'Pandoc' document. module Text.Pandoc.Readers.JATS ( readJATS ) where import Control.Monad.State.Strict +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(..)) import Data.Char (isDigit, isSpace, toUpper) import Data.Default import Data.Generics @@ -22,6 +24,7 @@ import qualified Data.Map as Map import Data.Maybe (maybeToList, fromMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -29,6 +32,7 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) @@ -51,8 +55,9 @@ instance Default JATSState where readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readJATS _ inp = do - let tree = normalizeTree . parseXML - $ T.unpack $ crFilter inp + tree <- either (throwError . PandocXMLError "") + (return . normalizeTree) $ + parseXMLContents (TL.fromStrict $ crFilter inp) (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 5b8996025..bdadc4dd9 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -19,14 +19,18 @@ import Data.Generics import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.Pandoc.Shared (crFilter, blocksToInlines') import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) +import Control.Monad.Except (throwError) type OPML m = StateT OPMLState m @@ -49,8 +53,10 @@ instance Default OPMLState where readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML opts inp = do (bs, st') <- runStateT - (mapM parseBlock $ normalizeTree $ - parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts } + (case parseXMLContents (TL.fromStrict (crFilter inp)) of + Left msg -> throwError $ PandocXMLError "" msg + Right ns -> mapM parseBlock $ normalizeTree ns) + def{ opmlOptions = opts } return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 9943d3147..85308deb1 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -15,6 +15,7 @@ module Text.Pandoc.Readers.Odt ( readOdt ) where import Codec.Archive.Zip import qualified Text.XML.Light as XML +import Text.Pandoc.XMLParser (parseXMLElement) import qualified Data.ByteString.Lazy as B @@ -66,18 +67,18 @@ bytesToOdt bytes = case toArchiveOrFail bytes of -- archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) -archiveToOdt archive = either (Left. PandocParseError) Right $ do - let onFailure msg Nothing = Left msg +archiveToOdt archive = do + let onFailure msg Nothing = Left $ PandocParseError msg onFailure _ (Just x) = Right x contentEntry <- onFailure "Could not find content.xml" (findEntryByPath "content.xml" archive) stylesEntry <- onFailure "Could not find styles.xml" (findEntryByPath "styles.xml" archive) - contentElem <- onFailure "Could not find content element" - (entryToXmlElem contentEntry) - stylesElem <- onFailure "Could not find styles element" - (entryToXmlElem stylesEntry) - styles <- either (\_ -> Left "Could not read styles") Right + contentElem <- entryToXmlElem contentEntry + stylesElem <- entryToXmlElem stylesEntry + styles <- either + (\_ -> Left $ PandocParseError "Could not read styles") + Right (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem)) let filePathIsOdtMedia :: FilePath -> Bool filePathIsOdtMedia fp = @@ -85,10 +86,13 @@ archiveToOdt archive = either (Left. PandocParseError) Right $ do in (dir == "Pictures/") || (dir /= "./" && name == "content.xml") let media = filteredFilesFromArchive archive filePathIsOdtMedia let startState = readerState styles media - either (\_ -> Left "Could not convert opendocument") Right + either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right (runConverter' read_body startState contentElem) -- -entryToXmlElem :: Entry -> Maybe XML.Element -entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry +entryToXmlElem :: Entry -> Either PandocError XML.Element +entryToXmlElem entry = + case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of + Right x -> Right x + Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 1f16f6772..e99fa2567 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -55,8 +55,9 @@ import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) import Text.Printf (printf) import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), - add_attrs, lookupAttr, node, onlyElems, parseXML, + add_attrs, lookupAttr, node, onlyElems, ppElement, showElement, strContent, unode, unqual) +import Text.Pandoc.XMLParser (parseXMLContents) import Text.Pandoc.XML (escapeStringForXML) import Text.DocTemplates (FromContext(lookupContext), Context(..), ToContext(toVal), Val(..)) @@ -160,7 +161,12 @@ mkEntry path content = do getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta - let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts + elts <- case writerEpubMetadata opts of + Nothing -> return [] + Just t -> case parseXMLContents (TL.fromStrict t) of + Left msg -> throwError $ + PandocXMLError "epub metadata" msg + Right ns -> return (onlyElems ns) let md' = foldr addMetadataFromXML md elts let addIdentifier m = if null (epubIdentifier m) @@ -836,7 +842,8 @@ pandocToEPUB version opts doc = do : case subs of [] -> [] (_:_) -> [unode "ol" ! [("class","toc")] $ subs] - where titElements = parseXML titRendered + where titElements = either (const []) id $ + parseXMLContents (TL.fromStrict titRendered) titRendered = case P.runPure (writeHtmlStringForEPUB version opts{ writerTemplate = Nothing diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 25b1f28d1..9334d6e9a 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -19,7 +19,7 @@ FictionBook is an XML-based e-book format. For more information see: module Text.Pandoc.Writers.FB2 (writeFB2) where import Control.Monad (zipWithM) -import Control.Monad.Except (catchError) +import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify) import Data.ByteString.Base64 (encode) import Data.Char (isAscii, isControl, isSpace) @@ -27,16 +27,18 @@ import Data.Either (lefts, rights) import Data.List (intercalate) import Data.Text (Text, pack) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Network.HTTP (urlEncode) import Text.XML.Light import qualified Text.XML.Light as X import qualified Text.XML.Light.Cursor as XC -import qualified Text.XML.Light.Input as XI +import Text.Pandoc.XMLParser (parseXMLContents) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, @@ -307,7 +309,10 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code" . T.unpack) . T.lines $ s blockToXml (RawBlock f str) = if f == Format "fb2" - then return $ XI.parseXML str + then + case parseXMLContents (TL.fromStrict str) of + Left msg -> throwError $ PandocXMLError "" msg + Right nds -> return nds else return [] blockToXml (Div _ bs) = cMapM blockToXml bs blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 05dfad5eb..a32ff618c 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -13,7 +13,7 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Codec.Archive.Zip -import Control.Monad.Except (catchError) +import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Generics (everywhere', mkT) @@ -27,6 +27,7 @@ import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) @@ -35,10 +36,11 @@ import Text.DocLayout import Text.Pandoc.Shared (stringify, pandocVersion, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, fixDisplayMath) -import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) +import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.XML +import Text.Pandoc.XMLParser (parseXMLElement) import Text.TeXMath import Text.XML.Light @@ -172,17 +174,18 @@ updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive updateStyleWithLang Nothing arch = return arch updateStyleWithLang (Just lang) arch = do epochtime <- floor `fmap` lift P.getPOSIXTime - return arch{ zEntries = [if eRelativePath e == "styles.xml" - then case parseXMLDoc - (toStringLazy (fromEntry e)) of - Nothing -> e - Just d -> - toEntry "styles.xml" epochtime - ( fromStringLazy - . ppTopElement - . addLang lang $ d ) - else e - | e <- zEntries arch] } + entries <- mapM (\e -> if eRelativePath e == "styles.xml" + then case parseXMLElement + (toTextLazy (fromEntry e)) of + Left msg -> throwError $ + PandocXMLError "styles.xml" msg + Right d -> return $ + toEntry "styles.xml" epochtime + ( fromStringLazy + . ppTopElement + . addLang lang $ d ) + else return e) (zEntries arch) + return arch{ zEntries = entries } addLang :: Lang -> Element -> Element addLang lang = everywhere' (mkT updateLangAttr) diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index 3ac007f4e..8f60e70d5 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -35,6 +35,7 @@ import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad) import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light as XML +import Text.Pandoc.XMLParser (parseXMLElement) mknode :: Node t => String -> [(String,String)] -> t -> Element mknode s attrs = @@ -62,10 +63,10 @@ parseXml refArchive distArchive relpath = findEntryByPath relpath distArchive of Nothing -> throwError $ PandocSomeError $ T.pack relpath <> " missing in reference file" - Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of - Nothing -> throwError $ PandocSomeError $ - T.pack relpath <> " corrupt in reference file" - Just d -> return d + Just e -> case parseXMLElement . UTF8.toTextLazy . fromEntry $ e of + Left msg -> + throwError $ PandocXMLError (T.pack relpath) msg + Right d -> return d -- Copied from Util diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 8554db622..cd092969b 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -29,6 +29,7 @@ import Data.Time.Clock (UTCTime) import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -77,7 +78,8 @@ getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer) getPresentationSize refArchive distArchive = do entry <- findEntryByPath "ppt/presentation.xml" refArchive `mplus` findEntryByPath "ppt/presentation.xml" distArchive - presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry + presElement <- either (const Nothing) return $ + parseXMLElement $ UTF8.toTextLazy $ fromEntry entry let ns = elemToNameSpaces presElement sldSize <- findChild (elemName ns "p" "sldSz") presElement cxS <- findAttr (QName "cx" Nothing Nothing) sldSize diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs new file mode 100644 index 000000000..8ad22a66a --- /dev/null +++ b/src/Text/Pandoc/XMLParser.hs @@ -0,0 +1,66 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.XMLParser + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Bridge to allow using xml-conduit's parser with xml-light's types. +-} +module Text.Pandoc.XMLParser + ( parseXMLElement + , parseXMLContents + , module Text.XML.Light.Types + ) where + +import qualified Control.Exception as E +import qualified Text.XML as Conduit +import Text.XML.Unresolved (InvalidEventStream(..)) +import qualified Text.XML.Light as Light +import Text.XML.Light.Types +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import qualified Data.Map as M +import Data.Maybe (mapMaybe) + +-- Drop in replacement for parseXMLDoc in xml-light. +parseXMLElement :: TL.Text -> Either T.Text Light.Element +parseXMLElement t = + elementToElement . Conduit.documentRoot <$> + either (Left . T.pack . E.displayException) Right + (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) + +parseXMLContents :: TL.Text -> Either T.Text [Light.Content] +parseXMLContents t = + case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of + Left e -> + case E.fromException e of + Just (ContentAfterRoot _) -> + elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>") + _ -> Left . T.pack . E.displayException $ e + Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x] + +elementToElement :: Conduit.Element -> Light.Element +elementToElement (Conduit.Element name attribMap nodes) = + Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing + where + attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $ + M.toList attribMap + nameToQname (Conduit.Name localName mbns mbpref) = + case mbpref of + Nothing | "xmlns:" `T.isPrefixOf` localName -> + Light.QName (T.unpack $ T.drop 6 localName) (T.unpack <$> mbns) + (Just "xmlns") + _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns) + (T.unpack <$> mbpref) + +nodeToContent :: Conduit.Node -> Maybe Light.Content +nodeToContent (Conduit.NodeElement el) = + Just (Light.Elem (elementToElement el)) +nodeToContent (Conduit.NodeContent t) = + Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing)) +nodeToContent _ = Nothing + diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs index 525499c86..a9c9a0586 100644 --- a/test/Tests/Readers/JATS.hs +++ b/test/Tests/Readers/JATS.hs @@ -88,6 +88,7 @@ tests = [ testGroup "inline code" "<p>\n\ \ <inline-formula><alternatives>\n\ \ <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\ + \ </alternatives></inline-formula>\n\ \</p>" =?> para (math "\\sigma|_{\\{x\\}}") , test jats "math ml only" $ diff --git a/test/command/5321.md b/test/command/5321.md index 081abe2a0..83404632a 100644 --- a/test/command/5321.md +++ b/test/command/5321.md @@ -4,7 +4,7 @@ <caption> <p>bar</p> </caption> - <graphic xlink:href="foo.png" xlink:alt-text="baz"> + <graphic xlink:href="foo.png" xlink:alt-text="baz" /> </fig> ^D [Para [Image ("fig-1",[],[]) [Str "bar"] ("foo.png","fig:")]] @@ -17,7 +17,7 @@ <title>foo</title> <p>bar</p> </caption> - <graphic xlink:href="foo.png" xlink:alt-text="baz"> + <graphic xlink:href="foo.png" xlink:alt-text="baz" /> </fig> ^D [Para [Image ("fig-1",[],[]) [Str "foo",LineBreak,Str "bar"] ("foo.png","fig:")]] diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook index 02568d8de..5717d78d0 100644 --- a/test/docbook-reader.docbook +++ b/test/docbook-reader.docbook @@ -1,6 +1,11 @@ <?xml version="1.0" encoding="utf-8" ?> <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN" - "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"> +"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd" +[ +<!ENTITY GHC "GHC" > +<!ENTITY let "LET" > +<!ENTITY case "CASE" > +]> <article> <articleinfo> <title>Pandoc Test Suite</title> diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx Binary files differindex 3e1bf16e7..d3b16d0f2 100644 --- a/test/docx/golden/block_quotes.docx +++ b/test/docx/golden/block_quotes.docx diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx Binary files differindex 66f055063..6293ef493 100644 --- a/test/docx/golden/codeblock.docx +++ b/test/docx/golden/codeblock.docx diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx Binary files differindex fb3a02a0a..4205a1516 100644 --- a/test/docx/golden/comments.docx +++ b/test/docx/golden/comments.docx diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx Binary files differindex bc6c2702a..adb3f23db 100644 --- a/test/docx/golden/custom_style_no_reference.docx +++ b/test/docx/golden/custom_style_no_reference.docx diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx Binary files differindex 8c555a5bd..92c8137fe 100644 --- a/test/docx/golden/custom_style_preserve.docx +++ b/test/docx/golden/custom_style_preserve.docx diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx Binary files differindex 5f96cc911..f53470617 100644 --- a/test/docx/golden/custom_style_reference.docx +++ b/test/docx/golden/custom_style_reference.docx diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx Binary files differindex c21b3a5b3..d6af90a72 100644 --- a/test/docx/golden/definition_list.docx +++ b/test/docx/golden/definition_list.docx diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx Binary files differindex 92ce144e9..e18dbe853 100644 --- a/test/docx/golden/document-properties-short-desc.docx +++ b/test/docx/golden/document-properties-short-desc.docx diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx Binary files differindex d21b67309..820299043 100644 --- a/test/docx/golden/document-properties.docx +++ b/test/docx/golden/document-properties.docx diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx Binary files differindex 3558a47bf..ae0f41d12 100644 --- a/test/docx/golden/headers.docx +++ b/test/docx/golden/headers.docx diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx Binary files differindex 606df92a3..94cd35dfa 100644 --- a/test/docx/golden/image.docx +++ b/test/docx/golden/image.docx diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx Binary files differindex 759269cac..879f2a25b 100644 --- a/test/docx/golden/inline_code.docx +++ b/test/docx/golden/inline_code.docx diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx Binary files differindex c37777080..93f86478f 100644 --- a/test/docx/golden/inline_formatting.docx +++ b/test/docx/golden/inline_formatting.docx diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx Binary files differindex 9450b1a73..967d297f2 100644 --- a/test/docx/golden/inline_images.docx +++ b/test/docx/golden/inline_images.docx diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx Binary files differindex 6f0b830e6..c5614e2fa 100644 --- a/test/docx/golden/link_in_notes.docx +++ b/test/docx/golden/link_in_notes.docx diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx Binary files differindex e53889cfb..0f39a831f 100644 --- a/test/docx/golden/links.docx +++ b/test/docx/golden/links.docx diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx Binary files differindex 5dbe298b7..07046f223 100644 --- a/test/docx/golden/lists.docx +++ b/test/docx/golden/lists.docx diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx Binary files differindex 194181288..3656618e6 100644 --- a/test/docx/golden/lists_continuing.docx +++ b/test/docx/golden/lists_continuing.docx diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx Binary files differindex 6e0b634f7..8798253d5 100644 --- a/test/docx/golden/lists_multiple_initial.docx +++ b/test/docx/golden/lists_multiple_initial.docx diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx Binary files differindex 477178e77..0a24d1840 100644 --- a/test/docx/golden/lists_restarting.docx +++ b/test/docx/golden/lists_restarting.docx diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx Binary files differindex 51110356e..52bb7a217 100644 --- a/test/docx/golden/nested_anchors_in_header.docx +++ b/test/docx/golden/nested_anchors_in_header.docx diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx Binary files differindex b6206cdf5..182c06c64 100644 --- a/test/docx/golden/notes.docx +++ b/test/docx/golden/notes.docx diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx Binary files differindex 07b576080..7b69a56a3 100644 --- a/test/docx/golden/raw-blocks.docx +++ b/test/docx/golden/raw-blocks.docx diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx Binary files differindex d46095eb7..3d3a35701 100644 --- a/test/docx/golden/raw-bookmarks.docx +++ b/test/docx/golden/raw-bookmarks.docx diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx Binary files differindex 7caba4e93..5ae37b406 100644 --- a/test/docx/golden/table_one_row.docx +++ b/test/docx/golden/table_one_row.docx diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx Binary files differindex 6aaa6da61..c29aa6716 100644 --- a/test/docx/golden/table_with_list_cell.docx +++ b/test/docx/golden/table_with_list_cell.docx diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx Binary files differindex 5746c5ad0..664493246 100644 --- a/test/docx/golden/tables.docx +++ b/test/docx/golden/tables.docx diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx Binary files differindex 5f22dccc6..b6d15340e 100644 --- a/test/docx/golden/track_changes_deletion.docx +++ b/test/docx/golden/track_changes_deletion.docx diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx Binary files differindex ab5c4f56d..f8e1092d2 100644 --- a/test/docx/golden/track_changes_insertion.docx +++ b/test/docx/golden/track_changes_insertion.docx diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx Binary files differindex 085f33162..b4cda82f2 100644 --- a/test/docx/golden/track_changes_move.docx +++ b/test/docx/golden/track_changes_move.docx diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx Binary files differindex 1ac86d5c8..ee222efa0 100644 --- a/test/docx/golden/track_changes_scrubbed_metadata.docx +++ b/test/docx/golden/track_changes_scrubbed_metadata.docx diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx Binary files differindex c2c443b19..c6f8d9c96 100644 --- a/test/docx/golden/unicode.docx +++ b/test/docx/golden/unicode.docx diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx Binary files differindex 5ea18d32e..ea8146690 100644 --- a/test/docx/golden/verbatim_subsuper.docx +++ b/test/docx/golden/verbatim_subsuper.docx diff --git a/test/jats-reader.native b/test/jats-reader.native index ab77dd1a0..0715ea8cc 100644 --- a/test/jats-reader.native +++ b/test/jats-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 1 ("headers",[],[]) [Str "Headers"] ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")] diff --git a/test/jats-reader.xml b/test/jats-reader.xml index f75b3e95a..f33cb9ab3 100644 --- a/test/jats-reader.xml +++ b/test/jats-reader.xml @@ -20,6 +20,7 @@ <surname>MacFarlane</surname> <given-names>John</given-names> </name> + </contrib> <contrib contrib-type="author"> <name> <surname>Anonymous</surname> diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx Binary files differindex aa9b7692a..58070eb3f 100644 --- a/test/pptx/code-custom.pptx +++ b/test/pptx/code-custom.pptx diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx Binary files differindex 9aaef4cb5..db9b7e371 100644 --- a/test/pptx/code-custom_templated.pptx +++ b/test/pptx/code-custom_templated.pptx diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx Binary files differindex 1737ec757..c7b1ed7d5 100644 --- a/test/pptx/code.pptx +++ b/test/pptx/code.pptx diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx Binary files differindex 87fb560ef..6944d92bf 100644 --- a/test/pptx/code_templated.pptx +++ b/test/pptx/code_templated.pptx diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx Binary files differindex 961c31020..ae0d28429 100644 --- a/test/pptx/document-properties-short-desc.pptx +++ b/test/pptx/document-properties-short-desc.pptx diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx Binary files differindex 894738ef7..37c74c69a 100644 --- a/test/pptx/document-properties-short-desc_templated.pptx +++ b/test/pptx/document-properties-short-desc_templated.pptx diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx Binary files differindex 188e8d826..324e443a1 100644 --- a/test/pptx/document-properties.pptx +++ b/test/pptx/document-properties.pptx diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx Binary files differindex 253e8c0a7..c81b983e3 100644 --- a/test/pptx/document-properties_templated.pptx +++ b/test/pptx/document-properties_templated.pptx diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx Binary files differindex e230420d2..30ce33db6 100644 --- a/test/pptx/endnotes.pptx +++ b/test/pptx/endnotes.pptx diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx Binary files differindex 49384fd65..d6c604968 100644 --- a/test/pptx/endnotes_templated.pptx +++ b/test/pptx/endnotes_templated.pptx diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx Binary files differindex cdf1be4ad..000e17ecd 100644 --- a/test/pptx/endnotes_toc.pptx +++ b/test/pptx/endnotes_toc.pptx diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx Binary files differindex c4fcbad45..fdcd2e29b 100644 --- a/test/pptx/endnotes_toc_templated.pptx +++ b/test/pptx/endnotes_toc_templated.pptx diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx Binary files differindex 4a13b5b7f..e73126376 100644 --- a/test/pptx/images.pptx +++ b/test/pptx/images.pptx diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx Binary files differindex 7a6e9700e..e3f968e9e 100644 --- a/test/pptx/images_templated.pptx +++ b/test/pptx/images_templated.pptx diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx Binary files differindex 926c8ff3f..eadb9372e 100644 --- a/test/pptx/inline_formatting.pptx +++ b/test/pptx/inline_formatting.pptx diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx Binary files differindex 16f48e182..8ca6bab2b 100644 --- a/test/pptx/inline_formatting_templated.pptx +++ b/test/pptx/inline_formatting_templated.pptx diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx Binary files differindex f47b17a74..ae188ee68 100644 --- a/test/pptx/lists.pptx +++ b/test/pptx/lists.pptx diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx Binary files differindex 88109a95e..60301fa50 100644 --- a/test/pptx/lists_templated.pptx +++ b/test/pptx/lists_templated.pptx diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx Binary files differindex 84020708f..17124a50d 100644 --- a/test/pptx/raw_ooxml.pptx +++ b/test/pptx/raw_ooxml.pptx diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx Binary files differindex a2f77e945..19ae7dd4e 100644 --- a/test/pptx/raw_ooxml_templated.pptx +++ b/test/pptx/raw_ooxml_templated.pptx diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx Binary files differindex 48bf7bc8a..b650b7585 100644 --- a/test/pptx/remove_empty_slides.pptx +++ b/test/pptx/remove_empty_slides.pptx diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx Binary files differindex 23b134a5f..0ab029614 100644 --- a/test/pptx/remove_empty_slides_templated.pptx +++ b/test/pptx/remove_empty_slides_templated.pptx diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx Binary files differindex d6eebeffb..2a6e35080 100644 --- a/test/pptx/slide_breaks.pptx +++ b/test/pptx/slide_breaks.pptx diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx Binary files differindex a6c76a187..a7bcf6a4b 100644 --- a/test/pptx/slide_breaks_slide_level_1.pptx +++ b/test/pptx/slide_breaks_slide_level_1.pptx diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx Binary files differindex 1fbde815b..21b018c25 100644 --- a/test/pptx/slide_breaks_slide_level_1_templated.pptx +++ b/test/pptx/slide_breaks_slide_level_1_templated.pptx diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx Binary files differindex cb3af4aa1..4ec4772a4 100644 --- a/test/pptx/slide_breaks_templated.pptx +++ b/test/pptx/slide_breaks_templated.pptx diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx Binary files differindex dff386885..5983657b6 100644 --- a/test/pptx/slide_breaks_toc.pptx +++ b/test/pptx/slide_breaks_toc.pptx diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx Binary files differindex 43b125f5e..dd54c7082 100644 --- a/test/pptx/slide_breaks_toc_templated.pptx +++ b/test/pptx/slide_breaks_toc_templated.pptx diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx Binary files differindex 3314a1c65..b3e5ed5b9 100644 --- a/test/pptx/speaker_notes.pptx +++ b/test/pptx/speaker_notes.pptx diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx Binary files differindex 27a136838..1078854bb 100644 --- a/test/pptx/speaker_notes_after_metadata.pptx +++ b/test/pptx/speaker_notes_after_metadata.pptx diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx Binary files differindex 7aa3b6a87..5116c6c4e 100644 --- a/test/pptx/speaker_notes_after_metadata_templated.pptx +++ b/test/pptx/speaker_notes_after_metadata_templated.pptx diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx Binary files differindex d43709ca7..0c8e49bd9 100644 --- a/test/pptx/speaker_notes_afterheader.pptx +++ b/test/pptx/speaker_notes_afterheader.pptx diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx Binary files differindex 793ea10f6..68695939d 100644 --- a/test/pptx/speaker_notes_afterheader_templated.pptx +++ b/test/pptx/speaker_notes_afterheader_templated.pptx diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx Binary files differindex 2f4d3b820..7ed9b946d 100644 --- a/test/pptx/speaker_notes_afterseps.pptx +++ b/test/pptx/speaker_notes_afterseps.pptx diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx Binary files differindex 94a221398..79fc82345 100644 --- a/test/pptx/speaker_notes_afterseps_templated.pptx +++ b/test/pptx/speaker_notes_afterseps_templated.pptx diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx Binary files differindex 22040c88c..9f943c279 100644 --- a/test/pptx/speaker_notes_templated.pptx +++ b/test/pptx/speaker_notes_templated.pptx diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx Binary files differindex 18477380b..ac72d8ced 100644 --- a/test/pptx/start_numbering_at.pptx +++ b/test/pptx/start_numbering_at.pptx diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx Binary files differindex 4b9d0ba4d..15c7b5469 100644 --- a/test/pptx/start_numbering_at_templated.pptx +++ b/test/pptx/start_numbering_at_templated.pptx diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx Binary files differindex 1c5b54185..926c5e699 100644 --- a/test/pptx/tables.pptx +++ b/test/pptx/tables.pptx diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx Binary files differindex 1314f4de4..a37e72d2c 100644 --- a/test/pptx/tables_templated.pptx +++ b/test/pptx/tables_templated.pptx diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx Binary files differindex 9018be36e..7f86533fe 100644 --- a/test/pptx/two_column.pptx +++ b/test/pptx/two_column.pptx diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx Binary files differindex 35e93af67..89e3db0ab 100644 --- a/test/pptx/two_column_templated.pptx +++ b/test/pptx/two_column_templated.pptx |