diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-02-08 23:35:19 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-02-10 22:04:11 -0800 |
commit | 8ca191604dcd13af27c11d2da225da646ebce6fc (patch) | |
tree | 9663e0b951ecfce7efd08efd79dcd4b957601b85 /src/Text/Pandoc/Readers | |
parent | 9994ad977d03e97baadf680793c58a66ba7e77e9 (diff) | |
download | pandoc-8ca191604dcd13af27c11d2da225da646ebce6fc.tar.gz |
Add new unexported module T.P.XMLParser.
This exports functions that uses xml-conduit's parser to
produce an xml-light Element or [Content]. This allows
existing pandoc code to use a better parser without
much modification.
The new parser is used in all places where xml-light's
parser was previously used. Benchmarks show a significant
performance improvement in parsing XML-based formats
(especially ODT and FB2).
Note that the xml-light types use String, so the
conversion from xml-conduit types involves a lot
of extra allocation. It would be desirable to
avoid that in the future by gradually switching
to using xml-conduit directly. This can be done
module by module.
The new parser also reports errors, which we report
when possible.
A new constructor PandocXMLError has been added to
PandocError in T.P.Error [API change].
Closes #7091, which was the main stimulus.
These changes revealed the need for some changes
in the tests. The docbook-reader.docbook test
lacked definitions for the entities it used; these
have been added. And the docx golden tests have been
updated, because the new parser does not preserve
the order of attributes.
Add entity defs to docbook-reader.docbook.
Update golden tests for docx.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 52 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 21 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 28 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/FB2.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/JATS.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/OPML.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt.hs | 24 |
8 files changed, 109 insertions, 62 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ada3e98ec..ad0108843 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document. -} module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Control.Monad.State.Strict -import Data.Char (isSpace, toUpper) +import Data.Char (isSpace, toUpper, isLetter) import Data.Default import Data.Either (rights) import Data.Foldable (asum) @@ -21,7 +21,10 @@ import Data.List (intersperse,elemIndex) import Data.Maybe (fromMaybe,mapMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import Control.Monad.Except (throwError) import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Options @@ -29,6 +32,7 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) {- @@ -537,22 +541,25 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp + tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $ + parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp) (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) --- We treat <?asciidoc-br?> specially (issue #1236), converting it --- to <br/>, since xml-light doesn't parse the instruction correctly. --- Other xml instructions are simply removed from the input stream. +-- We treat certain processing instructions by converting them to tags +-- beginning "pi-". handleInstructions :: Text -> Text -handleInstructions = T.pack . handleInstructions' . T.unpack - -handleInstructions' :: String -> String -handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs -handleInstructions' xs = case break (=='<') xs of - (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions' zs - (ys, zs) -> ys ++ handleInstructions' zs +handleInstructions t = + let (x,y) = T.breakOn "<?" t + in if T.null y + then x + else + let (w,z) = T.breakOn "?>" y + in (if T.takeWhile (\c -> isLetter c || c == '-') + (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"] + then x <> "<pi-" <> T.drop 2 w <> "/>" + else x <> w <> T.take 2 z) <> + handleInstructions (T.drop 2 z) getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do @@ -892,7 +899,11 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - lift $ report $ IgnoredElement $ T.pack $ qName (elName e) + let qn = T.pack $ qName $ elName e + let name = if "pi-" `T.isPrefixOf` qn + then "<?" <> qn <> "?>" + else qn + lift $ report $ IgnoredElement name return mempty codeBlockWithLang = do @@ -964,7 +975,7 @@ parseBlock (Elem e) = cs -> map toAlignment cs let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9') || x == '.') s) - let textWidth = case filterChild (named "?dbfo") e of + let textWidth = case filterChild (named "pi-dbfo") e of Just d -> case attrValue "table-width" d of "" -> 1.0 w -> fromMaybe 100.0 (parseWidth w) / 100.0 @@ -1165,12 +1176,15 @@ parseInline (Elem e) = "title" -> return mempty "affiliation" -> skip -- Note: this isn't a real docbook tag; it's what we convert - -- <?asciidor-br?> to in handleInstructions, above. A kludge to - -- work around xml-light's inability to parse an instruction. - "br" -> return linebreak + -- <?asciidor-br?> to in handleInstructions, above. + "pi-asciidoc-br" -> return linebreak _ -> skip >> innerInlines id where skip = do - lift $ report $ IgnoredElement $ T.pack $ qName (elName e) + let qn = T.pack $ qName $ elName e + let name = if "pi-" `T.isPrefixOf` qn + then "<?" <> qn <> "?>" + else qn + lift $ report $ IgnoredElement name return mempty innerInlines f = extractSpaces f . mconcat <$> diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index fdcffcc3f..056dab6c2 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -74,6 +74,7 @@ import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.XML.Light import qualified Text.XML.Light.Cursor as XMLC +import Text.Pandoc.XMLParser (parseXMLElement) data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -343,10 +344,16 @@ archiveToDocxWithWarnings archive = do Right doc -> Right (Docx doc, stateWarnings st) Left e -> Left e +parseXMLFromEntry :: Entry -> Maybe Element +parseXMLFromEntry entry = + case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of + Left _ -> Nothing + Right el -> Just el + getDocumentXmlPath :: Archive -> Maybe FilePath getDocumentXmlPath zf = do entry <- findEntryByPath "_rels/.rels" zf - relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + relsElem <- parseXMLFromEntry entry let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument") @@ -362,7 +369,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do docPath <- asks envDocXmlPath entry <- maybeToD $ findEntryByPath docPath zf - docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + docElem <- maybeToD $ parseXMLFromEntry entry let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem) @@ -401,9 +408,9 @@ constructBogusParStyleData stName = ParStyle archiveToNotes :: Archive -> Notes archiveToNotes zf = let fnElem = findEntryByPath "word/footnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry enElem = findEntryByPath "word/endnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry fn_namespaces = maybe [] elemToNameSpaces fnElem en_namespaces = maybe [] elemToNameSpaces enElem ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces @@ -415,7 +422,7 @@ archiveToNotes zf = archiveToComments :: Archive -> Comments archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry cmts_namespaces = maybe [] elemToNameSpaces cmtsElem cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces) in @@ -445,7 +452,7 @@ filePathToRelationships :: Archive -> FilePath -> FilePath -> [Relationship] filePathToRelationships ar docXmlPath fp | Just relType <- filePathToRelType fp docXmlPath , Just entry <- findEntryByPath fp ar - , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = + , Just relElems <- parseXMLFromEntry entry = mapMaybe (relElemToRelationship relType) $ elChildren relElems filePathToRelationships _ _ _ = [] @@ -527,7 +534,7 @@ archiveToNumbering' zf = case findEntryByPath "word/numbering.xml" zf of Nothing -> Just $ Numbering [] [] [] Just entry -> do - numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + numberingElem <- parseXMLFromEntry entry let namespaces = elemToNameSpaces numberingElem numElems = findChildrenByName namespaces "w" "num" numberingElem absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index 236167187..edade8654 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -53,6 +53,7 @@ import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) @@ -135,19 +136,22 @@ defaultRunStyle = RunStyle { isBold = Nothing , rParentStyle = Nothing } -archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => - (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) +archiveToStyles' + :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) + => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) archiveToStyles' conv1 conv2 zf = - let stylesElem = findEntryByPath "word/styles.xml" zf >>= - (parseXMLDoc . UTF8.toStringLazy . fromEntry) - in - case stylesElem of - Nothing -> (M.empty, M.empty) - Just styElem -> - let namespaces = elemToNameSpaces styElem - in - ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, - M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) + case findEntryByPath "word/styles.xml" zf of + Nothing -> (M.empty, M.empty) + Just entry -> + case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of + Left _ -> (M.empty, M.empty) + Right styElem -> + let namespaces = elemToNameSpaces styElem + in + ( M.fromList $ map (\r -> (conv1 r, r)) $ + buildBasedOnList namespaces styElem Nothing, + M.fromList $ map (\p -> (conv2 p, p)) $ + buildBasedOnList namespaces styElem Nothing) isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool isBasedOnStyle ns element parentStyle diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 5e3326e6d..369c4f0c9 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -17,7 +17,7 @@ module Text.Pandoc.Readers.EPUB (readEPUB) where -import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry, +import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry, toArchiveOrFail) import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) @@ -41,9 +41,10 @@ import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI) -import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) +import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy) import Text.Pandoc.Walk (query, walk) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) type Items = M.Map String (FilePath, MimeType) @@ -181,7 +182,7 @@ renameMeta s = T.pack s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive - docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry + docElem <- parseXMLDocE metaEntry let namespaces = mapMaybe attrToNSPair (elAttribs docElem) ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces) as <- fmap (map attrToPair . elAttribs) @@ -190,7 +191,7 @@ getManifest archive = do let rootdir = dropFileName manifestFile --mime <- lookup "media-type" as manifest <- findEntryByPathE manifestFile archive - (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest) + (rootdir,) <$> parseXMLDocE manifest -- Fixup @@ -284,8 +285,12 @@ findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry findEntryByPathE (normalise . unEscapeString -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: PandocMonad m => String -> m Element -parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc +parseXMLDocE :: PandocMonad m => Entry -> m Element +parseXMLDocE entry = + either (throwError . PandocXMLError fp) return $ parseXMLElement doc + where + doc = UTF8.toTextLazy . fromEntry $ entry + fp = T.pack $ eRelativePath entry findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index b0d2f092b..b804eab4f 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -32,6 +32,7 @@ import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Data.Default import Data.Maybe import Text.HTML.TagSoup.Entity (lookupEntity) @@ -42,6 +43,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) type FB2 m = StateT FB2State m @@ -64,10 +66,10 @@ instance HasMeta FB2State where readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readFB2 _ inp = - case parseXMLDoc $ crFilter inp of - Nothing -> throwError $ PandocParseError "Not an XML document" - Just e -> do - (bs, st) <- runStateT (parseRootElement e) def + case parseXMLElement $ TL.fromStrict $ crFilter inp of + Left msg -> throwError $ PandocXMLError "" msg + Right el -> do + (bs, st) <- runStateT (parseRootElement el) def let authors = if null $ fb2Authors st then id else setMeta "author" (map text $ reverse $ fb2Authors st) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index c638da519..dfd343b7a 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -14,6 +14,8 @@ Conversion of JATS XML to 'Pandoc' document. module Text.Pandoc.Readers.JATS ( readJATS ) where import Control.Monad.State.Strict +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(..)) import Data.Char (isDigit, isSpace, toUpper) import Data.Default import Data.Generics @@ -22,6 +24,7 @@ import qualified Data.Map as Map import Data.Maybe (maybeToList, fromMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -29,6 +32,7 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) @@ -51,8 +55,9 @@ instance Default JATSState where readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readJATS _ inp = do - let tree = normalizeTree . parseXML - $ T.unpack $ crFilter inp + tree <- either (throwError . PandocXMLError "") + (return . normalizeTree) $ + parseXMLContents (TL.fromStrict $ crFilter inp) (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 5b8996025..bdadc4dd9 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -19,14 +19,18 @@ import Data.Generics import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.Pandoc.Shared (crFilter, blocksToInlines') import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) +import Control.Monad.Except (throwError) type OPML m = StateT OPMLState m @@ -49,8 +53,10 @@ instance Default OPMLState where readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML opts inp = do (bs, st') <- runStateT - (mapM parseBlock $ normalizeTree $ - parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts } + (case parseXMLContents (TL.fromStrict (crFilter inp)) of + Left msg -> throwError $ PandocXMLError "" msg + Right ns -> mapM parseBlock $ normalizeTree ns) + def{ opmlOptions = opts } return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 9943d3147..85308deb1 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -15,6 +15,7 @@ module Text.Pandoc.Readers.Odt ( readOdt ) where import Codec.Archive.Zip import qualified Text.XML.Light as XML +import Text.Pandoc.XMLParser (parseXMLElement) import qualified Data.ByteString.Lazy as B @@ -66,18 +67,18 @@ bytesToOdt bytes = case toArchiveOrFail bytes of -- archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) -archiveToOdt archive = either (Left. PandocParseError) Right $ do - let onFailure msg Nothing = Left msg +archiveToOdt archive = do + let onFailure msg Nothing = Left $ PandocParseError msg onFailure _ (Just x) = Right x contentEntry <- onFailure "Could not find content.xml" (findEntryByPath "content.xml" archive) stylesEntry <- onFailure "Could not find styles.xml" (findEntryByPath "styles.xml" archive) - contentElem <- onFailure "Could not find content element" - (entryToXmlElem contentEntry) - stylesElem <- onFailure "Could not find styles element" - (entryToXmlElem stylesEntry) - styles <- either (\_ -> Left "Could not read styles") Right + contentElem <- entryToXmlElem contentEntry + stylesElem <- entryToXmlElem stylesEntry + styles <- either + (\_ -> Left $ PandocParseError "Could not read styles") + Right (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem)) let filePathIsOdtMedia :: FilePath -> Bool filePathIsOdtMedia fp = @@ -85,10 +86,13 @@ archiveToOdt archive = either (Left. PandocParseError) Right $ do in (dir == "Pictures/") || (dir /= "./" && name == "content.xml") let media = filteredFilesFromArchive archive filePathIsOdtMedia let startState = readerState styles media - either (\_ -> Left "Could not convert opendocument") Right + either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right (runConverter' read_body startState contentElem) -- -entryToXmlElem :: Entry -> Maybe XML.Element -entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry +entryToXmlElem :: Entry -> Either PandocError XML.Element +entryToXmlElem entry = + case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of + Right x -> Right x + Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg |