From 73add0578989e1da6e9cd1de68e2e4142f789188 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Feb 2021 09:54:39 -0800 Subject: Docx reader: use Map instead of list for Namespaces. This gives a speedup of about 5-10%. The reader is now approximately twice as fast as in the last release. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 14 +++++++------- src/Text/Pandoc/Readers/Docx/Util.hs | 26 +++++++++++++------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index c76f3c171..f8ed248d7 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -396,9 +396,9 @@ archiveToNotes zf = >>= parseXMLFromEntry enElem = findEntryByPath "word/endnotes.xml" zf >>= parseXMLFromEntry - fn_namespaces = maybe [] elemToNameSpaces fnElem - en_namespaces = maybe [] elemToNameSpaces enElem - ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces + fn_namespaces = maybe mempty elemToNameSpaces fnElem + en_namespaces = maybe mempty elemToNameSpaces enElem + ns = M.union fn_namespaces en_namespaces fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns en = enElem >>= elemToNotes ns "endnote" . walkDocument ns in @@ -408,7 +408,7 @@ archiveToComments :: Archive -> Comments archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf >>= parseXMLFromEntry - cmts_namespaces = maybe [] elemToNameSpaces cmtsElem + cmts_namespaces = maybe mempty elemToNameSpaces cmtsElem cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$> cmtsElem in @@ -518,7 +518,7 @@ levelElemToLevel _ _ = Nothing archiveToNumbering' :: Archive -> Maybe Numbering archiveToNumbering' zf = case findEntryByPath "word/numbering.xml" zf of - Nothing -> Just $ Numbering [] [] [] + Nothing -> Just $ Numbering mempty [] [] Just entry -> do numberingElem <- parseXMLFromEntry entry let namespaces = elemToNameSpaces numberingElem @@ -530,7 +530,7 @@ archiveToNumbering' zf = archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = - fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) + fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive) elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element @@ -875,7 +875,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttr (QName "embed" (M.lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index 21df03d9e..ac331cba6 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -22,42 +22,42 @@ module Text.Pandoc.Readers.Docx.Util ( , findAttrByName ) where -import Data.Maybe (mapMaybe) import qualified Data.Text as T import Data.Text (Text) import Text.Pandoc.XML.Light +import qualified Data.Map as M -type NameSpaces = [(Text, Text)] +type NameSpaces = M.Map Text Text elemToNameSpaces :: Element -> NameSpaces -elemToNameSpaces = mapMaybe attrToNSPair . elAttribs - -attrToNSPair :: Attr -> Maybe (Text, Text) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing +elemToNameSpaces = foldr (\(Attr qn val) -> + case qn of + QName s _ (Just "xmlns") -> M.insert s val + _ -> id) mempty . elAttribs elemName :: NameSpaces -> Text -> Text -> QName elemName ns prefix name = - QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix) + QName name (M.lookup prefix ns) + (if T.null prefix then Nothing else Just prefix) isElem :: NameSpaces -> Text -> Text -> Element -> Bool isElem ns prefix name element = - let ns' = ns ++ elemToNameSpaces element + let ns' = ns <> elemToNameSpaces element in qName (elName element) == name && - qURI (elName element) == lookup prefix ns' + qURI (elName element) == M.lookup prefix ns' findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element findChildByName ns pref name el = - let ns' = ns ++ elemToNameSpaces el + let ns' = ns <> elemToNameSpaces el in findChild (elemName ns' pref name) el findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element] findChildrenByName ns pref name el = - let ns' = ns ++ elemToNameSpaces el + let ns' = ns <> elemToNameSpaces el in findChildren (elemName ns' pref name) el findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text findAttrByName ns pref name el = - let ns' = ns ++ elemToNameSpaces el + let ns' = ns <> elemToNameSpaces el in findAttr (elemName ns' pref name) el -- cgit v1.2.3