aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-02-17 09:54:39 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-02-17 09:54:39 -0800
commit73add0578989e1da6e9cd1de68e2e4142f789188 (patch)
tree487b672abce9a7e9efb8b89d0d62030ab6b3588e /src/Text
parent80a1d5c9b60b676ba7b7e6ed0267197c8f0ec459 (diff)
downloadpandoc-73add0578989e1da6e9cd1de68e2e4142f789188.tar.gz
Docx reader: use Map instead of list for Namespaces.
This gives a speedup of about 5-10%. The reader is now approximately twice as fast as in the last release.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs14
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs26
2 files changed, 20 insertions, 20 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index c76f3c171..f8ed248d7 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -396,9 +396,9 @@ archiveToNotes zf =
>>= parseXMLFromEntry
enElem = findEntryByPath "word/endnotes.xml" zf
>>= parseXMLFromEntry
- fn_namespaces = maybe [] elemToNameSpaces fnElem
- en_namespaces = maybe [] elemToNameSpaces enElem
- ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
+ fn_namespaces = maybe mempty elemToNameSpaces fnElem
+ en_namespaces = maybe mempty elemToNameSpaces enElem
+ ns = M.union fn_namespaces en_namespaces
fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
in
@@ -408,7 +408,7 @@ archiveToComments :: Archive -> Comments
archiveToComments zf =
let cmtsElem = findEntryByPath "word/comments.xml" zf
>>= parseXMLFromEntry
- cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
+ cmts_namespaces = maybe mempty elemToNameSpaces cmtsElem
cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
cmtsElem
in
@@ -518,7 +518,7 @@ levelElemToLevel _ _ = Nothing
archiveToNumbering' :: Archive -> Maybe Numbering
archiveToNumbering' zf =
case findEntryByPath "word/numbering.xml" zf of
- Nothing -> Just $ Numbering [] [] []
+ Nothing -> Just $ Numbering mempty [] []
Just entry -> do
numberingElem <- parseXMLFromEntry entry
let namespaces = elemToNameSpaces numberingElem
@@ -530,7 +530,7 @@ archiveToNumbering' zf =
archiveToNumbering :: Archive -> Numbering
archiveToNumbering archive =
- fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
+ fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive)
elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
elemToNotes ns notetype element
@@ -875,7 +875,7 @@ childElemToRun ns element
= let (title, alt) = getTitleAndAlt ns element
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+ >>= findAttr (QName "embed" (M.lookup "r" ns) (Just "r"))
in
case drawing of
Just s -> expandDrawingId s >>=
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index 21df03d9e..ac331cba6 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -22,42 +22,42 @@ module Text.Pandoc.Readers.Docx.Util (
, findAttrByName
) where
-import Data.Maybe (mapMaybe)
import qualified Data.Text as T
import Data.Text (Text)
import Text.Pandoc.XML.Light
+import qualified Data.Map as M
-type NameSpaces = [(Text, Text)]
+type NameSpaces = M.Map Text Text
elemToNameSpaces :: Element -> NameSpaces
-elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
-
-attrToNSPair :: Attr -> Maybe (Text, Text)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
+elemToNameSpaces = foldr (\(Attr qn val) ->
+ case qn of
+ QName s _ (Just "xmlns") -> M.insert s val
+ _ -> id) mempty . elAttribs
elemName :: NameSpaces -> Text -> Text -> QName
elemName ns prefix name =
- QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
+ QName name (M.lookup prefix ns)
+ (if T.null prefix then Nothing else Just prefix)
isElem :: NameSpaces -> Text -> Text -> Element -> Bool
isElem ns prefix name element =
- let ns' = ns ++ elemToNameSpaces element
+ let ns' = ns <> elemToNameSpaces element
in qName (elName element) == name &&
- qURI (elName element) == lookup prefix ns'
+ qURI (elName element) == M.lookup prefix ns'
findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
+ let ns' = ns <> elemToNameSpaces el
in findChild (elemName ns' pref name) el
findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
+ let ns' = ns <> elemToNameSpaces el
in findChildren (elemName ns' pref name) el
findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName ns pref name el =
- let ns' = ns ++ elemToNameSpaces el
+ let ns' = ns <> elemToNameSpaces el
in findAttr (elemName ns' pref name) el