Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light...

..and add new definitions isomorphic to xml-light's, but with Text instead of String. This allows us to keep most of the code in existing readers that use xml-light, but avoid lots of unnecessary allocation. We also add versions of the functions from xml-light's Text.XML.Light.Output and Text.XML.Light.Proc that operate on our modified XML types, and functions that convert xml-light types to our types (since some of our dependencies, like texmath, use xml-light). Update golden tests for docx and pptx. OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`. Docx: Do a manual traversal to unwrap sdt and smartTag. This is faster, and needed to pass the tests. Benchmarks: A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) C = this commit | Reader | A | B | C | | ------- | ----- | ------ | ----- | | docbook | 18 ms | 12 ms | 10 ms | | opml | 65 ms | 62 ms | 35 ms | | jats | 15 ms | 11 ms | 9 ms | | docx | 72 ms | 69 ms | 44 ms | | odt | 78 ms | 41 ms | 28 ms | | epub | 64 ms | 61 ms | 56 ms | | fb2 | 14 ms | 5 ms | 4 ms |
author: John MacFarlane <jgm@berkeley.edu> 2021-02-14 22:29:21 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2021-02-16 16:55:20 -0800
commit: 967e7f5fb990b29de48b37be1db40fb149a8cf55 (patch)
tree: b9f903a5f2af14f20e769903e80659b9bffd59ff /src/Text/Pandoc/Readers/Odt/Generic
parent: b5b576184c3c1668aad0c904e186136b81a0dd54 (diff)
download: pandoc-967e7f5fb990b29de48b37be1db40fb149a8cf55.tar.gz
3 files changed, 22 insertions, 37 deletions
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
 module Text.Pandoc.Readers.Odt.Generic.Namespaces where
 
 import qualified Data.Map as M
+import Data.Text (Text)
 
 --
-type NameSpaceIRI          = String
+type NameSpaceIRI          = Text
 
 --
 type NameSpaceIRIs     nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 , reverseComposition
 , tryToRead
 , Lookupable(..)
-, readLookupables
 , readLookupable
 , readPercent
 , findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 
 import Control.Category (Category, (<<<), (>>>))
 import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
 import qualified Data.Foldable as F (Foldable, foldr)
 import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
 
 -- | Equivalent to
 -- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
 -- (nobody wants that) while the latter returns "to much" for simple purposes.
 -- This function instead applies 'reads' and returns the first match (if any)
 -- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst
 
 -- | A version of 'reads' that requires a '%' sign after the number
 readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i   , r ) <- reads s
 -- | Data that can be looked up.
 -- This is mostly a utility to read data with kind *.
 class Lookupable a where
-  lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- >   readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
-                                 a <- maybeToList (lookup word lookupTable)
-                    ]
+  lookupTable :: [(Text, a)]
 
 -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
-                 $ map ((`lookup` lookupTable).fst)
-                 $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s =
+  lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable
 
 uncurry3 :: (a->b->c                -> z) -> (a,b,c          ) -> z
 uncurry4 :: (a->b->c->d             -> z) -> (a,b,c,d        ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..0d921e23b 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE TupleSections   #-}
 {-# LANGUAGE GADTs           #-}
 {-# LANGUAGE LambdaCase      #-}
@@ -60,11 +61,11 @@ import           Control.Arrow
 import           Data.Bool ( bool )
 import           Data.Either ( rights )
 import qualified Data.Map             as M
-import qualified Data.Text            as T
+import           Data.Text (Text)
 import           Data.Default
 import           Data.Maybe
 
-import qualified Text.XML.Light       as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import           Text.Pandoc.Readers.Odt.Arrows.State
 import           Text.Pandoc.Readers.Odt.Arrows.Utils
@@ -78,13 +79,13 @@ import           Text.Pandoc.Readers.Odt.Generic.Fallible
 --------------------------------------------------------------------------------
 
 --
-type ElementName           = String
-type AttributeName         = String
-type AttributeValue        = String
-type TextAttributeValue    = T.Text
+type ElementName           = Text
+type AttributeName         = Text
+type AttributeValue        = Text
+type TextAttributeValue    = Text
 
 --
-type NameSpacePrefix       = String
+type NameSpacePrefix       = Text
 
 --
 type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -461,7 +462,7 @@ lookupDefaultingAttr     :: (NameSpaceID nsID, Lookupable a, Default a)
 lookupDefaultingAttr nsID attrName
                          = lookupAttrWithDefault nsID attrName def
 
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
 findAttr'               :: (NameSpaceID nsID)
                         => nsID -> AttributeName
                         -> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
                         =         qualifyName nsID attrName
                               &&& getCurrentElement
                           >>% XML.findAttr
-                          >>^ fmap T.pack
 
 -- | Return value as string or fail
 findAttr               :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText           :: (NameSpaceID nsID)
                        -> FallibleXMLConverter nsID extraState x TextAttributeValue
 findAttrText nsID attrName
                        = findAttr' nsID attrName
-                         >>^ fmap T.pack
                          >>> maybeToChoice
 
 -- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
                         -> XMLConverter nsID extraState x TextAttributeValue
 findAttrTextWithDefault nsID attrName deflt
                        = findAttr' nsID attrName
-                         >>^ maybe deflt T.pack
+                         >>^ fromMaybe deflt
 
 -- | Read and return value or fail
 readAttr               :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
 -- Internals
 --------------------------------------------------------------------------------
 
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
 stringToBool' val | val `elem` trueValues  = Just True
                   | val `elem` falseValues = Just False
                   | otherwise              = Nothing
author	John MacFarlane <jgm@berkeley.edu>	2021-02-14 22:29:21 -0800
committer	John MacFarlane <jgm@berkeley.edu>	2021-02-16 16:55:20 -0800
commit	967e7f5fb990b29de48b37be1db40fb149a8cf55 (patch)
tree	b9f903a5f2af14f20e769903e80659b9bffd59ff /src/Text/Pandoc/Readers/Odt/Generic
parent	b5b576184c3c1668aad0c904e186136b81a0dd54 (diff)
download	pandoc-967e7f5fb990b29de48b37be1db40fb149a8cf55.tar.gz