From 967e7f5fb990b29de48b37be1db40fb149a8cf55 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 14 Feb 2021 22:29:21 -0800 Subject: Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light... ..and add new definitions isomorphic to xml-light's, but with Text instead of String. This allows us to keep most of the code in existing readers that use xml-light, but avoid lots of unnecessary allocation. We also add versions of the functions from xml-light's Text.XML.Light.Output and Text.XML.Light.Proc that operate on our modified XML types, and functions that convert xml-light types to our types (since some of our dependencies, like texmath, use xml-light). Update golden tests for docx and pptx. OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`. Docx: Do a manual traversal to unwrap sdt and smartTag. This is faster, and needed to pass the tests. Benchmarks: A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) C = this commit | Reader | A | B | C | | ------- | ----- | ------ | ----- | | docbook | 18 ms | 12 ms | 10 ms | | opml | 65 ms | 62 ms | 35 ms | | jats | 15 ms | 11 ms | 9 ms | | docx | 72 ms | 69 ms | 44 ms | | odt | 78 ms | 41 ms | 28 ms | | epub | 64 ms | 61 ms | 56 ms | | fb2 | 14 ms | 5 ms | 4 ms | --- src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs | 3 +- src/Text/Pandoc/Readers/Odt/Generic/Utils.hs | 33 ++++++---------------- .../Pandoc/Readers/Odt/Generic/XMLConverter.hs | 23 ++++++++------- 3 files changed, 22 insertions(+), 37 deletions(-) (limited to 'src/Text/Pandoc/Readers/Odt/Generic') diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs index 77174c793..78a7fc0b2 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs @@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces. module Text.Pandoc.Readers.Odt.Generic.Namespaces where import qualified Data.Map as M +import Data.Text (Text) -- -type NameSpaceIRI = String +type NameSpaceIRI = Text -- type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs index 6dc56a0d9..edefe3c70 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils , reverseComposition , tryToRead , Lookupable(..) -, readLookupables , readLookupable , readPercent , findBy @@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils import Control.Category (Category, (<<<), (>>>)) import qualified Control.Category as Cat (id) -import Control.Monad (msum) - +import Data.Char (isSpace) import qualified Data.Foldable as F (Foldable, foldr) import Data.Maybe - +import Data.Text (Text) +import qualified Data.Text as T -- | Equivalent to -- > foldr (.) id @@ -76,8 +75,8 @@ swing = flip.(.flip id) -- (nobody wants that) while the latter returns "to much" for simple purposes. -- This function instead applies 'reads' and returns the first match (if any) -- in a 'Maybe'. -tryToRead :: (Read r) => String -> Maybe r -tryToRead = reads >>> listToMaybe >>> fmap fst +tryToRead :: (Read r) => Text -> Maybe r +tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst -- | A version of 'reads' that requires a '%' sign after the number readPercent :: ReadS Int @@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i , r ) <- reads s -- | Data that can be looked up. -- This is mostly a utility to read data with kind *. class Lookupable a where - lookupTable :: [(String, a)] - --- | The idea is to use this function as if there was a declaration like --- --- > instance (Lookupable a) => (Read a) where --- > readsPrec _ = readLookupables --- . --- But including this code in this form would need UndecideableInstances. --- That is a bad idea. Luckily 'readLookupable' (without the s at the end) --- can be used directly in almost any case. -readLookupables :: (Lookupable a) => String -> [(a,String)] -readLookupables s = [ (a,rest) | (word,rest) <- lex s, - a <- maybeToList (lookup word lookupTable) - ] + lookupTable :: [(Text, a)] -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. -readLookupable :: (Lookupable a) => String -> Maybe a -readLookupable s = msum - $ map ((`lookup` lookupTable).fst) - $ lex s +readLookupable :: (Lookupable a) => Text -> Maybe a +readLookupable s = + lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 00c636a0d..0d921e23b 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE LambdaCase #-} @@ -60,11 +61,11 @@ import Control.Arrow import Data.Bool ( bool ) import Data.Either ( rights ) import qualified Data.Map as M -import qualified Data.Text as T +import Data.Text (Text) import Data.Default import Data.Maybe -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Readers.Odt.Arrows.State import Text.Pandoc.Readers.Odt.Arrows.Utils @@ -78,13 +79,13 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible -------------------------------------------------------------------------------- -- -type ElementName = String -type AttributeName = String -type AttributeValue = String -type TextAttributeValue = T.Text +type ElementName = Text +type AttributeName = Text +type AttributeValue = Text +type TextAttributeValue = Text -- -type NameSpacePrefix = String +type NameSpacePrefix = Text -- type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix @@ -461,7 +462,7 @@ lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) lookupDefaultingAttr nsID attrName = lookupAttrWithDefault nsID attrName def --- | Return value as a (Maybe String) +-- | Return value as a (Maybe Text) findAttr' :: (NameSpaceID nsID) => nsID -> AttributeName -> XMLConverter nsID extraState x (Maybe AttributeValue) @@ -477,7 +478,6 @@ findAttrText' nsID attrName = qualifyName nsID attrName &&& getCurrentElement >>% XML.findAttr - >>^ fmap T.pack -- | Return value as string or fail findAttr :: (NameSpaceID nsID) @@ -492,7 +492,6 @@ findAttrText :: (NameSpaceID nsID) -> FallibleXMLConverter nsID extraState x TextAttributeValue findAttrText nsID attrName = findAttr' nsID attrName - >>^ fmap T.pack >>> maybeToChoice -- | Return value as string or return provided default value @@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID) -> XMLConverter nsID extraState x TextAttributeValue findAttrTextWithDefault nsID attrName deflt = findAttr' nsID attrName - >>^ maybe deflt T.pack + >>^ fromMaybe deflt -- | Read and return value or fail readAttr :: (NameSpaceID nsID, Read attrValue) @@ -748,7 +747,7 @@ matchContent lookups fallback -- Internals -------------------------------------------------------------------------------- -stringToBool' :: String -> Maybe Bool +stringToBool' :: Text -> Maybe Bool stringToBool' val | val `elem` trueValues = Just True | val `elem` falseValues = Just False | otherwise = Nothing -- cgit v1.2.3