aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Odt/Generic
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-02-14 22:29:21 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-02-16 16:55:20 -0800
commit967e7f5fb990b29de48b37be1db40fb149a8cf55 (patch)
treeb9f903a5f2af14f20e769903e80659b9bffd59ff /src/Text/Pandoc/Readers/Odt/Generic
parentb5b576184c3c1668aad0c904e186136b81a0dd54 (diff)
downloadpandoc-967e7f5fb990b29de48b37be1db40fb149a8cf55.tar.gz
Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light...
..and add new definitions isomorphic to xml-light's, but with Text instead of String. This allows us to keep most of the code in existing readers that use xml-light, but avoid lots of unnecessary allocation. We also add versions of the functions from xml-light's Text.XML.Light.Output and Text.XML.Light.Proc that operate on our modified XML types, and functions that convert xml-light types to our types (since some of our dependencies, like texmath, use xml-light). Update golden tests for docx and pptx. OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`. Docx: Do a manual traversal to unwrap sdt and smartTag. This is faster, and needed to pass the tests. Benchmarks: A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) C = this commit | Reader | A | B | C | | ------- | ----- | ------ | ----- | | docbook | 18 ms | 12 ms | 10 ms | | opml | 65 ms | 62 ms | 35 ms | | jats | 15 ms | 11 ms | 9 ms | | docx | 72 ms | 69 ms | 44 ms | | odt | 78 ms | 41 ms | 28 ms | | epub | 64 ms | 61 ms | 56 ms | | fb2 | 14 ms | 5 ms | 4 ms |
Diffstat (limited to 'src/Text/Pandoc/Readers/Odt/Generic')
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs3
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs33
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs23
3 files changed, 22 insertions, 37 deletions
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
module Text.Pandoc.Readers.Odt.Generic.Namespaces where
import qualified Data.Map as M
+import Data.Text (Text)
--
-type NameSpaceIRI = String
+type NameSpaceIRI = Text
--
type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
, reverseComposition
, tryToRead
, Lookupable(..)
-, readLookupables
, readLookupable
, readPercent
, findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
import Control.Category (Category, (<<<), (>>>))
import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
import qualified Data.Foldable as F (Foldable, foldr)
import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
-- | Equivalent to
-- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
-- (nobody wants that) while the latter returns "to much" for simple purposes.
-- This function instead applies 'reads' and returns the first match (if any)
-- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst
-- | A version of 'reads' that requires a '%' sign after the number
readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i , r ) <- reads s
-- | Data that can be looked up.
-- This is mostly a utility to read data with kind *.
class Lookupable a where
- lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- > readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
- a <- maybeToList (lookup word lookupTable)
- ]
+ lookupTable :: [(Text, a)]
-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
- $ map ((`lookup` lookupTable).fst)
- $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s =
+ lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable
uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z
uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..0d921e23b 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
@@ -60,11 +61,11 @@ import Control.Arrow
import Data.Bool ( bool )
import Data.Either ( rights )
import qualified Data.Map as M
-import qualified Data.Text as T
+import Data.Text (Text)
import Data.Default
import Data.Maybe
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
import Text.Pandoc.Readers.Odt.Arrows.State
import Text.Pandoc.Readers.Odt.Arrows.Utils
@@ -78,13 +79,13 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible
--------------------------------------------------------------------------------
--
-type ElementName = String
-type AttributeName = String
-type AttributeValue = String
-type TextAttributeValue = T.Text
+type ElementName = Text
+type AttributeName = Text
+type AttributeValue = Text
+type TextAttributeValue = Text
--
-type NameSpacePrefix = String
+type NameSpacePrefix = Text
--
type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -461,7 +462,7 @@ lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a)
lookupDefaultingAttr nsID attrName
= lookupAttrWithDefault nsID attrName def
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
findAttr' :: (NameSpaceID nsID)
=> nsID -> AttributeName
-> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
= qualifyName nsID attrName
&&& getCurrentElement
>>% XML.findAttr
- >>^ fmap T.pack
-- | Return value as string or fail
findAttr :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText :: (NameSpaceID nsID)
-> FallibleXMLConverter nsID extraState x TextAttributeValue
findAttrText nsID attrName
= findAttr' nsID attrName
- >>^ fmap T.pack
>>> maybeToChoice
-- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
-> XMLConverter nsID extraState x TextAttributeValue
findAttrTextWithDefault nsID attrName deflt
= findAttr' nsID attrName
- >>^ maybe deflt T.pack
+ >>^ fromMaybe deflt
-- | Read and return value or fail
readAttr :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
-- Internals
--------------------------------------------------------------------------------
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
stringToBool' val | val `elem` trueValues = Just True
| val `elem` falseValues = Just False
| otherwise = Nothing