aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Odt
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2021-07-17 18:10:34 +0200
committerIgor Pashev <pashev.igor@gmail.com>2021-07-17 18:46:16 +0200
commit48459559a13a20083fc9b31eb523b8ea2bf0a63f (patch)
tree1c04e75709457403110a6f8c5c90099f22369de3 /src/Text/Pandoc/Readers/Odt
parent0c39509d9b6a58958228cebf5d643598e5c98950 (diff)
parent46099e79defe662e541b12548200caf29063c1c6 (diff)
downloadpandoc-48459559a13a20083fc9b31eb523b8ea2bf0a63f.tar.gz
Merge branch 'master' of https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Readers/Odt')
-rw-r--r--src/Text/Pandoc/Readers/Odt/Arrows/State.hs4
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs32
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs3
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs33
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs27
-rw-r--r--src/Text/Pandoc/Readers/Odt/Namespaces.hs11
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs38
7 files changed, 72 insertions, 76 deletions
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
index 93c6b5e79..96515bf56 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where
import Control.Arrow
import qualified Control.Category as Cat
import Control.Monad
-
+import Data.List (foldl')
import Text.Pandoc.Readers.Odt.Arrows.Utils
import Text.Pandoc.Readers.Odt.Generic.Fallible
@@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
iterateSL :: (Foldable f, MonadPlus m)
=> ArrowState s x y
-> ArrowState s (f x) (m y)
-iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
+iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f
where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x)
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 43c44e7e9..5520d039f 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE Arrows #-}
+{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PatternGuards #-}
@@ -29,14 +30,14 @@ import Control.Monad ((<=<))
import qualified Data.ByteString.Lazy as B
import Data.Foldable (fold)
-import Data.List (find, stripPrefix)
+import Data.List (find)
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Maybe
-import Data.Semigroup (First(..), Option(..))
+import Data.Monoid (Alt (..))
import Text.TeXMath (readMathML, writeTeX)
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
import Text.Pandoc.Builder hiding (underline)
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
@@ -505,13 +506,11 @@ type InlineMatcher = ElementMatcher Inlines
type BlockMatcher = ElementMatcher Blocks
-
-newtype FirstMatch a = FirstMatch (Option (First a))
- deriving (Foldable, Monoid, Semigroup)
+newtype FirstMatch a = FirstMatch (Alt Maybe a)
+ deriving (Foldable, Monoid, Semigroup)
firstMatch :: a -> FirstMatch a
-firstMatch = FirstMatch . Option . Just . First
-
+firstMatch = FirstMatch . Alt . Just
--
matchingElement :: (Monoid e)
@@ -557,7 +556,7 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover
>>?% mappend
--
extractText :: XML.Content -> Fallible T.Text
- extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
+ extractText (XML.Text cData) = succeedWith (XML.cdData cData)
extractText _ = failEmpty
read_text_seq :: InlineMatcher
@@ -577,7 +576,10 @@ read_spaces = matchingElement NsText "s" (
read_line_break :: InlineMatcher
read_line_break = matchingElement NsText "line-break"
$ returnV linebreak
-
+--
+read_tab :: InlineMatcher
+read_tab = matchingElement NsText "tab"
+ $ returnV space
--
read_span :: InlineMatcher
read_span = matchingElement NsText "span"
@@ -585,6 +587,7 @@ read_span = matchingElement NsText "span"
$ matchChildContent [ read_span
, read_spaces
, read_line_break
+ , read_tab
, read_link
, read_note
, read_citation
@@ -604,6 +607,7 @@ read_paragraph = matchingElement NsText "p"
$ matchChildContent [ read_span
, read_spaces
, read_line_break
+ , read_tab
, read_link
, read_note
, read_citation
@@ -630,6 +634,7 @@ read_header = matchingElement NsText "h"
children <- ( matchChildContent [ read_span
, read_spaces
, read_line_break
+ , read_tab
, read_link
, read_note
, read_citation
@@ -777,14 +782,14 @@ read_frame_img =
"" -> returnV mempty -< ()
src' -> do
let exts = extensionsFromList [Ext_auto_identifiers]
- resource <- lookupResource -< src'
+ resource <- lookupResource -< T.unpack src'
_ <- updateMediaWithResource -< resource
w <- findAttrText' NsSVG "width" -< ()
h <- findAttrText' NsSVG "height" -< ()
titleNodes <- matchChildContent' [ read_frame_title ] -< ()
alt <- matchChildContent [] read_plain_text -< ()
arr (firstMatch . uncurry4 imageWith) -<
- (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
+ (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
read_frame_title :: InlineMatcher
read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
@@ -804,7 +809,8 @@ read_frame_mathml =
case fold src of
"" -> returnV mempty -< ()
src' -> do
- let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
+ let path = T.unpack $
+ fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml"
(_, mathml) <- lookupResource -< path
case readMathML (UTF8.toText $ B.toStrict mathml) of
Left _ -> returnV mempty -< ()
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
module Text.Pandoc.Readers.Odt.Generic.Namespaces where
import qualified Data.Map as M
+import Data.Text (Text)
--
-type NameSpaceIRI = String
+type NameSpaceIRI = Text
--
type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
, reverseComposition
, tryToRead
, Lookupable(..)
-, readLookupables
, readLookupable
, readPercent
, findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
import Control.Category (Category, (<<<), (>>>))
import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
import qualified Data.Foldable as F (Foldable, foldr)
import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
-- | Equivalent to
-- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
-- (nobody wants that) while the latter returns "to much" for simple purposes.
-- This function instead applies 'reads' and returns the first match (if any)
-- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst
-- | A version of 'reads' that requires a '%' sign after the number
readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i , r ) <- reads s
-- | Data that can be looked up.
-- This is mostly a utility to read data with kind *.
class Lookupable a where
- lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- > readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
- a <- maybeToList (lookup word lookupTable)
- ]
+ lookupTable :: [(Text, a)]
-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
- $ map ((`lookup` lookupTable).fst)
- $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s =
+ lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable
uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z
uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..341903046 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
@@ -60,15 +61,15 @@ import Control.Arrow
import Data.Bool ( bool )
import Data.Either ( rights )
import qualified Data.Map as M
-import qualified Data.Text as T
+import Data.Text (Text)
import Data.Default
import Data.Maybe
+import Data.List (foldl')
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
import Text.Pandoc.Readers.Odt.Arrows.State
import Text.Pandoc.Readers.Odt.Arrows.Utils
-
import Text.Pandoc.Readers.Odt.Generic.Namespaces
import Text.Pandoc.Readers.Odt.Generic.Utils
import Text.Pandoc.Readers.Odt.Generic.Fallible
@@ -78,13 +79,13 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible
--------------------------------------------------------------------------------
--
-type ElementName = String
-type AttributeName = String
-type AttributeValue = String
-type TextAttributeValue = T.Text
+type ElementName = Text
+type AttributeName = Text
+type AttributeValue = Text
+type TextAttributeValue = Text
--
-type NameSpacePrefix = String
+type NameSpacePrefix = Text
--
type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -292,7 +293,7 @@ readNSattributes = fromState $ \state -> maybe (state, failEmpty )
=> XMLConverterState nsID extraState
-> Maybe (XMLConverterState nsID extraState)
extractNSAttrs startState
- = foldl (\state d -> state >>= addNS d)
+ = foldl' (\state d -> state >>= addNS d)
(Just startState)
nsAttribs
where nsAttribs = mapMaybe readNSattr (XML.elAttribs element)
@@ -461,7 +462,7 @@ lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a)
lookupDefaultingAttr nsID attrName
= lookupAttrWithDefault nsID attrName def
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
findAttr' :: (NameSpaceID nsID)
=> nsID -> AttributeName
-> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
= qualifyName nsID attrName
&&& getCurrentElement
>>% XML.findAttr
- >>^ fmap T.pack
-- | Return value as string or fail
findAttr :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText :: (NameSpaceID nsID)
-> FallibleXMLConverter nsID extraState x TextAttributeValue
findAttrText nsID attrName
= findAttr' nsID attrName
- >>^ fmap T.pack
>>> maybeToChoice
-- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
-> XMLConverter nsID extraState x TextAttributeValue
findAttrTextWithDefault nsID attrName deflt
= findAttr' nsID attrName
- >>^ maybe deflt T.pack
+ >>^ fromMaybe deflt
-- | Read and return value or fail
readAttr :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
-- Internals
--------------------------------------------------------------------------------
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
stringToBool' val | val `elem` trueValues = Just True
| val `elem` falseValues = Just False
| otherwise = Nothing
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index 3a24a1162..70741c28d 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Reader.Odt.Namespaces
Copyright : Copyright (C) 2015 Martin Linnemann
@@ -13,10 +14,10 @@ Namespaces used in odt files.
module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
) where
-import Data.List (isPrefixOf)
import qualified Data.Map as M (empty, insert)
import Data.Maybe (fromMaybe, listToMaybe)
-
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Readers.Odt.Generic.Namespaces
@@ -30,7 +31,7 @@ instance NameSpaceID Namespace where
findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri]
+findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri]
nsIDmap :: NameSpaceIRIs Namespace
nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
@@ -54,12 +55,12 @@ data Namespace = -- Open Document core
-- Core XML (basically only for the 'id'-attribute)
| NsXML
-- Fallback
- | NsOther String
+ | NsOther Text
deriving ( Eq, Ord, Show )
-- | Not the actual iri's, but large prefixes of them - this way there are
-- less versioning problems and the like.
-nsIDs :: [(String,Namespace)]
+nsIDs :: [(Text, Namespace)]
nsIDs = [
("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ),
("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ),
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 46a777df1..ca791ad1e 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE Arrows #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Odt.StyleReader
Copyright : Copyright (C) 2015 Martin Linnemann
@@ -43,14 +44,16 @@ import Control.Arrow
import Data.Default
import qualified Data.Foldable as F
-import Data.List (unfoldr)
+import Data.List (unfoldr, foldl')
import qualified Data.Map as M
import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
import qualified Data.Set as S
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, tshow)
import Text.Pandoc.Readers.Odt.Arrows.Utils
@@ -90,7 +93,7 @@ instance Default FontPitch where
--
-- Thus, we want
-type FontFaceName = String
+type FontFaceName = Text
type FontPitches = M.Map FontFaceName FontPitch
@@ -117,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" (
&&&
lookupDefaultingAttr NsStyle "font-pitch"
))
- >>?^ ( M.fromList . foldl accumLegalPitches [] )
+ >>?^ ( M.fromList . foldl' accumLegalPitches [] )
) `ifFailedDo` returnV (Right M.empty)
where accumLegalPitches ls (Nothing,_) = ls
accumLegalPitches ls (Just n,p) = (n,p):ls
@@ -151,7 +154,7 @@ findPitch = ( lookupAttr NsStyle "font-pitch"
-- Definitions of main data
--------------------------------------------------------------------------------
-type StyleName = String
+type StyleName = Text
-- | There are two types of styles: named styles with a style family and an
-- optional style parent, and default styles for each style family,
@@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} =
-- \^ simpler, but in general less efficient
data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType
- , listItemPrefix :: Maybe String
- , listItemSuffix :: Maybe String
+ , listItemPrefix :: Maybe Text
+ , listItemSuffix :: Maybe Text
, listItemFormat :: ListItemNumberFormat
, listItemStart :: Int
}
@@ -366,9 +369,9 @@ instance Show ListLevelStyle where
show ListLevelStyle{..} = "<LLS|"
++ show listLevelType
++ "|"
- ++ maybeToString listItemPrefix
+ ++ maybeToString (T.unpack <$> listItemPrefix)
++ show listItemFormat
- ++ maybeToString listItemSuffix
+ ++ maybeToString (T.unpack <$> listItemSuffix)
++ ">"
where maybeToString = fromMaybe ""
@@ -471,7 +474,7 @@ readTextProperties =
)
where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
isFontBold = ("normal",False):("bold",True)
- :map ((,True).show) ([100,200..900]::[Int])
+ :map ((,True) . tshow) ([100,200..900]::[Int])
readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode)
readUnderlineMode = readLineMode "text-underline-mode"
@@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
readStrikeThroughMode = readLineMode "text-line-through-mode"
"text-line-through-style"
-readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode)
readLineMode modeAttr styleAttr = proc x -> do
isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x
mode <- lookupAttr' NsStyle modeAttr -< x
@@ -560,12 +563,13 @@ readListLevelStyle levelType = readAttr NsText "level"
--
chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
-chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
- | otherwise = Just ( F.foldr1 select ls )
+chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls
where
- select ( ListLevelStyle t1 p1 s1 f1 b1 )
- ( ListLevelStyle t2 p2 s2 f2 _ )
- = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1
+ select l Nothing = Just l
+ select ( ListLevelStyle t1 p1 s1 f1 b1 )
+ ( Just ( ListLevelStyle t2 p2 s2 f2 _ ))
+ = Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2)
+ (selectLinf f1 f2) b1
select' LltNumbered _ = LltNumbered
select' _ LltNumbered = LltNumbered
select' _ _ = LltBullet