aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Odt
diff options
context:
space:
mode:
authordespresc <christian.j.j.despres@gmail.com>2019-11-04 16:12:37 -0500
committerJohn MacFarlane <jgm@berkeley.edu>2019-11-12 16:03:45 -0800
commit90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch)
tree4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers/Odt
parentd3966372f5049eea56213b069fc4d70d8af9144c (diff)
downloadpandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884. + Use pandoc-types 1.20 and texmath 0.12. + Text is now used instead of String, with a few exceptions. + In the MediaBag module, some of the types using Strings were switched to use FilePath instead (not Text). + In the Parsing module, new parsers `manyChar`, `many1Char`, `manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`, `mantyUntilChar` have been added: these are like their unsuffixed counterparts but pack some or all of their output. + `glob` in Text.Pandoc.Class still takes String since it seems to be intended as an interface to Glob, which uses strings. It seems to be used only once in the package, in the EPUB writer, so that is not hard to change.
Diffstat (limited to 'src/Text/Pandoc/Readers/Odt')
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs61
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs33
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs10
3 files changed, 69 insertions, 35 deletions
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index d8e5ba272..ff8cdc5fa 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,11 +1,12 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE Arrows #-}
-{-# LANGUAGE DeriveFoldable #-}
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Odt.ContentReader
Copyright : Copyright (C) 2015 Martin Linnemann
@@ -29,8 +30,9 @@ import Control.Arrow
import qualified Data.ByteString.Lazy as B
import Data.Foldable (fold)
-import Data.List (find, intercalate, stripPrefix)
+import Data.List (find, stripPrefix)
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import Data.Semigroup (First(..), Option(..))
@@ -59,7 +61,7 @@ import qualified Data.Set as Set
-- State
--------------------------------------------------------------------------------
-type Anchor = String
+type Anchor = T.Text
type Media = [(FilePath, B.ByteString)]
data ReaderState
@@ -204,21 +206,21 @@ updateMediaWithResource = keepingTheValue (
)
>>^ fst
-lookupResource :: OdtReaderSafe String (FilePath, B.ByteString)
+lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString)
lookupResource = proc target -> do
state <- getExtraState -< ()
case lookup target (getMediaEnv state) of
Just bs -> returnV (target, bs) -<< ()
Nothing -> returnV ("", B.empty) -< ()
-type AnchorPrefix = String
+type AnchorPrefix = T.Text
-- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a
-- unique identifier but without assuming that the id should be for a header.
-- Second argument is a list of already used identifiers.
uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor
uniqueIdentFrom baseIdent usedIdents =
- let numIdent n = baseIdent ++ "-" ++ show n
+ let numIdent n = baseIdent <> "-" <> T.pack (show n)
in if baseIdent `elem` usedIdents
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
Just x -> numIdent x
@@ -305,7 +307,7 @@ withNewStyle a = proc x -> do
isCodeStyle _ = False
inlineCode :: Inlines -> Inlines
- inlineCode = code . intercalate "" . map stringify . toList
+ inlineCode = code . T.concat . map stringify . toList
type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily)
type InlineModifier = Inlines -> Inlines
@@ -535,7 +537,6 @@ matchChildContent :: (Monoid result)
-> OdtReaderSafe _x result
matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback
-
--------------------------------------------
-- Matchers
--------------------------------------------
@@ -556,8 +557,8 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover
)
>>?% mappend
--
- extractText :: XML.Content -> Fallible String
- extractText (XML.Text cData) = succeedWith (XML.cdData cData)
+ extractText :: XML.Content -> Fallible T.Text
+ extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
extractText _ = failEmpty
read_text_seq :: InlineMatcher
@@ -675,8 +676,8 @@ read_list_item = matchingElement NsText "list-item"
read_link :: InlineMatcher
read_link = matchingElement NsText "a"
$ liftA3 link
- ( findAttrWithDefault NsXLink "href" "" )
- ( findAttrWithDefault NsOffice "title" "" )
+ ( findAttrTextWithDefault NsXLink "href" "" )
+ ( findAttrTextWithDefault NsOffice "title" "" )
( matchChildContent [ read_span
, read_note
, read_citation
@@ -709,12 +710,12 @@ read_citation :: InlineMatcher
read_citation = matchingElement NsText "bibliography-mark"
$ liftA2 cite
( liftA2 makeCitation
- ( findAttrWithDefault NsText "identifier" "" )
+ ( findAttrTextWithDefault NsText "identifier" "" )
( readAttrWithDefault NsText "number" 0 )
)
( matchChildContent [] read_plain_text )
where
- makeCitation :: String -> Int -> [Citation]
+ makeCitation :: T.Text -> Int -> [Citation]
makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0]
@@ -779,17 +780,17 @@ read_frame_img =
let exts = extensionsFromList [Ext_auto_identifiers]
resource <- lookupResource -< src'
_ <- updateMediaWithResource -< resource
- w <- findAttr' NsSVG "width" -< ()
- h <- findAttr' NsSVG "height" -< ()
+ w <- findAttrText' NsSVG "width" -< ()
+ h <- findAttrText' NsSVG "height" -< ()
titleNodes <- matchChildContent' [ read_frame_title ] -< ()
alt <- matchChildContent [] read_plain_text -< ()
arr (firstMatch . uncurry4 imageWith) -<
- (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
+ (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
read_frame_title :: InlineMatcher
read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
-image_attributes :: Maybe String -> Maybe String -> Attr
+image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr
image_attributes x y =
( "", [], (dim "width" x) ++ (dim "height" y))
where
@@ -806,7 +807,7 @@ read_frame_mathml =
src' -> do
let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
(_, mathml) <- lookupResource -< path
- case readMathML (UTF8.toString $ B.toStrict mathml) of
+ case readMathML (UTF8.toText $ B.toStrict mathml) of
Left _ -> returnV mempty -< ()
Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps
@@ -817,9 +818,9 @@ read_frame_text_box = proc box -> do
read_img_with_caption :: [Block] -> FirstMatch Inlines
read_img_with_caption (Para [Image attr alt (src,title)] : _) =
- firstMatch $ singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption
+ firstMatch $ singleton (Image attr alt (src, "fig:" <> title)) -- no text, default caption
read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) =
- firstMatch $ singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows
+ firstMatch $ singleton (Image attr txt (src, "fig:" <> title) ) -- override caption with the text that follows
read_img_with_caption ( Para (_ : xs) : ys) =
read_img_with_caption (Para xs : ys)
read_img_with_caption _ =
@@ -829,12 +830,12 @@ read_img_with_caption _ =
-- Internal links
----------------------
-_ANCHOR_PREFIX_ :: String
+_ANCHOR_PREFIX_ :: T.Text
_ANCHOR_PREFIX_ = "anchor"
--
readAnchorAttr :: OdtReader _x Anchor
-readAnchorAttr = findAttr NsText "name"
+readAnchorAttr = findAttrText NsText "name"
-- | Beware: may fail
findAnchorName :: OdtReader AnchorPrefix Anchor
@@ -875,7 +876,7 @@ read_reference_start = matchingElement NsText "reference-mark-start"
-- | Beware: may fail
findAnchorRef :: OdtReader _x Anchor
-findAnchorRef = ( findAttr NsText "ref-name"
+findAnchorRef = ( findAttrText NsText "ref-name"
>>?^ (_ANCHOR_PREFIX_,)
) >>?! getPrettyAnchor
@@ -890,7 +891,7 @@ maybeInAnchorRef = proc inlines -> do
Left _ -> returnA -< inlines
where
toAnchorRef :: Anchor -> Inlines -> Inlines
- toAnchorRef anchor = link ('#':anchor) "" -- no title
+ toAnchorRef anchor = link ("#" <> anchor) "" -- no title
--
read_bookmark_ref :: InlineMatcher
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index ccbaf6fc4..59d1b8abd 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -38,8 +38,11 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter
, lookupAttr'
, lookupDefaultingAttr
, findAttr'
+, findAttrText'
, findAttr
+, findAttrText
, findAttrWithDefault
+, findAttrTextWithDefault
, readAttr
, readAttr'
, readAttrWithDefault
@@ -59,6 +62,7 @@ import Control.Arrow
import Data.Either ( rights )
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Default
import Data.Maybe
@@ -79,6 +83,7 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible
type ElementName = String
type AttributeName = String
type AttributeValue = String
+type TextAttributeValue = T.Text
--
type NameSpacePrefix = String
@@ -466,6 +471,16 @@ findAttr' nsID attrName = qualifyName nsID attrName
&&& getCurrentElement
>>% XML.findAttr
+-- | Return value as a (Maybe Text)
+findAttrText' :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe TextAttributeValue)
+findAttrText' nsID attrName
+ = qualifyName nsID attrName
+ &&& getCurrentElement
+ >>% XML.findAttr
+ >>^ fmap T.pack
+
-- | Return value as string or fail
findAttr :: (NameSpaceID nsID)
=> nsID -> AttributeName
@@ -473,6 +488,15 @@ findAttr :: (NameSpaceID nsID)
findAttr nsID attrName = findAttr' nsID attrName
>>> maybeToChoice
+-- | Return value as text or fail
+findAttrText :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState x TextAttributeValue
+findAttrText nsID attrName
+ = findAttr' nsID attrName
+ >>^ fmap T.pack
+ >>> maybeToChoice
+
-- | Return value as string or return provided default value
findAttrWithDefault :: (NameSpaceID nsID)
=> nsID -> AttributeName
@@ -482,6 +506,15 @@ findAttrWithDefault nsID attrName deflt
= findAttr' nsID attrName
>>^ fromMaybe deflt
+-- | Return value as string or return provided default value
+findAttrTextWithDefault :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> TextAttributeValue
+ -> XMLConverter nsID extraState x TextAttributeValue
+findAttrTextWithDefault nsID attrName deflt
+ = findAttr' nsID attrName
+ >>^ maybe deflt T.pack
+
-- | Read and return value or fail
readAttr :: (NameSpaceID nsID, Read attrValue)
=> nsID -> AttributeName
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 79e8d7aea..99fa05880 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -548,11 +548,11 @@ readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle)
readListLevelStyle levelType = readAttr NsText "level"
>>?! keepingTheValue
( liftA5 toListLevelStyle
- ( returnV levelType )
- ( findAttr' NsStyle "num-prefix" )
- ( findAttr' NsStyle "num-suffix" )
- ( getAttr NsStyle "num-format" )
- ( findAttr' NsText "start-value" )
+ ( returnV levelType )
+ ( findAttr' NsStyle "num-prefix" )
+ ( findAttr' NsStyle "num-suffix" )
+ ( getAttr NsStyle "num-format" )
+ ( findAttrText' NsText "start-value" )
)
where
toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b)