Switch to new pandoc-types and use Text instead of String [API change].

PR #5884. + Use pandoc-types 1.20 and texmath 0.12. + Text is now used instead of String, with a few exceptions. + In the MediaBag module, some of the types using Strings were switched to use FilePath instead (not Text). + In the Parsing module, new parsers `manyChar`, `many1Char`, `manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`, `mantyUntilChar` have been added: these are like their unsuffixed counterparts but pack some or all of their output. + `glob` in Text.Pandoc.Class still takes String since it seems to be intended as an interface to Glob, which uses strings. It seems to be used only once in the package, in the EPUB writer, so that is not hard to change.
author: despresc <christian.j.j.despres@gmail.com> 2019-11-04 16:12:37 -0500
committer: John MacFarlane <jgm@berkeley.edu> 2019-11-12 16:03:45 -0800
commit: 90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch)
tree: 4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers/Odt
parent: d3966372f5049eea56213b069fc4d70d8af9144c (diff)
download: pandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz
3 files changed, 69 insertions, 35 deletions
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index d8e5ba272..ff8cdc5fa 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,11 +1,12 @@
 {-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE Arrows          #-}
-{-# LANGUAGE DeriveFoldable  #-}
+{-# LANGUAGE Arrows            #-}
+{-# LANGUAGE DeriveFoldable    #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE PatternGuards   #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE TupleSections   #-}
-{-# LANGUAGE ViewPatterns    #-}
+{-# LANGUAGE PatternGuards     #-}
+{-# LANGUAGE RecordWildCards   #-}
+{-# LANGUAGE TupleSections     #-}
+{-# LANGUAGE ViewPatterns      #-}
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Odt.ContentReader
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -29,8 +30,9 @@ import Control.Arrow
 
 import qualified Data.ByteString.Lazy as B
 import Data.Foldable (fold)
-import Data.List (find, intercalate, stripPrefix)
+import Data.List (find, stripPrefix)
 import qualified Data.Map as M
+import qualified Data.Text as T
 import Data.Maybe
 import Data.Semigroup (First(..), Option(..))
 
@@ -59,7 +61,7 @@ import qualified Data.Set as Set
 -- State
 --------------------------------------------------------------------------------
 
-type Anchor = String
+type Anchor = T.Text
 type Media = [(FilePath, B.ByteString)]
 
 data ReaderState
@@ -204,21 +206,21 @@ updateMediaWithResource = keepingTheValue (
                )
            >>^ fst
 
-lookupResource :: OdtReaderSafe String (FilePath, B.ByteString)
+lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString)
 lookupResource = proc target -> do
     state <- getExtraState -< ()
     case lookup target (getMediaEnv state) of
       Just bs -> returnV (target, bs) -<< ()
       Nothing -> returnV ("", B.empty) -< ()
 
-type AnchorPrefix = String
+type AnchorPrefix = T.Text
 
 -- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a
 -- unique identifier but without assuming that the id should be for a header.
 -- Second argument is a list of already used identifiers.
 uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor
 uniqueIdentFrom baseIdent usedIdents =
-  let  numIdent n = baseIdent ++ "-" ++ show n
+  let  numIdent n = baseIdent <> "-" <> T.pack (show n)
   in  if baseIdent `elem` usedIdents
         then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
                   Just x  -> numIdent x
@@ -305,7 +307,7 @@ withNewStyle a = proc x -> do
     isCodeStyle _             = False
 
     inlineCode :: Inlines -> Inlines
-    inlineCode = code . intercalate "" . map stringify . toList
+    inlineCode = code . T.concat . map stringify . toList
 
 type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily)
 type InlineModifier = Inlines -> Inlines
@@ -535,7 +537,6 @@ matchChildContent    :: (Monoid result)
                      ->  OdtReaderSafe _x result
 matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback
 
-
 --------------------------------------------
 -- Matchers
 --------------------------------------------
@@ -556,8 +557,8 @@ read_plain_text =  fst ^&&& read_plain_text' >>% recover
                             )
                        >>?% mappend
     --
-    extractText     :: XML.Content -> Fallible String
-    extractText (XML.Text cData) = succeedWith (XML.cdData cData)
+    extractText     :: XML.Content -> Fallible T.Text
+    extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
     extractText         _        = failEmpty
 
 read_text_seq :: InlineMatcher
@@ -675,8 +676,8 @@ read_list_item    = matchingElement NsText "list-item"
 read_link        :: InlineMatcher
 read_link         = matchingElement NsText "a"
                     $ liftA3 link
-                      ( findAttrWithDefault NsXLink  "href"  ""              )
-                      ( findAttrWithDefault NsOffice "title" ""              )
+                      ( findAttrTextWithDefault NsXLink  "href"  ""          )
+                      ( findAttrTextWithDefault NsOffice "title" ""          )
                       ( matchChildContent [ read_span
                                           , read_note
                                           , read_citation
@@ -709,12 +710,12 @@ read_citation    :: InlineMatcher
 read_citation     = matchingElement NsText "bibliography-mark"
                     $ liftA2 cite
                       ( liftA2 makeCitation
-                        ( findAttrWithDefault NsText "identifier" ""     )
+                        ( findAttrTextWithDefault NsText "identifier" "" )
                         ( readAttrWithDefault NsText "number" 0          )
                       )
                       ( matchChildContent [] read_plain_text             )
   where
-   makeCitation :: String -> Int -> [Citation]
+   makeCitation :: T.Text -> Int -> [Citation]
    makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0]
 
 
@@ -779,17 +780,17 @@ read_frame_img =
         let exts = extensionsFromList [Ext_auto_identifiers]
         resource   <- lookupResource                          -< src'
         _          <- updateMediaWithResource                 -< resource
-        w          <- findAttr' NsSVG "width"                 -< ()
-        h          <- findAttr' NsSVG "height"                -< ()
+        w          <- findAttrText' NsSVG "width"             -< ()
+        h          <- findAttrText' NsSVG "height"            -< ()
         titleNodes <- matchChildContent' [ read_frame_title ] -< ()
         alt        <- matchChildContent [] read_plain_text    -< ()
         arr (firstMatch . uncurry4 imageWith)                 -<
-          (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
+          (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
 
 read_frame_title :: InlineMatcher
 read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
 
-image_attributes :: Maybe String -> Maybe String -> Attr
+image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr
 image_attributes x y =
   ( "", [], (dim "width" x) ++ (dim "height" y))
   where
@@ -806,7 +807,7 @@ read_frame_mathml =
       src' -> do
         let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
         (_, mathml) <- lookupResource -< path
-        case readMathML (UTF8.toString $ B.toStrict mathml) of
+        case readMathML (UTF8.toText $ B.toStrict mathml) of
           Left _     -> returnV mempty -< ()
           Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps
 
@@ -817,9 +818,9 @@ read_frame_text_box = proc box -> do
 
 read_img_with_caption :: [Block] -> FirstMatch Inlines
 read_img_with_caption (Para [Image attr alt (src,title)] : _) =
-  firstMatch $ singleton (Image attr alt (src, 'f':'i':'g':':':title))   -- no text, default caption
+  firstMatch $ singleton (Image attr alt (src, "fig:" <> title))   -- no text, default caption
 read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) =
-  firstMatch $ singleton (Image attr txt (src, 'f':'i':'g':':':title) )  -- override caption with the text that follows
+  firstMatch $ singleton (Image attr txt (src, "fig:" <> title) )  -- override caption with the text that follows
 read_img_with_caption  ( Para (_ : xs) : ys) =
   read_img_with_caption (Para xs : ys)
 read_img_with_caption _ =
@@ -829,12 +830,12 @@ read_img_with_caption _ =
 -- Internal links
 ----------------------
 
-_ANCHOR_PREFIX_ :: String
+_ANCHOR_PREFIX_ :: T.Text
 _ANCHOR_PREFIX_ = "anchor"
 
 --
 readAnchorAttr :: OdtReader _x Anchor
-readAnchorAttr = findAttr NsText "name"
+readAnchorAttr = findAttrText NsText "name"
 
 -- | Beware: may fail
 findAnchorName :: OdtReader AnchorPrefix Anchor
@@ -875,7 +876,7 @@ read_reference_start = matchingElement NsText "reference-mark-start"
 
 -- | Beware: may fail
 findAnchorRef :: OdtReader _x Anchor
-findAnchorRef = (      findAttr NsText "ref-name"
+findAnchorRef = (      findAttrText NsText "ref-name"
                   >>?^ (_ANCHOR_PREFIX_,)
                 ) >>?! getPrettyAnchor
 
@@ -890,7 +891,7 @@ maybeInAnchorRef = proc inlines -> do
     Left _ -> returnA -< inlines
   where
     toAnchorRef :: Anchor -> Inlines -> Inlines
-    toAnchorRef anchor = link ('#':anchor) "" -- no title
+    toAnchorRef anchor = link ("#" <> anchor) "" -- no title
 
 --
 read_bookmark_ref :: InlineMatcher
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index ccbaf6fc4..59d1b8abd 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -38,8 +38,11 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter
 , lookupAttr'
 , lookupDefaultingAttr
 , findAttr'
+, findAttrText'
 , findAttr
+, findAttrText
 , findAttrWithDefault
+, findAttrTextWithDefault
 , readAttr
 , readAttr'
 , readAttrWithDefault
@@ -59,6 +62,7 @@ import           Control.Arrow
 
 import           Data.Either ( rights )
 import qualified Data.Map             as M
+import qualified Data.Text            as T
 import           Data.Default
 import           Data.Maybe
 
@@ -79,6 +83,7 @@ import           Text.Pandoc.Readers.Odt.Generic.Fallible
 type ElementName           = String
 type AttributeName         = String
 type AttributeValue        = String
+type TextAttributeValue    = T.Text
 
 --
 type NameSpacePrefix       = String
@@ -466,6 +471,16 @@ findAttr' nsID attrName =         qualifyName nsID attrName
                               &&& getCurrentElement
                           >>% XML.findAttr
 
+-- | Return value as a (Maybe Text)
+findAttrText'           :: (NameSpaceID nsID)
+                        => nsID -> AttributeName
+                        -> XMLConverter nsID extraState x (Maybe TextAttributeValue)
+findAttrText' nsID attrName
+                        =         qualifyName nsID attrName
+                              &&& getCurrentElement
+                          >>% XML.findAttr
+                          >>^ fmap T.pack
+
 -- | Return value as string or fail
 findAttr               :: (NameSpaceID nsID)
                        => nsID -> AttributeName
@@ -473,6 +488,15 @@ findAttr               :: (NameSpaceID nsID)
 findAttr nsID attrName =     findAttr' nsID attrName
                          >>> maybeToChoice
 
+-- | Return value as text or fail
+findAttrText           :: (NameSpaceID nsID)
+                       => nsID -> AttributeName
+                       -> FallibleXMLConverter nsID extraState x TextAttributeValue
+findAttrText nsID attrName
+                       = findAttr' nsID attrName
+                         >>^ fmap T.pack
+                         >>> maybeToChoice
+
 -- | Return value as string or return provided default value
 findAttrWithDefault    :: (NameSpaceID nsID)
                        => nsID -> AttributeName
@@ -482,6 +506,15 @@ findAttrWithDefault nsID attrName deflt
                        = findAttr' nsID attrName
                          >>^ fromMaybe deflt
 
+-- | Return value as string or return provided default value
+findAttrTextWithDefault :: (NameSpaceID nsID)
+                        => nsID -> AttributeName
+                        -> TextAttributeValue
+                        -> XMLConverter nsID extraState x TextAttributeValue
+findAttrTextWithDefault nsID attrName deflt
+                       = findAttr' nsID attrName
+                         >>^ maybe deflt T.pack
+
 -- | Read and return value or fail
 readAttr               :: (NameSpaceID nsID, Read attrValue)
                        => nsID -> AttributeName
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 79e8d7aea..99fa05880 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -548,11 +548,11 @@ readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle)
 readListLevelStyle levelType =      readAttr NsText "level"
                                >>?! keepingTheValue
                                     ( liftA5 toListLevelStyle
-                                      ( returnV  levelType              )
-                                      ( findAttr' NsStyle "num-prefix"  )
-                                      ( findAttr' NsStyle "num-suffix"  )
-                                      ( getAttr   NsStyle "num-format"  )
-                                      ( findAttr' NsText  "start-value" )
+                                      ( returnV       levelType             )
+                                      ( findAttr'     NsStyle "num-prefix"  )
+                                      ( findAttr'     NsStyle "num-suffix"  )
+                                      ( getAttr       NsStyle "num-format"  )
+                                      ( findAttrText' NsText  "start-value" )
                                     )
   where
   toListLevelStyle _ p s LinfNone b         = ListLevelStyle LltBullet p s LinfNone (startValue b)
author	despresc <christian.j.j.despres@gmail.com>	2019-11-04 16:12:37 -0500
committer	John MacFarlane <jgm@berkeley.edu>	2019-11-12 16:03:45 -0800
commit	90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch)
tree	4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers/Odt
parent	d3966372f5049eea56213b069fc4d70d8af9144c (diff)
download	pandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz