diff options
| author | Igor Pashev <pashev.igor@gmail.com> | 2021-07-17 18:10:34 +0200 |
|---|---|---|
| committer | Igor Pashev <pashev.igor@gmail.com> | 2021-07-17 18:46:16 +0200 |
| commit | 48459559a13a20083fc9b31eb523b8ea2bf0a63f (patch) | |
| tree | 1c04e75709457403110a6f8c5c90099f22369de3 /src/Text/Pandoc/Readers/Odt | |
| parent | 0c39509d9b6a58958228cebf5d643598e5c98950 (diff) | |
| parent | 46099e79defe662e541b12548200caf29063c1c6 (diff) | |
| download | pandoc-48459559a13a20083fc9b31eb523b8ea2bf0a63f.tar.gz | |
Merge branch 'master' of https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Readers/Odt')
| -rw-r--r-- | src/Text/Pandoc/Readers/Odt/Arrows/State.hs | 4 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Odt/ContentReader.hs | 32 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs | 3 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Odt/Generic/Utils.hs | 33 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs | 27 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Odt/Namespaces.hs | 11 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Odt/StyleReader.hs | 38 |
7 files changed, 72 insertions, 76 deletions
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs index 93c6b5e79..96515bf56 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where import Control.Arrow import qualified Control.Category as Cat import Control.Monad - +import Data.List (foldl') import Text.Pandoc.Readers.Odt.Arrows.Utils import Text.Pandoc.Readers.Odt.Generic.Fallible @@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f iterateSL :: (Foldable f, MonadPlus m) => ArrowState s x y -> ArrowState s (f x) (m y) -iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f +iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x) diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 43c44e7e9..5520d039f 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -1,4 +1,5 @@ {-# LANGUAGE Arrows #-} +{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE PatternGuards #-} @@ -29,14 +30,14 @@ import Control.Monad ((<=<)) import qualified Data.ByteString.Lazy as B import Data.Foldable (fold) -import Data.List (find, stripPrefix) +import Data.List (find) import qualified Data.Map as M import qualified Data.Text as T import Data.Maybe -import Data.Semigroup (First(..), Option(..)) +import Data.Monoid (Alt (..)) import Text.TeXMath (readMathML, writeTeX) -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Builder hiding (underline) import Text.Pandoc.MediaBag (MediaBag, insertMedia) @@ -505,13 +506,11 @@ type InlineMatcher = ElementMatcher Inlines type BlockMatcher = ElementMatcher Blocks - -newtype FirstMatch a = FirstMatch (Option (First a)) - deriving (Foldable, Monoid, Semigroup) +newtype FirstMatch a = FirstMatch (Alt Maybe a) + deriving (Foldable, Monoid, Semigroup) firstMatch :: a -> FirstMatch a -firstMatch = FirstMatch . Option . Just . First - +firstMatch = FirstMatch . Alt . Just -- matchingElement :: (Monoid e) @@ -557,7 +556,7 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover >>?% mappend -- extractText :: XML.Content -> Fallible T.Text - extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData) + extractText (XML.Text cData) = succeedWith (XML.cdData cData) extractText _ = failEmpty read_text_seq :: InlineMatcher @@ -577,7 +576,10 @@ read_spaces = matchingElement NsText "s" ( read_line_break :: InlineMatcher read_line_break = matchingElement NsText "line-break" $ returnV linebreak - +-- +read_tab :: InlineMatcher +read_tab = matchingElement NsText "tab" + $ returnV space -- read_span :: InlineMatcher read_span = matchingElement NsText "span" @@ -585,6 +587,7 @@ read_span = matchingElement NsText "span" $ matchChildContent [ read_span , read_spaces , read_line_break + , read_tab , read_link , read_note , read_citation @@ -604,6 +607,7 @@ read_paragraph = matchingElement NsText "p" $ matchChildContent [ read_span , read_spaces , read_line_break + , read_tab , read_link , read_note , read_citation @@ -630,6 +634,7 @@ read_header = matchingElement NsText "h" children <- ( matchChildContent [ read_span , read_spaces , read_line_break + , read_tab , read_link , read_note , read_citation @@ -777,14 +782,14 @@ read_frame_img = "" -> returnV mempty -< () src' -> do let exts = extensionsFromList [Ext_auto_identifiers] - resource <- lookupResource -< src' + resource <- lookupResource -< T.unpack src' _ <- updateMediaWithResource -< resource w <- findAttrText' NsSVG "width" -< () h <- findAttrText' NsSVG "height" -< () titleNodes <- matchChildContent' [ read_frame_title ] -< () alt <- matchChildContent [] read_plain_text -< () arr (firstMatch . uncurry4 imageWith) -< - (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt) + (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) read_frame_title :: InlineMatcher read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) @@ -804,7 +809,8 @@ read_frame_mathml = case fold src of "" -> returnV mempty -< () src' -> do - let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml" + let path = T.unpack $ + fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml" (_, mathml) <- lookupResource -< path case readMathML (UTF8.toText $ B.toStrict mathml) of Left _ -> returnV mempty -< () diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs index 77174c793..78a7fc0b2 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs @@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces. module Text.Pandoc.Readers.Odt.Generic.Namespaces where import qualified Data.Map as M +import Data.Text (Text) -- -type NameSpaceIRI = String +type NameSpaceIRI = Text -- type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs index 6dc56a0d9..edefe3c70 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils , reverseComposition , tryToRead , Lookupable(..) -, readLookupables , readLookupable , readPercent , findBy @@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils import Control.Category (Category, (<<<), (>>>)) import qualified Control.Category as Cat (id) -import Control.Monad (msum) - +import Data.Char (isSpace) import qualified Data.Foldable as F (Foldable, foldr) import Data.Maybe - +import Data.Text (Text) +import qualified Data.Text as T -- | Equivalent to -- > foldr (.) id @@ -76,8 +75,8 @@ swing = flip.(.flip id) -- (nobody wants that) while the latter returns "to much" for simple purposes. -- This function instead applies 'reads' and returns the first match (if any) -- in a 'Maybe'. -tryToRead :: (Read r) => String -> Maybe r -tryToRead = reads >>> listToMaybe >>> fmap fst +tryToRead :: (Read r) => Text -> Maybe r +tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst -- | A version of 'reads' that requires a '%' sign after the number readPercent :: ReadS Int @@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i , r ) <- reads s -- | Data that can be looked up. -- This is mostly a utility to read data with kind *. class Lookupable a where - lookupTable :: [(String, a)] - --- | The idea is to use this function as if there was a declaration like --- --- > instance (Lookupable a) => (Read a) where --- > readsPrec _ = readLookupables --- . --- But including this code in this form would need UndecideableInstances. --- That is a bad idea. Luckily 'readLookupable' (without the s at the end) --- can be used directly in almost any case. -readLookupables :: (Lookupable a) => String -> [(a,String)] -readLookupables s = [ (a,rest) | (word,rest) <- lex s, - a <- maybeToList (lookup word lookupTable) - ] + lookupTable :: [(Text, a)] -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. -readLookupable :: (Lookupable a) => String -> Maybe a -readLookupable s = msum - $ map ((`lookup` lookupTable).fst) - $ lex s +readLookupable :: (Lookupable a) => Text -> Maybe a +readLookupable s = + lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 00c636a0d..341903046 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE LambdaCase #-} @@ -60,15 +61,15 @@ import Control.Arrow import Data.Bool ( bool ) import Data.Either ( rights ) import qualified Data.Map as M -import qualified Data.Text as T +import Data.Text (Text) import Data.Default import Data.Maybe +import Data.List (foldl') -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Readers.Odt.Arrows.State import Text.Pandoc.Readers.Odt.Arrows.Utils - import Text.Pandoc.Readers.Odt.Generic.Namespaces import Text.Pandoc.Readers.Odt.Generic.Utils import Text.Pandoc.Readers.Odt.Generic.Fallible @@ -78,13 +79,13 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible -------------------------------------------------------------------------------- -- -type ElementName = String -type AttributeName = String -type AttributeValue = String -type TextAttributeValue = T.Text +type ElementName = Text +type AttributeName = Text +type AttributeValue = Text +type TextAttributeValue = Text -- -type NameSpacePrefix = String +type NameSpacePrefix = Text -- type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix @@ -292,7 +293,7 @@ readNSattributes = fromState $ \state -> maybe (state, failEmpty ) => XMLConverterState nsID extraState -> Maybe (XMLConverterState nsID extraState) extractNSAttrs startState - = foldl (\state d -> state >>= addNS d) + = foldl' (\state d -> state >>= addNS d) (Just startState) nsAttribs where nsAttribs = mapMaybe readNSattr (XML.elAttribs element) @@ -461,7 +462,7 @@ lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) lookupDefaultingAttr nsID attrName = lookupAttrWithDefault nsID attrName def --- | Return value as a (Maybe String) +-- | Return value as a (Maybe Text) findAttr' :: (NameSpaceID nsID) => nsID -> AttributeName -> XMLConverter nsID extraState x (Maybe AttributeValue) @@ -477,7 +478,6 @@ findAttrText' nsID attrName = qualifyName nsID attrName &&& getCurrentElement >>% XML.findAttr - >>^ fmap T.pack -- | Return value as string or fail findAttr :: (NameSpaceID nsID) @@ -492,7 +492,6 @@ findAttrText :: (NameSpaceID nsID) -> FallibleXMLConverter nsID extraState x TextAttributeValue findAttrText nsID attrName = findAttr' nsID attrName - >>^ fmap T.pack >>> maybeToChoice -- | Return value as string or return provided default value @@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID) -> XMLConverter nsID extraState x TextAttributeValue findAttrTextWithDefault nsID attrName deflt = findAttr' nsID attrName - >>^ maybe deflt T.pack + >>^ fromMaybe deflt -- | Read and return value or fail readAttr :: (NameSpaceID nsID, Read attrValue) @@ -748,7 +747,7 @@ matchContent lookups fallback -- Internals -------------------------------------------------------------------------------- -stringToBool' :: String -> Maybe Bool +stringToBool' :: Text -> Maybe Bool stringToBool' val | val `elem` trueValues = Just True | val `elem` falseValues = Just False | otherwise = Nothing diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs index 3a24a1162..70741c28d 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Reader.Odt.Namespaces Copyright : Copyright (C) 2015 Martin Linnemann @@ -13,10 +14,10 @@ Namespaces used in odt files. module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) ) where -import Data.List (isPrefixOf) import qualified Data.Map as M (empty, insert) import Data.Maybe (fromMaybe, listToMaybe) - +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Readers.Odt.Generic.Namespaces @@ -30,7 +31,7 @@ instance NameSpaceID Namespace where findID :: NameSpaceIRI -> Maybe Namespace -findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri] +findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri] nsIDmap :: NameSpaceIRIs Namespace nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs @@ -54,12 +55,12 @@ data Namespace = -- Open Document core -- Core XML (basically only for the 'id'-attribute) | NsXML -- Fallback - | NsOther String + | NsOther Text deriving ( Eq, Ord, Show ) -- | Not the actual iri's, but large prefixes of them - this way there are -- less versioning problems and the like. -nsIDs :: [(String,Namespace)] +nsIDs :: [(Text, Namespace)] nsIDs = [ ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ), ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ), diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 46a777df1..ca791ad1e 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -2,6 +2,7 @@ {-# LANGUAGE Arrows #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Odt.StyleReader Copyright : Copyright (C) 2015 Martin Linnemann @@ -43,14 +44,16 @@ import Control.Arrow import Data.Default import qualified Data.Foldable as F -import Data.List (unfoldr) +import Data.List (unfoldr, foldl') import qualified Data.Map as M import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as T import qualified Data.Set as S -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, tshow) import Text.Pandoc.Readers.Odt.Arrows.Utils @@ -90,7 +93,7 @@ instance Default FontPitch where -- -- Thus, we want -type FontFaceName = String +type FontFaceName = Text type FontPitches = M.Map FontFaceName FontPitch @@ -117,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" ( &&& lookupDefaultingAttr NsStyle "font-pitch" )) - >>?^ ( M.fromList . foldl accumLegalPitches [] ) + >>?^ ( M.fromList . foldl' accumLegalPitches [] ) ) `ifFailedDo` returnV (Right M.empty) where accumLegalPitches ls (Nothing,_) = ls accumLegalPitches ls (Just n,p) = (n,p):ls @@ -151,7 +154,7 @@ findPitch = ( lookupAttr NsStyle "font-pitch" -- Definitions of main data -------------------------------------------------------------------------------- -type StyleName = String +type StyleName = Text -- | There are two types of styles: named styles with a style family and an -- optional style parent, and default styles for each style family, @@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} = -- \^ simpler, but in general less efficient data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType - , listItemPrefix :: Maybe String - , listItemSuffix :: Maybe String + , listItemPrefix :: Maybe Text + , listItemSuffix :: Maybe Text , listItemFormat :: ListItemNumberFormat , listItemStart :: Int } @@ -366,9 +369,9 @@ instance Show ListLevelStyle where show ListLevelStyle{..} = "<LLS|" ++ show listLevelType ++ "|" - ++ maybeToString listItemPrefix + ++ maybeToString (T.unpack <$> listItemPrefix) ++ show listItemFormat - ++ maybeToString listItemSuffix + ++ maybeToString (T.unpack <$> listItemSuffix) ++ ">" where maybeToString = fromMaybe "" @@ -471,7 +474,7 @@ readTextProperties = ) where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] isFontBold = ("normal",False):("bold",True) - :map ((,True).show) ([100,200..900]::[Int]) + :map ((,True) . tshow) ([100,200..900]::[Int]) readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) readUnderlineMode = readLineMode "text-underline-mode" @@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode) readStrikeThroughMode = readLineMode "text-line-through-mode" "text-line-through-style" -readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode) +readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode) readLineMode modeAttr styleAttr = proc x -> do isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x mode <- lookupAttr' NsStyle modeAttr -< x @@ -560,12 +563,13 @@ readListLevelStyle levelType = readAttr NsText "level" -- chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle -chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing - | otherwise = Just ( F.foldr1 select ls ) +chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls where - select ( ListLevelStyle t1 p1 s1 f1 b1 ) - ( ListLevelStyle t2 p2 s2 f2 _ ) - = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1 + select l Nothing = Just l + select ( ListLevelStyle t1 p1 s1 f1 b1 ) + ( Just ( ListLevelStyle t2 p2 s2 f2 _ )) + = Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) + (selectLinf f1 f2) b1 select' LltNumbered _ = LltNumbered select' _ LltNumbered = LltNumbered select' _ _ = LltBullet |
