aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx
diff options
context:
space:
mode:
authordespresc <christian.j.j.despres@gmail.com>2019-11-04 16:12:37 -0500
committerJohn MacFarlane <jgm@berkeley.edu>2019-11-12 16:03:45 -0800
commit90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch)
tree4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers/Docx
parentd3966372f5049eea56213b069fc4d70d8af9144c (diff)
downloadpandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884. + Use pandoc-types 1.20 and texmath 0.12. + Text is now used instead of String, with a few exceptions. + In the MediaBag module, some of the types using Strings were switched to use FilePath instead (not Text). + In the Parsing module, new parsers `manyChar`, `many1Char`, `manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`, `mantyUntilChar` have been added: these are like their unsuffixed counterparts but pack some or all of their output. + `glob` in Text.Pandoc.Class still takes String since it seems to be intended as an interface to Glob, which uses strings. It seems to be used only once in the package, in the EPUB writer, so that is not hard to change.
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs3
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fields.hs33
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs11
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs171
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse/Styles.hs48
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs9
6 files changed, 148 insertions, 127 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index da40a80ea..82791d669 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Combine
Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>,
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
index e7a916f1c..05d9dd697 100644
--- a/src/Text/Pandoc/Readers/Docx/Fields.hs
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Fields
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -16,16 +17,18 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
) where
import Prelude
+import Data.Functor (($>))
+import qualified Data.Text as T
import Text.Parsec
-import Text.Parsec.String (Parser)
+import Text.Parsec.Text (Parser)
-type URL = String
+type URL = T.Text
data FieldInfo = HyperlinkField URL
| UnknownField
deriving (Show)
-parseFieldInfo :: String -> Either ParseError FieldInfo
+parseFieldInfo :: T.Text -> Either ParseError FieldInfo
parseFieldInfo = parse fieldInfo ""
fieldInfo :: Parser FieldInfo
@@ -34,31 +37,31 @@ fieldInfo =
<|>
return UnknownField
-escapedQuote :: Parser String
-escapedQuote = string "\\\""
+escapedQuote :: Parser T.Text
+escapedQuote = string "\\\"" $> "\\\""
-inQuotes :: Parser String
+inQuotes :: Parser T.Text
inQuotes =
- (try escapedQuote) <|> (anyChar >>= (\c -> return [c]))
+ (try escapedQuote) <|> (anyChar >>= (\c -> return $ T.singleton c))
-quotedString :: Parser String
+quotedString :: Parser T.Text
quotedString = do
char '"'
- concat <$> manyTill inQuotes (try (char '"'))
+ T.concat <$> manyTill inQuotes (try (char '"'))
-unquotedString :: Parser String
-unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof)
+unquotedString :: Parser T.Text
+unquotedString = T.pack <$> manyTill anyChar (try $ lookAhead space *> return () <|> eof)
-fieldArgument :: Parser String
+fieldArgument :: Parser T.Text
fieldArgument = quotedString <|> unquotedString
-- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25
-hyperlinkSwitch :: Parser (String, String)
+hyperlinkSwitch :: Parser (T.Text, T.Text)
hyperlinkSwitch = do
sw <- string "\\l"
spaces
farg <- fieldArgument
- return (sw, farg)
+ return (T.pack sw, farg)
hyperlink :: Parser URL
hyperlink = do
@@ -68,6 +71,6 @@ hyperlink = do
farg <- fieldArgument
switches <- spaces *> many hyperlinkSwitch
let url = case switches of
- ("\\l", s) : _ -> farg ++ ('#': s)
+ ("\\l", s) : _ -> farg <> "#" <> s
_ -> farg
return url
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index eb24640c5..b7b7a3835 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -22,6 +22,7 @@ import Prelude
import Data.List
import Data.Maybe
import Data.String (fromString)
+import qualified Data.Text as T
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.JSON
import Text.Pandoc.Readers.Docx.Parse (ParaStyleName)
@@ -45,20 +46,20 @@ getNumId _ = Nothing
getNumIdN :: Block -> Integer
getNumIdN b = fromMaybe (-1) (getNumId b)
-getText :: Block -> Maybe String
+getText :: Block -> Maybe T.Text
getText (Div (_, _, kvs) _) = lookup "text" kvs
getText _ = Nothing
data ListType = Itemized | Enumerated ListAttributes
-listStyleMap :: [(String, ListNumberStyle)]
+listStyleMap :: [(T.Text, ListNumberStyle)]
listStyleMap = [("upperLetter", UpperAlpha),
("lowerLetter", LowerAlpha),
("upperRoman", UpperRoman),
("lowerRoman", LowerRoman),
("decimal", Decimal)]
-listDelimMap :: [(String, ListNumberDelim)]
+listDelimMap :: [(T.Text, ListNumberDelim)]
listDelimMap = [("%1)", OneParen),
("(%1)", TwoParens),
("%1.", Period)]
@@ -82,11 +83,11 @@ getListType b@(Div (_, _, kvs) _) | isListItem b =
_ -> Nothing
getListType _ = Nothing
-listParagraphDivs :: [String]
+listParagraphDivs :: [T.Text]
listParagraphDivs = ["list-paragraph"]
listParagraphStyles :: [ParaStyleName]
-listParagraphStyles = map fromString listParagraphDivs
+listParagraphStyles = map (fromString . T.unpack) listParagraphDivs
-- This is a first stab at going through and attaching meaning to list
-- paragraphs, without an item marker, following a list item. We
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 889bd80fc..8598ada6f 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -63,6 +64,7 @@ import qualified Data.ByteString.Lazy as B
import Data.Char (chr, ord, readLitChar)
import Data.List
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
@@ -71,7 +73,7 @@ import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.TeXMath (Exp)
import Text.TeXMath.Readers.OMML (readOMML)
-import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont)
+import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
import Text.XML.Light
import qualified Text.XML.Light.Cursor as XMLC
@@ -88,7 +90,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
}
deriving Show
-data ReaderState = ReaderState { stateWarnings :: [String]
+data ReaderState = ReaderState { stateWarnings :: [T.Text]
, stateFldCharState :: FldCharState
}
deriving Show
@@ -119,7 +121,6 @@ eitherToD (Left _) = throwError DocxError
concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
concatMapM f xs = liftM concat (mapM f xs)
-
-- This is similar to `mapMaybe`: it maps a function returning the D
-- monad over a list, and only keeps the non-erroring return values.
mapD :: (a -> D b) -> [a] -> D [b]
@@ -178,18 +179,18 @@ type ParStyleMap = M.Map ParaStyleId ParStyle
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
-data Numb = Numb String String [LevelOverride]
+data Numb = Numb T.Text T.Text [LevelOverride]
deriving Show
-- ilvl startOverride lvl
-data LevelOverride = LevelOverride String (Maybe Integer) (Maybe Level)
+data LevelOverride = LevelOverride T.Text (Maybe Integer) (Maybe Level)
deriving Show
-data AbstractNumb = AbstractNumb String [Level]
+data AbstractNumb = AbstractNumb T.Text [Level]
deriving Show
-- ilvl format string start
-data Level = Level String String String (Maybe Integer)
+data Level = Level T.Text T.Text T.Text (Maybe Integer)
deriving Show
data DocumentLocation = InDocument | InFootnote | InEndnote
@@ -199,11 +200,11 @@ data Relationship = Relationship DocumentLocation RelId Target
deriving Show
data Notes = Notes NameSpaces
- (Maybe (M.Map String Element))
- (Maybe (M.Map String Element))
+ (Maybe (M.Map T.Text Element))
+ (Maybe (M.Map T.Text Element))
deriving Show
-data Comments = Comments NameSpaces (M.Map String Element)
+data Comments = Comments NameSpaces (M.Map T.Text Element)
deriving Show
data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
@@ -238,8 +239,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
data BodyPart = Paragraph ParagraphStyle [ParPart]
- | ListItem ParagraphStyle String String (Maybe Level) [ParPart]
- | Tbl String TblGrid TblLook [Row]
+ | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
+ | Tbl T.Text TblGrid TblLook [Row]
| OMathPara [Exp]
deriving Show
@@ -279,7 +280,7 @@ data ParPart = PlainRun Run
| BookMark BookMarkId Anchor
| InternalHyperLink Anchor [Run]
| ExternalHyperLink URL [Run]
- | Drawing FilePath String String B.ByteString Extent -- title, alt
+ | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| Chart -- placeholder for now
| PlainOMath [Exp]
| Field FieldInfo [Run]
@@ -290,28 +291,28 @@ data ParPart = PlainRun Run
data Run = Run RunStyle [RunElem]
| Footnote [BodyPart]
| Endnote [BodyPart]
- | InlineDrawing FilePath String String B.ByteString Extent -- title, alt
+ | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| InlineChart -- placeholder
deriving Show
-data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
+data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
-type Target = String
-type Anchor = String
-type URL = String
-type BookMarkId = String
-type RelId = String
-type ChangeId = String
-type CommentId = String
-type Author = String
-type ChangeDate = String
-type CommentDate = String
+type Target = T.Text
+type Anchor = T.Text
+type URL = T.Text
+type BookMarkId = T.Text
+type RelId = T.Text
+type ChangeId = T.Text
+type CommentId = T.Text
+type Author = T.Text
+type ChangeDate = T.Text
+type CommentDate = T.Text
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
-archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
+archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [T.Text])
archiveToDocxWithWarnings archive = do
docXmlPath <- case getDocumentXmlPath archive of
Just fp -> Right fp
@@ -341,7 +342,7 @@ archiveToDocxWithWarnings archive = do
Right doc -> Right (Docx doc, stateWarnings st)
Left e -> Left e
-getDocumentXmlPath :: Archive -> Maybe String
+getDocumentXmlPath :: Archive -> Maybe FilePath
getDocumentXmlPath zf = do
entry <- findEntryByPath "_rels/.rels" zf
relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
@@ -394,7 +395,7 @@ constructBogusParStyleData stName = ParStyle
, numInfo = Nothing
, psParentStyle = Nothing
, pStyleName = stName
- , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName
+ , pStyleId = ParaStyleId . T.filter (/=' ') . fromStyleName $ stName
}
archiveToNotes :: Archive -> Notes
@@ -441,8 +442,8 @@ filePathToRelType path docXmlPath =
relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
relElemToRelationship relType element | qName (elName element) == "Relationship" =
do
- relId <- findAttr (QName "Id" Nothing Nothing) element
- target <- findAttr (QName "Target" Nothing Nothing) element
+ relId <- findAttrText (QName "Id" Nothing Nothing) element
+ target <- findAttrText (QName "Target" Nothing Nothing) element
return $ Relationship relType relId target
relElemToRelationship _ _ = Nothing
@@ -464,7 +465,7 @@ filePathIsMedia fp =
in
(dir == "word/media/")
-lookupLevel :: String -> String -> Numbering -> Maybe Level
+lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
(absNumId, ovrrides) <- lookup numId $
map (\(Numb nid absnumid ovrRides) -> (nid, (absnumid, ovrRides))) numbs
@@ -483,7 +484,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
loElemToLevelOverride ns element
| isElem ns "w" "lvlOverride" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
+ ilvl <- findAttrTextByName ns "w" "ilvl" element
let startOverride = findChildByName ns "w" "startOverride" element
>>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
@@ -495,9 +496,9 @@ loElemToLevelOverride _ _ = Nothing
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element
| isElem ns "w" "num" element = do
- numId <- findAttrByName ns "w" "numId" element
+ numId <- findAttrTextByName ns "w" "numId" element
absNumId <- findChildByName ns "w" "abstractNumId" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
let lvlOverrides = mapMaybe
(loElemToLevelOverride ns)
(findChildrenByName ns "w" "lvlOverride" element)
@@ -507,7 +508,7 @@ numElemToNum _ _ = Nothing
absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
absNumElemToAbsNum ns element
| isElem ns "w" "abstractNum" element = do
- absNumId <- findAttrByName ns "w" "abstractNumId" element
+ absNumId <- findAttrTextByName ns "w" "abstractNumId" element
let levelElems = findChildrenByName ns "w" "lvl" element
levels = mapMaybe (levelElemToLevel ns) levelElems
return $ AbstractNumb absNumId levels
@@ -516,11 +517,11 @@ absNumElemToAbsNum _ _ = Nothing
levelElemToLevel :: NameSpaces -> Element -> Maybe Level
levelElemToLevel ns element
| isElem ns "w" "lvl" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
+ ilvl <- findAttrTextByName ns "w" "ilvl" element
fmt <- findChildByName ns "w" "numFmt" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
txt <- findChildByName ns "w" "lvlText" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
let start = findChildByName ns "w" "start" element
>>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
@@ -544,11 +545,11 @@ archiveToNumbering :: Archive -> Numbering
archiveToNumbering archive =
fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
+elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
elemToNotes ns notetype element
- | isElem ns "w" (notetype ++ "s") element =
+ | isElem ns "w" (notetype <> "s") element =
let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
+ (\e -> findAttrTextByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" notetype element)
in
@@ -556,11 +557,11 @@ elemToNotes ns notetype element
M.fromList pairs
elemToNotes _ _ _ = Nothing
-elemToComments :: NameSpaces -> Element -> M.Map String Element
+elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
elemToComments ns element
| isElem ns "w" "comments" element =
let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
+ (\e -> findAttrTextByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" "comment" element)
in
@@ -632,7 +633,7 @@ testBitMask bitMaskS n =
pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int)
pHeading = getParStyleField headingLev . pStyle
-pNumInfo :: ParagraphStyle -> Maybe (String, String)
+pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text)
pNumInfo = getParStyleField numInfo . pStyle
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
@@ -640,7 +641,7 @@ elemToBodyPart ns element
| isElem ns "w" "p" element
, (c:_) <- findChildrenByName ns "m" "oMathPara" element =
do
- expsLst <- eitherToD $ readOMML $ showElement c
+ expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
@@ -664,7 +665,7 @@ elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChildByName ns "w" "tblPr" element
>>= findChildByName ns "w" "tblCaption"
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
caption = fromMaybe "" caption'
grid' = case findChildByName ns "w" "tblGrid" element of
Just g -> elemToTblGrid ns g
@@ -687,10 +688,10 @@ lookupRelationship docLocation relid rels =
where
pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
-expandDrawingId :: String -> D (FilePath, B.ByteString)
+expandDrawingId :: T.Text -> D (FilePath, B.ByteString)
expandDrawingId s = do
location <- asks envLocation
- target <- asks (lookupRelationship location s . envRelationships)
+ target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
case target of
Just filepath -> do
bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
@@ -699,12 +700,12 @@ expandDrawingId s = do
Nothing -> throwError DocxError
Nothing -> throwError DocxError
-getTitleAndAlt :: NameSpaces -> Element -> (String, String)
+getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
getTitleAndAlt ns element =
let mbDocPr = findChildByName ns "wp" "inline" element >>=
findChildByName ns "wp" "docPr"
- title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
- alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
+ title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
+ alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
in (title, alt)
elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -716,7 +717,7 @@ elemToParPart ns element
= let (title, alt) = getTitleAndAlt ns drawingElem
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrByName ns "r" "embed"
+ >>= findAttrTextByName ns "r" "embed"
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -726,7 +727,7 @@ elemToParPart ns element
| isElem ns "w" "r" element
, Just _ <- findChildByName ns "w" "pict" element =
let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttrByName ns "r" "id"
+ >>= findAttrTextByName ns "r" "id"
in
case drawing of
-- Todo: check out title and attr for deprecated format.
@@ -795,7 +796,7 @@ elemToParPart ns element
fldCharState <- gets stateFldCharState
case fldCharState of
FldCharOpen -> do
- info <- eitherToD $ parseFieldInfo $ strContent instrText
+ info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
return NullParPart
_ -> return NullParPart
@@ -816,56 +817,56 @@ elemToParPart ns element
return $ ChangedRuns change runs
elemToParPart ns element
| isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttrByName ns "w" "id" element
- , Just bmName <- findAttrByName ns "w" "name" element =
+ , Just bmId <- findAttrTextByName ns "w" "id" element
+ , Just bmName <- findAttrTextByName ns "w" "name" element =
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttrByName ns "r" "id" element = do
+ , Just relId <- findAttrTextByName ns "r" "id" element = do
location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
Just target ->
- case findAttrByName ns "w" "anchor" element of
- Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
+ case findAttrTextByName ns "w" "anchor" element of
+ Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
Nothing -> return $ ExternalHyperLink target runs
Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttrByName ns "w" "anchor" element = do
+ , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttrByName ns "w" "id" element = do
+ , Just cmtId <- findAttrTextByName ns "w" "id" element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttrByName ns "w" "id" element =
+ , Just cmtId <- findAttrTextByName ns "w" "id" element =
return $ CommentEnd cmtId
elemToParPart ns element
| isElem ns "m" "oMath" element =
- fmap PlainOMath (eitherToD $ readOMML $ showElement element)
+ fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
elemToParPart _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
| isElem ns "w" "comment" element
- , Just cmtId <- findAttrByName ns "w" "id" element
- , Just cmtAuthor <- findAttrByName ns "w" "author" element
- , Just cmtDate <- findAttrByName ns "w" "date" element = do
+ , Just cmtId <- findAttrTextByName ns "w" "id" element
+ , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cmtDate <- findAttrTextByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem
-lookupFootnote :: String -> Notes -> Maybe Element
+lookupFootnote :: T.Text -> Notes -> Maybe Element
lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s
-lookupEndnote :: String -> Notes -> Maybe Element
+lookupEndnote :: T.Text -> Notes -> Maybe Element
lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s
elemToExtent :: Element -> Extent
@@ -876,7 +877,7 @@ elemToExtent drawingElem =
where
wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
- >>= findAttr (QName at Nothing Nothing) >>= safeRead
+ >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
childElemToRun :: NameSpaces -> Element -> D Run
@@ -887,7 +888,7 @@ childElemToRun ns element
= let (title, alt) = getTitleAndAlt ns element
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+ >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
in
case drawing of
Just s -> expandDrawingId s >>=
@@ -900,7 +901,7 @@ childElemToRun ns element
= return InlineChart
childElemToRun ns element
| isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttrByName ns "w" "id" element = do
+ , Just fnId <- findAttrTextByName ns "w" "id" element = do
notes <- asks envNotes
case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -908,7 +909,7 @@ childElemToRun ns element
Nothing -> return $ Footnote []
childElemToRun ns element
| isElem ns "w" "endnoteReference" element
- , Just enId <- findAttrByName ns "w" "id" element = do
+ , Just enId <- findAttrTextByName ns "w" "id" element = do
notes <- asks envNotes
case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -961,15 +962,15 @@ getParStyleField _ _ = Nothing
getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
getTrackedChange ns element
| isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element =
+ , Just cId <- findAttrTextByName ns "w" "id" element
+ , Just cAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate)
getTrackedChange ns element
| isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element =
+ , Just cId <- findAttrTextByName ns "w" "id" element
+ , Just cAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate)
getTrackedChange _ _ = Nothing
@@ -978,7 +979,7 @@ elemToParagraphStyle ns element sty
| Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (fmap ParaStyleId . findAttrByName ns "w" "val")
+ (fmap ParaStyleId . findAttrTextByName ns "w" "val")
(findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
{pStyle = mapMaybe (`M.lookup` sty) style
@@ -1010,7 +1011,7 @@ elemToRunStyleD ns element
charStyles <- asks envCharStyles
let parentSty =
findChildByName ns "w" "rStyle" rPr >>=
- findAttrByName ns "w" "val" >>=
+ findAttrTextByName ns "w" "val" >>=
flip M.lookup charStyles . CharStyleId
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
@@ -1020,12 +1021,12 @@ elemToRunElem ns element
| isElem ns "w" "t" element
|| isElem ns "w" "delText" element
|| isElem ns "m" "t" element = do
- let str = strContent element
+ let str = T.pack $ strContent element
font <- asks envFont
case font of
Nothing -> return $ TextRun str
Just f -> return . TextRun $
- map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
+ T.map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
| isElem ns "w" "br" element = return LnBrk
| isElem ns "w" "tab" element = return Tab
| isElem ns "w" "softHyphen" element = return SoftHyphen
@@ -1043,11 +1044,11 @@ getSymChar ns element
| Just s <- lowerFromPrivate <$> getCodepoint
, Just font <- getFont =
case readLitChar ("\\x" ++ s) of
- [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char
+ [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
_ -> TextRun ""
where
getCodepoint = findAttrByName ns "w" "char" element
- getFont = stringToFont =<< findAttrByName ns "w" "font" element
+ getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
lowerFromPrivate ('F':xs) = '0':xs
lowerFromPrivate xs = xs
getSymChar _ _ = TextRun ""
@@ -1059,7 +1060,7 @@ elemToRunElems ns element
let qualName = elemName ns "w"
let font = do
fontElem <- findElement (qualName "rFonts") element
- stringToFont =<<
+ textToFont . T.pack =<<
foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
elemToRunElems _ _ = throwError WrongElem
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index ac2d6fa07..f81707e92 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -3,6 +3,7 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse.Styles
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -46,20 +47,19 @@ import Prelude
import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except
-import Data.Char (toLower)
-import Data.List
import Data.Function (on)
import Data.String (IsString(..))
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import Data.Coerce
import Text.Pandoc.Readers.Docx.Util
import qualified Text.Pandoc.UTF8 as UTF8
import Text.XML.Light
-newtype CharStyleId = CharStyleId String
+newtype CharStyleId = CharStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
-newtype ParaStyleId = ParaStyleId String
+newtype ParaStyleId = ParaStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
newtype CharStyleName = CharStyleName CIString
@@ -68,25 +68,31 @@ newtype ParaStyleName = ParaStyleName CIString
deriving (Show, Eq, Ord, IsString, FromStyleName)
-- Case-insensitive comparisons
-newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
+newtype CIString = CIString T.Text deriving (Show, IsString, FromStyleName)
class FromStyleName a where
- fromStyleName :: a -> String
+ fromStyleName :: a -> T.Text
instance FromStyleName String where
+ fromStyleName = T.pack
+
+instance FromStyleName T.Text where
fromStyleName = id
class FromStyleId a where
- fromStyleId :: a -> String
+ fromStyleId :: a -> T.Text
instance FromStyleId String where
+ fromStyleId = T.pack
+
+instance FromStyleId T.Text where
fromStyleId = id
instance Eq CIString where
- (==) = (==) `on` map toLower . coerce
+ (==) = (==) `on` T.toCaseFold . coerce
instance Ord CIString where
- compare = compare `on` map toLower . coerce
+ compare = compare `on` T.toCaseFold . coerce
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
@@ -108,7 +114,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
deriving Show
data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
- , numInfo :: Maybe (String, String)
+ , numInfo :: Maybe (T.Text, T.Text)
, psParentStyle :: Maybe ParStyle
, pStyleName :: ParaStyleName
, pStyleId :: ParaStyleId
@@ -146,7 +152,7 @@ isBasedOnStyle ns element parentStyle
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
- findAttrByName ns "w" "val"
+ findAttrTextByName ns "w" "val"
, Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
@@ -234,7 +240,7 @@ checkOnOff _ _ _ = Nothing
elemToCharStyle :: NameSpaces
-> Element -> Maybe CharStyle -> Maybe CharStyle
elemToCharStyle ns element parentStyle
- = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
+ = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
<*> getElementStyleName ns element
<*> (Just $ elemToRunStyle ns element parentStyle)
@@ -267,32 +273,32 @@ elemToRunStyle _ _ _ = defaultRunStyle
getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
| Just styleName <- getElementStyleName ns element
- , Just n <- stringToInteger =<<
- (stripPrefix "heading " . map toLower $
+ , Just n <- stringToInteger . T.unpack =<<
+ (T.stripPrefix "heading " . T.toLower $
fromStyleName styleName)
, n > 0 = Just (styleName, fromInteger n)
getHeaderLevel _ _ = Nothing
-getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
+getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
getElementStyleName ns el = coerce <$>
- ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
- <|> findAttrByName ns "w" "styleId" el)
+ ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
+ <|> findAttrTextByName ns "w" "styleId" el)
-getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
getNumInfo ns element = do
let numPr = findChildByName ns "w" "pPr" element >>=
findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
findChildByName ns "w" "ilvl" >>=
- findAttrByName ns "w" "val")
+ findAttrTextByName ns "w" "val")
numId <- numPr >>=
findChildByName ns "w" "numId" >>=
- findAttrByName ns "w" "val"
+ findAttrTextByName ns "w" "val"
return (numId, lvl)
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
elemToParStyleData ns element parentStyle
- | Just styleId <- findAttrByName ns "w" "styleId" element
+ | Just styleId <- findAttrTextByName ns "w" "styleId" element
, Just styleName <- getElementStyleName ns element
= Just $ ParStyle
{
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index f4855efd2..0de1114bd 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -19,11 +19,14 @@ module Text.Pandoc.Readers.Docx.Util (
, elemToNameSpaces
, findChildByName
, findChildrenByName
+ , findAttrText
, findAttrByName
+ , findAttrTextByName
) where
import Prelude
import Data.Maybe (mapMaybe)
+import qualified Data.Text as T
import Text.XML.Light
type NameSpaces = [(String, String)]
@@ -55,7 +58,13 @@ findChildrenByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findChildren (elemName ns' pref name) el
+findAttrText :: QName -> Element -> Maybe T.Text
+findAttrText x = fmap T.pack . findAttr x
+
findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
findAttrByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findAttr (elemName ns' pref name) el
+
+findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
+findAttrTextByName a b c = fmap T.pack . findAttrByName a b c