aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs3
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fields.hs33
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs11
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs171
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse/Styles.hs48
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs9
6 files changed, 148 insertions, 127 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index da40a80ea..82791d669 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Combine
Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>,
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
index e7a916f1c..05d9dd697 100644
--- a/src/Text/Pandoc/Readers/Docx/Fields.hs
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Fields
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -16,16 +17,18 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
) where
import Prelude
+import Data.Functor (($>))
+import qualified Data.Text as T
import Text.Parsec
-import Text.Parsec.String (Parser)
+import Text.Parsec.Text (Parser)
-type URL = String
+type URL = T.Text
data FieldInfo = HyperlinkField URL
| UnknownField
deriving (Show)
-parseFieldInfo :: String -> Either ParseError FieldInfo
+parseFieldInfo :: T.Text -> Either ParseError FieldInfo
parseFieldInfo = parse fieldInfo ""
fieldInfo :: Parser FieldInfo
@@ -34,31 +37,31 @@ fieldInfo =
<|>
return UnknownField
-escapedQuote :: Parser String
-escapedQuote = string "\\\""
+escapedQuote :: Parser T.Text
+escapedQuote = string "\\\"" $> "\\\""
-inQuotes :: Parser String
+inQuotes :: Parser T.Text
inQuotes =
- (try escapedQuote) <|> (anyChar >>= (\c -> return [c]))
+ (try escapedQuote) <|> (anyChar >>= (\c -> return $ T.singleton c))
-quotedString :: Parser String
+quotedString :: Parser T.Text
quotedString = do
char '"'
- concat <$> manyTill inQuotes (try (char '"'))
+ T.concat <$> manyTill inQuotes (try (char '"'))
-unquotedString :: Parser String
-unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof)
+unquotedString :: Parser T.Text
+unquotedString = T.pack <$> manyTill anyChar (try $ lookAhead space *> return () <|> eof)
-fieldArgument :: Parser String
+fieldArgument :: Parser T.Text
fieldArgument = quotedString <|> unquotedString
-- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25
-hyperlinkSwitch :: Parser (String, String)
+hyperlinkSwitch :: Parser (T.Text, T.Text)
hyperlinkSwitch = do
sw <- string "\\l"
spaces
farg <- fieldArgument
- return (sw, farg)
+ return (T.pack sw, farg)
hyperlink :: Parser URL
hyperlink = do
@@ -68,6 +71,6 @@ hyperlink = do
farg <- fieldArgument
switches <- spaces *> many hyperlinkSwitch
let url = case switches of
- ("\\l", s) : _ -> farg ++ ('#': s)
+ ("\\l", s) : _ -> farg <> "#" <> s
_ -> farg
return url
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index eb24640c5..b7b7a3835 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -22,6 +22,7 @@ import Prelude
import Data.List
import Data.Maybe
import Data.String (fromString)
+import qualified Data.Text as T
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.JSON
import Text.Pandoc.Readers.Docx.Parse (ParaStyleName)
@@ -45,20 +46,20 @@ getNumId _ = Nothing
getNumIdN :: Block -> Integer
getNumIdN b = fromMaybe (-1) (getNumId b)
-getText :: Block -> Maybe String
+getText :: Block -> Maybe T.Text
getText (Div (_, _, kvs) _) = lookup "text" kvs
getText _ = Nothing
data ListType = Itemized | Enumerated ListAttributes
-listStyleMap :: [(String, ListNumberStyle)]
+listStyleMap :: [(T.Text, ListNumberStyle)]
listStyleMap = [("upperLetter", UpperAlpha),
("lowerLetter", LowerAlpha),
("upperRoman", UpperRoman),
("lowerRoman", LowerRoman),
("decimal", Decimal)]
-listDelimMap :: [(String, ListNumberDelim)]
+listDelimMap :: [(T.Text, ListNumberDelim)]
listDelimMap = [("%1)", OneParen),
("(%1)", TwoParens),
("%1.", Period)]
@@ -82,11 +83,11 @@ getListType b@(Div (_, _, kvs) _) | isListItem b =
_ -> Nothing
getListType _ = Nothing
-listParagraphDivs :: [String]
+listParagraphDivs :: [T.Text]
listParagraphDivs = ["list-paragraph"]
listParagraphStyles :: [ParaStyleName]
-listParagraphStyles = map fromString listParagraphDivs
+listParagraphStyles = map (fromString . T.unpack) listParagraphDivs
-- This is a first stab at going through and attaching meaning to list
-- paragraphs, without an item marker, following a list item. We
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 889bd80fc..8598ada6f 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -63,6 +64,7 @@ import qualified Data.ByteString.Lazy as B
import Data.Char (chr, ord, readLitChar)
import Data.List
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
@@ -71,7 +73,7 @@ import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.TeXMath (Exp)
import Text.TeXMath.Readers.OMML (readOMML)
-import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont)
+import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
import Text.XML.Light
import qualified Text.XML.Light.Cursor as XMLC
@@ -88,7 +90,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
}
deriving Show
-data ReaderState = ReaderState { stateWarnings :: [String]
+data ReaderState = ReaderState { stateWarnings :: [T.Text]
, stateFldCharState :: FldCharState
}
deriving Show
@@ -119,7 +121,6 @@ eitherToD (Left _) = throwError DocxError
concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
concatMapM f xs = liftM concat (mapM f xs)
-
-- This is similar to `mapMaybe`: it maps a function returning the D
-- monad over a list, and only keeps the non-erroring return values.
mapD :: (a -> D b) -> [a] -> D [b]
@@ -178,18 +179,18 @@ type ParStyleMap = M.Map ParaStyleId ParStyle
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
-data Numb = Numb String String [LevelOverride]
+data Numb = Numb T.Text T.Text [LevelOverride]
deriving Show
-- ilvl startOverride lvl
-data LevelOverride = LevelOverride String (Maybe Integer) (Maybe Level)
+data LevelOverride = LevelOverride T.Text (Maybe Integer) (Maybe Level)
deriving Show
-data AbstractNumb = AbstractNumb String [Level]
+data AbstractNumb = AbstractNumb T.Text [Level]
deriving Show
-- ilvl format string start
-data Level = Level String String String (Maybe Integer)
+data Level = Level T.Text T.Text T.Text (Maybe Integer)
deriving Show
data DocumentLocation = InDocument | InFootnote | InEndnote
@@ -199,11 +200,11 @@ data Relationship = Relationship DocumentLocation RelId Target
deriving Show
data Notes = Notes NameSpaces
- (Maybe (M.Map String Element))
- (Maybe (M.Map String Element))
+ (Maybe (M.Map T.Text Element))
+ (Maybe (M.Map T.Text Element))
deriving Show
-data Comments = Comments NameSpaces (M.Map String Element)
+data Comments = Comments NameSpaces (M.Map T.Text Element)
deriving Show
data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
@@ -238,8 +239,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
data BodyPart = Paragraph ParagraphStyle [ParPart]
- | ListItem ParagraphStyle String String (Maybe Level) [ParPart]
- | Tbl String TblGrid TblLook [Row]
+ | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
+ | Tbl T.Text TblGrid TblLook [Row]
| OMathPara [Exp]
deriving Show
@@ -279,7 +280,7 @@ data ParPart = PlainRun Run
| BookMark BookMarkId Anchor
| InternalHyperLink Anchor [Run]
| ExternalHyperLink URL [Run]
- | Drawing FilePath String String B.ByteString Extent -- title, alt
+ | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| Chart -- placeholder for now
| PlainOMath [Exp]
| Field FieldInfo [Run]
@@ -290,28 +291,28 @@ data ParPart = PlainRun Run
data Run = Run RunStyle [RunElem]
| Footnote [BodyPart]
| Endnote [BodyPart]
- | InlineDrawing FilePath String String B.ByteString Extent -- title, alt
+ | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| InlineChart -- placeholder
deriving Show
-data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
+data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
-type Target = String
-type Anchor = String
-type URL = String
-type BookMarkId = String
-type RelId = String
-type ChangeId = String
-type CommentId = String
-type Author = String
-type ChangeDate = String
-type CommentDate = String
+type Target = T.Text
+type Anchor = T.Text
+type URL = T.Text
+type BookMarkId = T.Text
+type RelId = T.Text
+type ChangeId = T.Text
+type CommentId = T.Text
+type Author = T.Text
+type ChangeDate = T.Text
+type CommentDate = T.Text
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
-archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
+archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [T.Text])
archiveToDocxWithWarnings archive = do
docXmlPath <- case getDocumentXmlPath archive of
Just fp -> Right fp
@@ -341,7 +342,7 @@ archiveToDocxWithWarnings archive = do
Right doc -> Right (Docx doc, stateWarnings st)
Left e -> Left e
-getDocumentXmlPath :: Archive -> Maybe String
+getDocumentXmlPath :: Archive -> Maybe FilePath
getDocumentXmlPath zf = do
entry <- findEntryByPath "_rels/.rels" zf
relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
@@ -394,7 +395,7 @@ constructBogusParStyleData stName = ParStyle
, numInfo = Nothing
, psParentStyle = Nothing
, pStyleName = stName
- , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName
+ , pStyleId = ParaStyleId . T.filter (/=' ') . fromStyleName $ stName
}
archiveToNotes :: Archive -> Notes
@@ -441,8 +442,8 @@ filePathToRelType path docXmlPath =
relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
relElemToRelationship relType element | qName (elName element) == "Relationship" =
do
- relId <- findAttr (QName "Id" Nothing Nothing) element
- target <- findAttr (QName "Target" Nothing Nothing) element
+ relId <- findAttrText (QName "Id" Nothing Nothing) element
+ target <- findAttrText (QName "Target" Nothing Nothing) element
return $ Relationship relType relId target
relElemToRelationship _ _ = Nothing
@@ -464,7 +465,7 @@ filePathIsMedia fp =
in
(dir == "word/media/")
-lookupLevel :: String -> String -> Numbering -> Maybe Level
+lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
(absNumId, ovrrides) <- lookup numId $
map (\(Numb nid absnumid ovrRides) -> (nid, (absnumid, ovrRides))) numbs
@@ -483,7 +484,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
loElemToLevelOverride ns element
| isElem ns "w" "lvlOverride" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
+ ilvl <- findAttrTextByName ns "w" "ilvl" element
let startOverride = findChildByName ns "w" "startOverride" element
>>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
@@ -495,9 +496,9 @@ loElemToLevelOverride _ _ = Nothing
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element
| isElem ns "w" "num" element = do
- numId <- findAttrByName ns "w" "numId" element
+ numId <- findAttrTextByName ns "w" "numId" element
absNumId <- findChildByName ns "w" "abstractNumId" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
let lvlOverrides = mapMaybe
(loElemToLevelOverride ns)
(findChildrenByName ns "w" "lvlOverride" element)
@@ -507,7 +508,7 @@ numElemToNum _ _ = Nothing
absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
absNumElemToAbsNum ns element
| isElem ns "w" "abstractNum" element = do
- absNumId <- findAttrByName ns "w" "abstractNumId" element
+ absNumId <- findAttrTextByName ns "w" "abstractNumId" element
let levelElems = findChildrenByName ns "w" "lvl" element
levels = mapMaybe (levelElemToLevel ns) levelElems
return $ AbstractNumb absNumId levels
@@ -516,11 +517,11 @@ absNumElemToAbsNum _ _ = Nothing
levelElemToLevel :: NameSpaces -> Element -> Maybe Level
levelElemToLevel ns element
| isElem ns "w" "lvl" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
+ ilvl <- findAttrTextByName ns "w" "ilvl" element
fmt <- findChildByName ns "w" "numFmt" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
txt <- findChildByName ns "w" "lvlText" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
let start = findChildByName ns "w" "start" element
>>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
@@ -544,11 +545,11 @@ archiveToNumbering :: Archive -> Numbering
archiveToNumbering archive =
fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
+elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
elemToNotes ns notetype element
- | isElem ns "w" (notetype ++ "s") element =
+ | isElem ns "w" (notetype <> "s") element =
let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
+ (\e -> findAttrTextByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" notetype element)
in
@@ -556,11 +557,11 @@ elemToNotes ns notetype element
M.fromList pairs
elemToNotes _ _ _ = Nothing
-elemToComments :: NameSpaces -> Element -> M.Map String Element
+elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
elemToComments ns element
| isElem ns "w" "comments" element =
let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
+ (\e -> findAttrTextByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" "comment" element)
in
@@ -632,7 +633,7 @@ testBitMask bitMaskS n =
pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int)
pHeading = getParStyleField headingLev . pStyle
-pNumInfo :: ParagraphStyle -> Maybe (String, String)
+pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text)
pNumInfo = getParStyleField numInfo . pStyle
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
@@ -640,7 +641,7 @@ elemToBodyPart ns element
| isElem ns "w" "p" element
, (c:_) <- findChildrenByName ns "m" "oMathPara" element =
do
- expsLst <- eitherToD $ readOMML $ showElement c
+ expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
@@ -664,7 +665,7 @@ elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChildByName ns "w" "tblPr" element
>>= findChildByName ns "w" "tblCaption"
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
caption = fromMaybe "" caption'
grid' = case findChildByName ns "w" "tblGrid" element of
Just g -> elemToTblGrid ns g
@@ -687,10 +688,10 @@ lookupRelationship docLocation relid rels =
where
pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
-expandDrawingId :: String -> D (FilePath, B.ByteString)
+expandDrawingId :: T.Text -> D (FilePath, B.ByteString)
expandDrawingId s = do
location <- asks envLocation
- target <- asks (lookupRelationship location s . envRelationships)
+ target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
case target of
Just filepath -> do
bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
@@ -699,12 +700,12 @@ expandDrawingId s = do
Nothing -> throwError DocxError
Nothing -> throwError DocxError
-getTitleAndAlt :: NameSpaces -> Element -> (String, String)
+getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
getTitleAndAlt ns element =
let mbDocPr = findChildByName ns "wp" "inline" element >>=
findChildByName ns "wp" "docPr"
- title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
- alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
+ title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
+ alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
in (title, alt)
elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -716,7 +717,7 @@ elemToParPart ns element
= let (title, alt) = getTitleAndAlt ns drawingElem
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrByName ns "r" "embed"
+ >>= findAttrTextByName ns "r" "embed"
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -726,7 +727,7 @@ elemToParPart ns element
| isElem ns "w" "r" element
, Just _ <- findChildByName ns "w" "pict" element =
let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttrByName ns "r" "id"
+ >>= findAttrTextByName ns "r" "id"
in
case drawing of
-- Todo: check out title and attr for deprecated format.
@@ -795,7 +796,7 @@ elemToParPart ns element
fldCharState <- gets stateFldCharState
case fldCharState of
FldCharOpen -> do
- info <- eitherToD $ parseFieldInfo $ strContent instrText
+ info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
return NullParPart
_ -> return NullParPart
@@ -816,56 +817,56 @@ elemToParPart ns element
return $ ChangedRuns change runs
elemToParPart ns element
| isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttrByName ns "w" "id" element
- , Just bmName <- findAttrByName ns "w" "name" element =
+ , Just bmId <- findAttrTextByName ns "w" "id" element
+ , Just bmName <- findAttrTextByName ns "w" "name" element =
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttrByName ns "r" "id" element = do
+ , Just relId <- findAttrTextByName ns "r" "id" element = do
location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
Just target ->
- case findAttrByName ns "w" "anchor" element of
- Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
+ case findAttrTextByName ns "w" "anchor" element of
+ Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
Nothing -> return $ ExternalHyperLink target runs
Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttrByName ns "w" "anchor" element = do
+ , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttrByName ns "w" "id" element = do
+ , Just cmtId <- findAttrTextByName ns "w" "id" element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttrByName ns "w" "id" element =
+ , Just cmtId <- findAttrTextByName ns "w" "id" element =
return $ CommentEnd cmtId
elemToParPart ns element
| isElem ns "m" "oMath" element =
- fmap PlainOMath (eitherToD $ readOMML $ showElement element)
+ fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
elemToParPart _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
| isElem ns "w" "comment" element
- , Just cmtId <- findAttrByName ns "w" "id" element
- , Just cmtAuthor <- findAttrByName ns "w" "author" element
- , Just cmtDate <- findAttrByName ns "w" "date" element = do
+ , Just cmtId <- findAttrTextByName ns "w" "id" element
+ , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cmtDate <- findAttrTextByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem
-lookupFootnote :: String -> Notes -> Maybe Element
+lookupFootnote :: T.Text -> Notes -> Maybe Element
lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s
-lookupEndnote :: String -> Notes -> Maybe Element
+lookupEndnote :: T.Text -> Notes -> Maybe Element
lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s
elemToExtent :: Element -> Extent
@@ -876,7 +877,7 @@ elemToExtent drawingElem =
where
wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
- >>= findAttr (QName at Nothing Nothing) >>= safeRead
+ >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
childElemToRun :: NameSpaces -> Element -> D Run
@@ -887,7 +888,7 @@ childElemToRun ns element
= let (title, alt) = getTitleAndAlt ns element
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+ >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
in
case drawing of
Just s -> expandDrawingId s >>=
@@ -900,7 +901,7 @@ childElemToRun ns element
= return InlineChart
childElemToRun ns element
| isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttrByName ns "w" "id" element = do
+ , Just fnId <- findAttrTextByName ns "w" "id" element = do
notes <- asks envNotes
case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -908,7 +909,7 @@ childElemToRun ns element
Nothing -> return $ Footnote []
childElemToRun ns element
| isElem ns "w" "endnoteReference" element
- , Just enId <- findAttrByName ns "w" "id" element = do
+ , Just enId <- findAttrTextByName ns "w" "id" element = do
notes <- asks envNotes
case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -961,15 +962,15 @@ getParStyleField _ _ = Nothing
getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
getTrackedChange ns element
| isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element =
+ , Just cId <- findAttrTextByName ns "w" "id" element
+ , Just cAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate)
getTrackedChange ns element
| isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element =
+ , Just cId <- findAttrTextByName ns "w" "id" element
+ , Just cAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate)
getTrackedChange _ _ = Nothing
@@ -978,7 +979,7 @@ elemToParagraphStyle ns element sty
| Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (fmap ParaStyleId . findAttrByName ns "w" "val")
+ (fmap ParaStyleId . findAttrTextByName ns "w" "val")
(findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
{pStyle = mapMaybe (`M.lookup` sty) style
@@ -1010,7 +1011,7 @@ elemToRunStyleD ns element
charStyles <- asks envCharStyles
let parentSty =
findChildByName ns "w" "rStyle" rPr >>=
- findAttrByName ns "w" "val" >>=
+ findAttrTextByName ns "w" "val" >>=
flip M.lookup charStyles . CharStyleId
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
@@ -1020,12 +1021,12 @@ elemToRunElem ns element
| isElem ns "w" "t" element
|| isElem ns "w" "delText" element
|| isElem ns "m" "t" element = do
- let str = strContent element
+ let str = T.pack $ strContent element
font <- asks envFont
case font of
Nothing -> return $ TextRun str
Just f -> return . TextRun $
- map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
+ T.map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
| isElem ns "w" "br" element = return LnBrk
| isElem ns "w" "tab" element = return Tab
| isElem ns "w" "softHyphen" element = return SoftHyphen
@@ -1043,11 +1044,11 @@ getSymChar ns element
| Just s <- lowerFromPrivate <$> getCodepoint
, Just font <- getFont =
case readLitChar ("\\x" ++ s) of
- [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char
+ [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
_ -> TextRun ""
where
getCodepoint = findAttrByName ns "w" "char" element
- getFont = stringToFont =<< findAttrByName ns "w" "font" element
+ getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
lowerFromPrivate ('F':xs) = '0':xs
lowerFromPrivate xs = xs
getSymChar _ _ = TextRun ""
@@ -1059,7 +1060,7 @@ elemToRunElems ns element
let qualName = elemName ns "w"
let font = do
fontElem <- findElement (qualName "rFonts") element
- stringToFont =<<
+ textToFont . T.pack =<<
foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
elemToRunElems _ _ = throwError WrongElem
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index ac2d6fa07..f81707e92 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -3,6 +3,7 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse.Styles
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -46,20 +47,19 @@ import Prelude
import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except
-import Data.Char (toLower)
-import Data.List
import Data.Function (on)
import Data.String (IsString(..))
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import Data.Coerce
import Text.Pandoc.Readers.Docx.Util
import qualified Text.Pandoc.UTF8 as UTF8
import Text.XML.Light
-newtype CharStyleId = CharStyleId String
+newtype CharStyleId = CharStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
-newtype ParaStyleId = ParaStyleId String
+newtype ParaStyleId = ParaStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
newtype CharStyleName = CharStyleName CIString
@@ -68,25 +68,31 @@ newtype ParaStyleName = ParaStyleName CIString
deriving (Show, Eq, Ord, IsString, FromStyleName)
-- Case-insensitive comparisons
-newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
+newtype CIString = CIString T.Text deriving (Show, IsString, FromStyleName)
class FromStyleName a where
- fromStyleName :: a -> String
+ fromStyleName :: a -> T.Text
instance FromStyleName String where
+ fromStyleName = T.pack
+
+instance FromStyleName T.Text where
fromStyleName = id
class FromStyleId a where
- fromStyleId :: a -> String
+ fromStyleId :: a -> T.Text
instance FromStyleId String where
+ fromStyleId = T.pack
+
+instance FromStyleId T.Text where
fromStyleId = id
instance Eq CIString where
- (==) = (==) `on` map toLower . coerce
+ (==) = (==) `on` T.toCaseFold . coerce
instance Ord CIString where
- compare = compare `on` map toLower . coerce
+ compare = compare `on` T.toCaseFold . coerce
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
@@ -108,7 +114,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
deriving Show
data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
- , numInfo :: Maybe (String, String)
+ , numInfo :: Maybe (T.Text, T.Text)
, psParentStyle :: Maybe ParStyle
, pStyleName :: ParaStyleName
, pStyleId :: ParaStyleId
@@ -146,7 +152,7 @@ isBasedOnStyle ns element parentStyle
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
- findAttrByName ns "w" "val"
+ findAttrTextByName ns "w" "val"
, Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
@@ -234,7 +240,7 @@ checkOnOff _ _ _ = Nothing
elemToCharStyle :: NameSpaces
-> Element -> Maybe CharStyle -> Maybe CharStyle
elemToCharStyle ns element parentStyle
- = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
+ = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
<*> getElementStyleName ns element
<*> (Just $ elemToRunStyle ns element parentStyle)
@@ -267,32 +273,32 @@ elemToRunStyle _ _ _ = defaultRunStyle
getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
| Just styleName <- getElementStyleName ns element
- , Just n <- stringToInteger =<<
- (stripPrefix "heading " . map toLower $
+ , Just n <- stringToInteger . T.unpack =<<
+ (T.stripPrefix "heading " . T.toLower $
fromStyleName styleName)
, n > 0 = Just (styleName, fromInteger n)
getHeaderLevel _ _ = Nothing
-getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
+getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
getElementStyleName ns el = coerce <$>
- ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
- <|> findAttrByName ns "w" "styleId" el)
+ ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
+ <|> findAttrTextByName ns "w" "styleId" el)
-getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
getNumInfo ns element = do
let numPr = findChildByName ns "w" "pPr" element >>=
findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
findChildByName ns "w" "ilvl" >>=
- findAttrByName ns "w" "val")
+ findAttrTextByName ns "w" "val")
numId <- numPr >>=
findChildByName ns "w" "numId" >>=
- findAttrByName ns "w" "val"
+ findAttrTextByName ns "w" "val"
return (numId, lvl)
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
elemToParStyleData ns element parentStyle
- | Just styleId <- findAttrByName ns "w" "styleId" element
+ | Just styleId <- findAttrTextByName ns "w" "styleId" element
, Just styleName <- getElementStyleName ns element
= Just $ ParStyle
{
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index f4855efd2..0de1114bd 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -19,11 +19,14 @@ module Text.Pandoc.Readers.Docx.Util (
, elemToNameSpaces
, findChildByName
, findChildrenByName
+ , findAttrText
, findAttrByName
+ , findAttrTextByName
) where
import Prelude
import Data.Maybe (mapMaybe)
+import qualified Data.Text as T
import Text.XML.Light
type NameSpaces = [(String, String)]
@@ -55,7 +58,13 @@ findChildrenByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findChildren (elemName ns' pref name) el
+findAttrText :: QName -> Element -> Maybe T.Text
+findAttrText x = fmap T.pack . findAttr x
+
findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
findAttrByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findAttr (elemName ns' pref name) el
+
+findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
+findAttrTextByName a b c = fmap T.pack . findAttrByName a b c