diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Combine.hs | 3 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Fields.hs | 33 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Lists.hs | 11 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 171 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 48 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Util.hs | 9 |
6 files changed, 148 insertions, 127 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index da40a80ea..82791d669 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Combine Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>, diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index e7a916f1c..05d9dd697 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Fields Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -16,16 +17,18 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..) ) where import Prelude +import Data.Functor (($>)) +import qualified Data.Text as T import Text.Parsec -import Text.Parsec.String (Parser) +import Text.Parsec.Text (Parser) -type URL = String +type URL = T.Text data FieldInfo = HyperlinkField URL | UnknownField deriving (Show) -parseFieldInfo :: String -> Either ParseError FieldInfo +parseFieldInfo :: T.Text -> Either ParseError FieldInfo parseFieldInfo = parse fieldInfo "" fieldInfo :: Parser FieldInfo @@ -34,31 +37,31 @@ fieldInfo = <|> return UnknownField -escapedQuote :: Parser String -escapedQuote = string "\\\"" +escapedQuote :: Parser T.Text +escapedQuote = string "\\\"" $> "\\\"" -inQuotes :: Parser String +inQuotes :: Parser T.Text inQuotes = - (try escapedQuote) <|> (anyChar >>= (\c -> return [c])) + (try escapedQuote) <|> (anyChar >>= (\c -> return $ T.singleton c)) -quotedString :: Parser String +quotedString :: Parser T.Text quotedString = do char '"' - concat <$> manyTill inQuotes (try (char '"')) + T.concat <$> manyTill inQuotes (try (char '"')) -unquotedString :: Parser String -unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof) +unquotedString :: Parser T.Text +unquotedString = T.pack <$> manyTill anyChar (try $ lookAhead space *> return () <|> eof) -fieldArgument :: Parser String +fieldArgument :: Parser T.Text fieldArgument = quotedString <|> unquotedString -- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25 -hyperlinkSwitch :: Parser (String, String) +hyperlinkSwitch :: Parser (T.Text, T.Text) hyperlinkSwitch = do sw <- string "\\l" spaces farg <- fieldArgument - return (sw, farg) + return (T.pack sw, farg) hyperlink :: Parser URL hyperlink = do @@ -68,6 +71,6 @@ hyperlink = do farg <- fieldArgument switches <- spaces *> many hyperlinkSwitch let url = case switches of - ("\\l", s) : _ -> farg ++ ('#': s) + ("\\l", s) : _ -> farg <> "#" <> s _ -> farg return url diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index eb24640c5..b7b7a3835 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -22,6 +22,7 @@ import Prelude import Data.List import Data.Maybe import Data.String (fromString) +import qualified Data.Text as T import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.JSON import Text.Pandoc.Readers.Docx.Parse (ParaStyleName) @@ -45,20 +46,20 @@ getNumId _ = Nothing getNumIdN :: Block -> Integer getNumIdN b = fromMaybe (-1) (getNumId b) -getText :: Block -> Maybe String +getText :: Block -> Maybe T.Text getText (Div (_, _, kvs) _) = lookup "text" kvs getText _ = Nothing data ListType = Itemized | Enumerated ListAttributes -listStyleMap :: [(String, ListNumberStyle)] +listStyleMap :: [(T.Text, ListNumberStyle)] listStyleMap = [("upperLetter", UpperAlpha), ("lowerLetter", LowerAlpha), ("upperRoman", UpperRoman), ("lowerRoman", LowerRoman), ("decimal", Decimal)] -listDelimMap :: [(String, ListNumberDelim)] +listDelimMap :: [(T.Text, ListNumberDelim)] listDelimMap = [("%1)", OneParen), ("(%1)", TwoParens), ("%1.", Period)] @@ -82,11 +83,11 @@ getListType b@(Div (_, _, kvs) _) | isListItem b = _ -> Nothing getListType _ = Nothing -listParagraphDivs :: [String] +listParagraphDivs :: [T.Text] listParagraphDivs = ["list-paragraph"] listParagraphStyles :: [ParaStyleName] -listParagraphStyles = map fromString listParagraphDivs +listParagraphStyles = map (fromString . T.unpack) listParagraphDivs -- This is a first stab at going through and attaching meaning to list -- paragraphs, without an item marker, following a list item. We diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 889bd80fc..8598ada6f 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -63,6 +64,7 @@ import qualified Data.ByteString.Lazy as B import Data.Char (chr, ord, readLitChar) import Data.List import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import System.FilePath import Text.Pandoc.Readers.Docx.Util @@ -71,7 +73,7 @@ import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead) import qualified Text.Pandoc.UTF8 as UTF8 import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) -import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont) +import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.XML.Light import qualified Text.XML.Light.Cursor as XMLC @@ -88,7 +90,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show -data ReaderState = ReaderState { stateWarnings :: [String] +data ReaderState = ReaderState { stateWarnings :: [T.Text] , stateFldCharState :: FldCharState } deriving Show @@ -119,7 +121,6 @@ eitherToD (Left _) = throwError DocxError concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] concatMapM f xs = liftM concat (mapM f xs) - -- This is similar to `mapMaybe`: it maps a function returning the D -- monad over a list, and only keeps the non-erroring return values. mapD :: (a -> D b) -> [a] -> D [b] @@ -178,18 +179,18 @@ type ParStyleMap = M.Map ParaStyleId ParStyle data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show -data Numb = Numb String String [LevelOverride] +data Numb = Numb T.Text T.Text [LevelOverride] deriving Show -- ilvl startOverride lvl -data LevelOverride = LevelOverride String (Maybe Integer) (Maybe Level) +data LevelOverride = LevelOverride T.Text (Maybe Integer) (Maybe Level) deriving Show -data AbstractNumb = AbstractNumb String [Level] +data AbstractNumb = AbstractNumb T.Text [Level] deriving Show -- ilvl format string start -data Level = Level String String String (Maybe Integer) +data Level = Level T.Text T.Text T.Text (Maybe Integer) deriving Show data DocumentLocation = InDocument | InFootnote | InEndnote @@ -199,11 +200,11 @@ data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces - (Maybe (M.Map String Element)) - (Maybe (M.Map String Element)) + (Maybe (M.Map T.Text Element)) + (Maybe (M.Map T.Text Element)) deriving Show -data Comments = Comments NameSpaces (M.Map String Element) +data Comments = Comments NameSpaces (M.Map T.Text Element) deriving Show data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer @@ -238,8 +239,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String (Maybe Level) [ParPart] - | Tbl String TblGrid TblLook [Row] + | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart] + | Tbl T.Text TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -279,7 +280,7 @@ data ParPart = PlainRun Run | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] - | Drawing FilePath String String B.ByteString Extent -- title, alt + | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | Chart -- placeholder for now | PlainOMath [Exp] | Field FieldInfo [Run] @@ -290,28 +291,28 @@ data ParPart = PlainRun Run data Run = Run RunStyle [RunElem] | Footnote [BodyPart] | Endnote [BodyPart] - | InlineDrawing FilePath String String B.ByteString Extent -- title, alt + | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | InlineChart -- placeholder deriving Show -data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen +data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show -type Target = String -type Anchor = String -type URL = String -type BookMarkId = String -type RelId = String -type ChangeId = String -type CommentId = String -type Author = String -type ChangeDate = String -type CommentDate = String +type Target = T.Text +type Anchor = T.Text +type URL = T.Text +type BookMarkId = T.Text +type RelId = T.Text +type ChangeId = T.Text +type CommentId = T.Text +type Author = T.Text +type ChangeDate = T.Text +type CommentDate = T.Text archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive -archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) +archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [T.Text]) archiveToDocxWithWarnings archive = do docXmlPath <- case getDocumentXmlPath archive of Just fp -> Right fp @@ -341,7 +342,7 @@ archiveToDocxWithWarnings archive = do Right doc -> Right (Docx doc, stateWarnings st) Left e -> Left e -getDocumentXmlPath :: Archive -> Maybe String +getDocumentXmlPath :: Archive -> Maybe FilePath getDocumentXmlPath zf = do entry <- findEntryByPath "_rels/.rels" zf relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry @@ -394,7 +395,7 @@ constructBogusParStyleData stName = ParStyle , numInfo = Nothing , psParentStyle = Nothing , pStyleName = stName - , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName + , pStyleId = ParaStyleId . T.filter (/=' ') . fromStyleName $ stName } archiveToNotes :: Archive -> Notes @@ -441,8 +442,8 @@ filePathToRelType path docXmlPath = relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship relElemToRelationship relType element | qName (elName element) == "Relationship" = do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element + relId <- findAttrText (QName "Id" Nothing Nothing) element + target <- findAttrText (QName "Target" Nothing Nothing) element return $ Relationship relType relId target relElemToRelationship _ _ = Nothing @@ -464,7 +465,7 @@ filePathIsMedia fp = in (dir == "word/media/") -lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do (absNumId, ovrrides) <- lookup numId $ map (\(Numb nid absnumid ovrRides) -> (nid, (absnumid, ovrRides))) numbs @@ -483,7 +484,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride loElemToLevelOverride ns element | isElem ns "w" "lvlOverride" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element let startOverride = findChildByName ns "w" "startOverride" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -495,9 +496,9 @@ loElemToLevelOverride _ _ = Nothing numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | isElem ns "w" "num" element = do - numId <- findAttrByName ns "w" "numId" element + numId <- findAttrTextByName ns "w" "numId" element absNumId <- findChildByName ns "w" "abstractNumId" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let lvlOverrides = mapMaybe (loElemToLevelOverride ns) (findChildrenByName ns "w" "lvlOverride" element) @@ -507,7 +508,7 @@ numElemToNum _ _ = Nothing absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb absNumElemToAbsNum ns element | isElem ns "w" "abstractNum" element = do - absNumId <- findAttrByName ns "w" "abstractNumId" element + absNumId <- findAttrTextByName ns "w" "abstractNumId" element let levelElems = findChildrenByName ns "w" "lvl" element levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels @@ -516,11 +517,11 @@ absNumElemToAbsNum _ _ = Nothing levelElemToLevel :: NameSpaces -> Element -> Maybe Level levelElemToLevel ns element | isElem ns "w" "lvl" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element fmt <- findChildByName ns "w" "numFmt" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" txt <- findChildByName ns "w" "lvlText" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let start = findChildByName ns "w" "start" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -544,11 +545,11 @@ archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) -elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element) +elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element - | isElem ns "w" (notetype ++ "s") element = + | isElem ns "w" (notetype <> "s") element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" notetype element) in @@ -556,11 +557,11 @@ elemToNotes ns notetype element M.fromList pairs elemToNotes _ _ _ = Nothing -elemToComments :: NameSpaces -> Element -> M.Map String Element +elemToComments :: NameSpaces -> Element -> M.Map T.Text Element elemToComments ns element | isElem ns "w" "comments" element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" "comment" element) in @@ -632,7 +633,7 @@ testBitMask bitMaskS n = pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) pHeading = getParStyleField headingLev . pStyle -pNumInfo :: ParagraphStyle -> Maybe (String, String) +pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) pNumInfo = getParStyleField numInfo . pStyle elemToBodyPart :: NameSpaces -> Element -> D BodyPart @@ -640,7 +641,7 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do - expsLst <- eitherToD $ readOMML $ showElement c + expsLst <- eitherToD $ readOMML $ T.pack $ showElement c return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element @@ -664,7 +665,7 @@ elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChildByName ns "w" "tblPr" element >>= findChildByName ns "w" "tblCaption" - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" caption = fromMaybe "" caption' grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g @@ -687,10 +688,10 @@ lookupRelationship docLocation relid rels = where pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels -expandDrawingId :: String -> D (FilePath, B.ByteString) +expandDrawingId :: T.Text -> D (FilePath, B.ByteString) expandDrawingId s = do location <- asks envLocation - target <- asks (lookupRelationship location s . envRelationships) + target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -699,12 +700,12 @@ expandDrawingId s = do Nothing -> throwError DocxError Nothing -> throwError DocxError -getTitleAndAlt :: NameSpaces -> Element -> (String, String) +getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text) getTitleAndAlt ns element = let mbDocPr = findChildByName ns "wp" "inline" element >>= findChildByName ns "wp" "docPr" - title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title") - alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr") + title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title") + alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr") in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart @@ -716,7 +717,7 @@ elemToParPart ns element = let (title, alt) = getTitleAndAlt ns drawingElem a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrByName ns "r" "embed" + >>= findAttrTextByName ns "r" "embed" in case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) @@ -726,7 +727,7 @@ elemToParPart ns element | isElem ns "w" "r" element , Just _ <- findChildByName ns "w" "pict" element = let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrByName ns "r" "id" + >>= findAttrTextByName ns "r" "id" in case drawing of -- Todo: check out title and attr for deprecated format. @@ -795,7 +796,7 @@ elemToParPart ns element fldCharState <- gets stateFldCharState case fldCharState of FldCharOpen -> do - info <- eitherToD $ parseFieldInfo $ strContent instrText + info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} return NullParPart _ -> return NullParPart @@ -816,56 +817,56 @@ elemToParPart ns element return $ ChangedRuns change runs elemToParPart ns element | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttrByName ns "w" "id" element - , Just bmName <- findAttrByName ns "w" "name" element = + , Just bmId <- findAttrTextByName ns "w" "id" element + , Just bmName <- findAttrTextByName ns "w" "name" element = return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttrByName ns "r" "id" element = do + , Just relId <- findAttrTextByName ns "r" "id" element = do location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of Just target -> - case findAttrByName ns "w" "anchor" element of - Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs + case findAttrTextByName ns "w" "anchor" element of + Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs Nothing -> return $ ExternalHyperLink target runs Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttrByName ns "w" "anchor" element = do + , Just anchor <- findAttrTextByName ns "w" "anchor" element = do runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttrByName ns "w" "id" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttrByName ns "w" "id" element = + , Just cmtId <- findAttrTextByName ns "w" "id" element = return $ CommentEnd cmtId elemToParPart ns element | isElem ns "m" "oMath" element = - fmap PlainOMath (eitherToD $ readOMML $ showElement element) + fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element) elemToParPart _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element | isElem ns "w" "comment" element - , Just cmtId <- findAttrByName ns "w" "id" element - , Just cmtAuthor <- findAttrByName ns "w" "author" element - , Just cmtDate <- findAttrByName ns "w" "date" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element + , Just cmtAuthor <- findAttrTextByName ns "w" "author" element + , Just cmtDate <- findAttrTextByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem -lookupFootnote :: String -> Notes -> Maybe Element +lookupFootnote :: T.Text -> Notes -> Maybe Element lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s -lookupEndnote :: String -> Notes -> Maybe Element +lookupEndnote :: T.Text -> Notes -> Maybe Element lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s elemToExtent :: Element -> Extent @@ -876,7 +877,7 @@ elemToExtent drawingElem = where wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem - >>= findAttr (QName at Nothing Nothing) >>= safeRead + >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack childElemToRun :: NameSpaces -> Element -> D Run @@ -887,7 +888,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= @@ -900,7 +901,7 @@ childElemToRun ns element = return InlineChart childElemToRun ns element | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttrByName ns "w" "id" element = do + , Just fnId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -908,7 +909,7 @@ childElemToRun ns element Nothing -> return $ Footnote [] childElemToRun ns element | isElem ns "w" "endnoteReference" element - , Just enId <- findAttrByName ns "w" "id" element = do + , Just enId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -961,15 +962,15 @@ getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate) getTrackedChange ns element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate) getTrackedChange _ _ = Nothing @@ -978,7 +979,7 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (fmap ParaStyleId . findAttrByName ns "w" "val") + (fmap ParaStyleId . findAttrTextByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style @@ -1010,7 +1011,7 @@ elemToRunStyleD ns element charStyles <- asks envCharStyles let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrByName ns "w" "val" >>= + findAttrTextByName ns "w" "val" >>= flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle @@ -1020,12 +1021,12 @@ elemToRunElem ns element | isElem ns "w" "t" element || isElem ns "w" "delText" element || isElem ns "m" "t" element = do - let str = strContent element + let str = T.pack $ strContent element font <- asks envFont case font of Nothing -> return $ TextRun str Just f -> return . TextRun $ - map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str + T.map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab | isElem ns "w" "softHyphen" element = return SoftHyphen @@ -1043,11 +1044,11 @@ getSymChar ns element | Just s <- lowerFromPrivate <$> getCodepoint , Just font <- getFont = case readLitChar ("\\x" ++ s) of - [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char + [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char _ -> TextRun "" where getCodepoint = findAttrByName ns "w" "char" element - getFont = stringToFont =<< findAttrByName ns "w" "font" element + getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element lowerFromPrivate ('F':xs) = '0':xs lowerFromPrivate xs = xs getSymChar _ _ = TextRun "" @@ -1059,7 +1060,7 @@ elemToRunElems ns element let qualName = elemName ns "w" let font = do fontElem <- findElement (qualName "rFonts") element - stringToFont =<< + textToFont . T.pack =<< foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"] local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index ac2d6fa07..f81707e92 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -3,6 +3,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse.Styles Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -46,20 +47,19 @@ import Prelude import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except -import Data.Char (toLower) -import Data.List import Data.Function (on) import Data.String (IsString(..)) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light -newtype CharStyleId = CharStyleId String +newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) -newtype ParaStyleId = ParaStyleId String +newtype ParaStyleId = ParaStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) newtype CharStyleName = CharStyleName CIString @@ -68,25 +68,31 @@ newtype ParaStyleName = ParaStyleName CIString deriving (Show, Eq, Ord, IsString, FromStyleName) -- Case-insensitive comparisons -newtype CIString = CIString String deriving (Show, IsString, FromStyleName) +newtype CIString = CIString T.Text deriving (Show, IsString, FromStyleName) class FromStyleName a where - fromStyleName :: a -> String + fromStyleName :: a -> T.Text instance FromStyleName String where + fromStyleName = T.pack + +instance FromStyleName T.Text where fromStyleName = id class FromStyleId a where - fromStyleId :: a -> String + fromStyleId :: a -> T.Text instance FromStyleId String where + fromStyleId = T.pack + +instance FromStyleId T.Text where fromStyleId = id instance Eq CIString where - (==) = (==) `on` map toLower . coerce + (==) = (==) `on` T.toCaseFold . coerce instance Ord CIString where - compare = compare `on` map toLower . coerce + compare = compare `on` T.toCaseFold . coerce data VertAlign = BaseLn | SupScrpt | SubScrpt deriving Show @@ -108,7 +114,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool deriving Show data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) - , numInfo :: Maybe (String, String) + , numInfo :: Maybe (T.Text, T.Text) , psParentStyle :: Maybe ParStyle , pStyleName :: ParaStyleName , pStyleId :: ParaStyleId @@ -146,7 +152,7 @@ isBasedOnStyle ns element parentStyle , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element @@ -234,7 +240,7 @@ checkOnOff _ _ _ = Nothing elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) + = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element) <*> getElementStyleName ns element <*> (Just $ elemToRunStyle ns element parentStyle) @@ -267,32 +273,32 @@ elemToRunStyle _ _ _ = defaultRunStyle getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger =<< - (stripPrefix "heading " . map toLower $ + , Just n <- stringToInteger . T.unpack =<< + (T.stripPrefix "heading " . T.toLower $ fromStyleName styleName) , n > 0 = Just (styleName, fromInteger n) getHeaderLevel _ _ = Nothing -getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a +getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") - <|> findAttrByName ns "w" "styleId" el) + ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val") + <|> findAttrTextByName ns "w" "styleId" el) -getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text) getNumInfo ns element = do let numPr = findChildByName ns "w" "pPr" element >>= findChildByName ns "w" "numPr" lvl = fromMaybe "0" (numPr >>= findChildByName ns "w" "ilvl" >>= - findAttrByName ns "w" "val") + findAttrTextByName ns "w" "val") numId <- numPr >>= findChildByName ns "w" "numId" >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" return (numId, lvl) elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle elemToParStyleData ns element parentStyle - | Just styleId <- findAttrByName ns "w" "styleId" element + | Just styleId <- findAttrTextByName ns "w" "styleId" element , Just styleName <- getElementStyleName ns element = Just $ ParStyle { diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index f4855efd2..0de1114bd 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -19,11 +19,14 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces , findChildByName , findChildrenByName + , findAttrText , findAttrByName + , findAttrTextByName ) where import Prelude import Data.Maybe (mapMaybe) +import qualified Data.Text as T import Text.XML.Light type NameSpaces = [(String, String)] @@ -55,7 +58,13 @@ findChildrenByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChildren (elemName ns' pref name) el +findAttrText :: QName -> Element -> Maybe T.Text +findAttrText x = fmap T.pack . findAttr x + findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String findAttrByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findAttr (elemName ns' pref name) el + +findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text +findAttrTextByName a b c = fmap T.pack . findAttrByName a b c |
