diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Fonts.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Lists.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 72 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Reducible.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/StyleMap.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Util.hs | 1 |
6 files changed, 41 insertions, 36 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Fonts.hs b/src/Text/Pandoc/Readers/Docx/Fonts.hs index 967ca296c..b44c71412 100644 --- a/src/Text/Pandoc/Readers/Docx/Fonts.hs +++ b/src/Text/Pandoc/Readers/Docx/Fonts.hs @@ -29,7 +29,6 @@ Utilities to convert between font codepoints and unicode characters. -} module Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) where -import Prelude -- | Enumeration of recognised fonts data Font = Symbol -- ^ <http://en.wikipedia.org/wiki/Symbol_(typeface) Adobe Symbol> diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 0c9297139..c265ad074 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -33,7 +33,6 @@ module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets , listParagraphDivs ) where -import Prelude import Text.Pandoc.JSON import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Shared (trim) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 91eab1339..5910a476b 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -50,7 +50,6 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Cell(..) , archiveToDocx ) where -import Prelude import Codec.Archive.Zip import Text.XML.Light import Data.Maybe @@ -76,6 +75,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes , envFont :: Maybe Font , envCharStyles :: CharStyleMap , envParStyles :: ParStyleMap + , envLocation :: DocumentLocation } deriving Show @@ -88,7 +88,7 @@ instance Error DocxError where type D = ExceptT DocxError (Reader ReaderEnv) runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx ) re +runD dx re = runReader (runExceptT dx) re maybeToD :: Maybe a -> D a maybeToD (Just a) = return a @@ -141,7 +141,10 @@ data AbstractNumb = AbstractNumb String [Level] -- (ilvl, format, string, start) type Level = (String, String, String, Maybe Integer) -data Relationship = Relationship (RelId, Target) +data DocumentLocation = InDocument | InFootnote | InEndnote + deriving (Eq,Show) + +data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces @@ -175,6 +178,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] | ListItem ParagraphStyle String String Level [ParPart] + | DummyListItem ParagraphStyle String [ParPart] | Tbl String TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -239,7 +243,6 @@ defaultRunStyle = RunStyle { isBold = Nothing , rUnderline = Nothing , rStyle = Nothing} - type Target = String type Anchor = String type URL = String @@ -256,7 +259,8 @@ archiveToDocx archive = do rels = archiveToRelationships archive media = archiveToMedia archive (styles, parstyles) = archiveToStyles archive - rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles + rEnv = + ReaderEnv notes numbering rels media Nothing styles parstyles InDocument doc <- runD (archiveToDocument archive) rEnv return $ Docx doc @@ -363,29 +367,30 @@ archiveToNotes zf = in Notes ns fn en -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = - let (dir, name) = splitFileName fp - in - (dir == "word/_rels/") && ((takeExtension name) == ".rels") +filePathToRelType :: FilePath -> Maybe DocumentLocation +filePathToRelType "word/_rels/document.xml.rels" = Just InDocument +filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote +filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote +filePathToRelType _ = Nothing -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = +relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship +relElemToRelationship relType element | qName (elName element) == "Relationship" = do relId <- findAttr (QName "Id" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - + return $ Relationship relType relId target +relElemToRelationship _ _ = Nothing + +filePathToRelationships :: Archive -> FilePath -> [Relationship] +filePathToRelationships ar fp | Just relType <- filePathToRelType fp + , Just entry <- findEntryByPath fp ar + , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = + mapMaybe (relElemToRelationship relType) $ elChildren relElems +filePathToRelationships _ _ = [] + archiveToRelationships :: Archive -> [Relationship] archiveToRelationships archive = - let relPaths = filter filePathIsRel (filesInArchive archive) - entries = mapMaybe (\f -> findEntryByPath f archive) relPaths - relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries - rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems - in - rels + concatMap (filePathToRelationships archive) $ filesInArchive archive filePathIsMedia :: FilePath -> Bool filePathIsMedia fp = @@ -410,6 +415,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls return lvl + numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | qName (elName element) == "num" && @@ -561,7 +567,7 @@ elemToBodyPart ns element num <- asks envNumbering case lookupLevel numId lvl num of Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> throwError WrongElem + Nothing -> return $ DummyListItem parstyle lvl parparts elemToBodyPart ns element | isElem ns "w" "p" element = do sty <- asks envParStyles @@ -574,7 +580,7 @@ elemToBodyPart ns element Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts Nothing -> - throwError WrongElem + return $ DummyListItem parstyle lvl parparts Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do @@ -597,13 +603,16 @@ elemToBodyPart ns element return $ Tbl caption grid tblLook rows elemToBodyPart _ _ = throwError WrongElem -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = - lookup relid (map (\(Relationship pair) -> pair) rels) +lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target +lookupRelationship docLocation relid rels = + lookup (docLocation, relid) pairs + where + pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels expandDrawingId :: String -> D (FilePath, B.ByteString) expandDrawingId s = do - target <- asks (lookupRelationship s . envRelationships) + location <- asks envLocation + target <- asks (lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -658,9 +667,10 @@ elemToParPart ns element elemToParPart ns element | isElem ns "w" "hyperlink" element , Just relId <- findAttr (elemName ns "r" "id") element = do + location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships - case lookupRelationship relId rels of + case lookupRelationship location relId rels of Just target -> do case findAttr (elemName ns "w" "anchor") element of Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs @@ -700,7 +710,7 @@ elemToRun ns element , Just fnId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupFootnote fnId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Footnote bps Nothing -> return $ Footnote [] elemToRun ns element @@ -709,7 +719,7 @@ elemToRun ns element , Just enId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupEndnote enId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Endnote bps Nothing -> return $ Endnote [] elemToRun ns element diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs index a850141f6..c93b40119 100644 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -7,7 +7,6 @@ module Text.Pandoc.Readers.Docx.Reducible ( concatReduce where -import Prelude import Text.Pandoc.Builder import Data.List import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs index 231653106..2901ea2a3 100644 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -5,7 +5,6 @@ module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) , hasStyleName ) where -import Prelude import Text.XML.Light import Text.Pandoc.Readers.Docx.Util import Control.Monad.State diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index 2790c0d1a..891f107b0 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -5,7 +5,6 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces ) where -import Prelude import Text.XML.Light import Data.Maybe (mapMaybe) |
