diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Fonts.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Lists.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 72 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Reducible.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/StyleMap.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Util.hs | 1 | 
6 files changed, 41 insertions, 36 deletions
| diff --git a/src/Text/Pandoc/Readers/Docx/Fonts.hs b/src/Text/Pandoc/Readers/Docx/Fonts.hs index 967ca296c..b44c71412 100644 --- a/src/Text/Pandoc/Readers/Docx/Fonts.hs +++ b/src/Text/Pandoc/Readers/Docx/Fonts.hs @@ -29,7 +29,6 @@ Utilities to convert between font codepoints and unicode characters.  -}  module Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) where -import Prelude  -- | Enumeration of recognised fonts  data Font = Symbol -- ^ <http://en.wikipedia.org/wiki/Symbol_(typeface) Adobe Symbol> diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 0c9297139..c265ad074 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -33,7 +33,6 @@ module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets                                        , listParagraphDivs                                        ) where -import Prelude  import Text.Pandoc.JSON  import Text.Pandoc.Generic (bottomUp)  import Text.Pandoc.Shared (trim) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 91eab1339..5910a476b 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -50,7 +50,6 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)                                        , Cell(..)                                        , archiveToDocx                                        ) where -import Prelude  import Codec.Archive.Zip  import Text.XML.Light  import Data.Maybe @@ -76,6 +75,7 @@ data ReaderEnv = ReaderEnv { envNotes         :: Notes                             , envFont          :: Maybe Font                             , envCharStyles    :: CharStyleMap                             , envParStyles     :: ParStyleMap +                           , envLocation      :: DocumentLocation                             }                 deriving Show @@ -88,7 +88,7 @@ instance Error DocxError where  type D = ExceptT DocxError (Reader ReaderEnv)  runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx ) re +runD dx re = runReader (runExceptT dx) re  maybeToD :: Maybe a -> D a  maybeToD (Just a) = return a @@ -141,7 +141,10 @@ data AbstractNumb = AbstractNumb String [Level]  -- (ilvl, format, string, start)  type Level = (String, String, String, Maybe Integer) -data Relationship = Relationship (RelId, Target) +data DocumentLocation = InDocument | InFootnote | InEndnote +                      deriving (Eq,Show) + +data Relationship = Relationship DocumentLocation RelId Target                    deriving Show  data Notes = Notes NameSpaces @@ -175,6 +178,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []  data BodyPart = Paragraph ParagraphStyle [ParPart]                | ListItem ParagraphStyle String String Level [ParPart] +              | DummyListItem ParagraphStyle String [ParPart]                | Tbl String TblGrid TblLook [Row]                | OMathPara [Exp]                deriving Show @@ -239,7 +243,6 @@ defaultRunStyle = RunStyle { isBold = Nothing                             , rUnderline = Nothing                             , rStyle = Nothing} -  type Target = String  type Anchor = String  type URL = String @@ -256,7 +259,8 @@ archiveToDocx archive = do        rels      = archiveToRelationships archive        media     = archiveToMedia archive        (styles, parstyles) = archiveToStyles archive -      rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles +      rEnv = +        ReaderEnv notes numbering rels media Nothing styles parstyles InDocument    doc <- runD (archiveToDocument archive) rEnv    return $ Docx doc @@ -363,29 +367,30 @@ archiveToNotes zf =    in     Notes ns fn en -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = -  let (dir, name) = splitFileName fp -  in -   (dir == "word/_rels/") && ((takeExtension name) == ".rels") +filePathToRelType :: FilePath -> Maybe DocumentLocation +filePathToRelType "word/_rels/document.xml.rels" = Just InDocument +filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote +filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote +filePathToRelType _ = Nothing -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = +relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship +relElemToRelationship relType element | qName (elName element) == "Relationship" =    do      relId <- findAttr (QName "Id" Nothing Nothing) element      target <- findAttr (QName "Target" Nothing Nothing) element -    return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - +    return $ Relationship relType relId target +relElemToRelationship _ _ = Nothing + +filePathToRelationships :: Archive -> FilePath -> [Relationship] +filePathToRelationships ar fp | Just relType <- filePathToRelType fp +                              , Just entry <- findEntryByPath fp ar +                              , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = +  mapMaybe (relElemToRelationship relType) $ elChildren relElems +filePathToRelationships _ _ = [] +                                 archiveToRelationships :: Archive -> [Relationship]  archiveToRelationships archive = -  let relPaths = filter filePathIsRel (filesInArchive archive) -      entries  = mapMaybe (\f -> findEntryByPath f archive) relPaths -      relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries -      rels =     mapMaybe relElemToRelationship $ concatMap elChildren relElems -  in -   rels +  concatMap (filePathToRelationships archive) $ filesInArchive archive  filePathIsMedia :: FilePath -> Bool  filePathIsMedia fp = @@ -410,6 +415,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do    lvl  <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls    return lvl +  numElemToNum :: NameSpaces -> Element -> Maybe Numb  numElemToNum ns element |    qName (elName element) == "num" && @@ -561,7 +567,7 @@ elemToBodyPart ns element      num <- asks envNumbering      case lookupLevel numId lvl num of       Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts -     Nothing        -> throwError WrongElem +     Nothing        -> return $ DummyListItem parstyle lvl parparts  elemToBodyPart ns element    | isElem ns "w" "p" element = do        sty <- asks envParStyles @@ -574,7 +580,7 @@ elemToBodyPart ns element            Just levelInfo ->              return $ ListItem parstyle numId lvl levelInfo parparts            Nothing         -> -            throwError WrongElem +            return $ DummyListItem parstyle lvl parparts         Nothing -> return $ Paragraph parstyle parparts  elemToBodyPart ns element    | isElem ns "w" "tbl" element = do @@ -597,13 +603,16 @@ elemToBodyPart ns element      return $ Tbl caption grid tblLook rows  elemToBodyPart _ _ = throwError WrongElem -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = -  lookup relid (map (\(Relationship pair) -> pair) rels) +lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target +lookupRelationship docLocation relid rels = +  lookup (docLocation, relid) pairs +  where +    pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels  expandDrawingId :: String -> D (FilePath, B.ByteString)  expandDrawingId s = do -  target <- asks (lookupRelationship s . envRelationships) +  location <- asks envLocation +  target <- asks (lookupRelationship location s . envRelationships)    case target of      Just filepath -> do        bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -658,9 +667,10 @@ elemToParPart ns element  elemToParPart ns element    | isElem ns "w" "hyperlink" element    , Just relId <- findAttr (elemName ns "r" "id") element = do +    location <- asks envLocation      runs <- mapD (elemToRun ns) (elChildren element)      rels <- asks envRelationships -    case lookupRelationship relId rels of +    case lookupRelationship location relId rels of        Just target -> do           case findAttr (elemName ns "w" "anchor") element of               Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs @@ -700,7 +710,7 @@ elemToRun ns element    , Just fnId <- findAttr (elemName ns "w" "id") ref = do      notes <- asks envNotes      case lookupFootnote fnId notes of -      Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) +      Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)                     return $ Footnote bps        Nothing  -> return $ Footnote []  elemToRun ns element @@ -709,7 +719,7 @@ elemToRun ns element    , Just enId <- findAttr (elemName ns "w" "id") ref = do      notes <- asks envNotes      case lookupEndnote enId notes of -      Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) +      Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)                     return $ Endnote bps        Nothing  -> return $ Endnote []  elemToRun ns element diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs index a850141f6..c93b40119 100644 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -7,7 +7,6 @@ module Text.Pandoc.Readers.Docx.Reducible ( concatReduce         where -import Prelude  import Text.Pandoc.Builder  import Data.List  import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs index 231653106..2901ea2a3 100644 --- a/src/Text/Pandoc/Readers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -5,7 +5,6 @@ module Text.Pandoc.Readers.Docx.StyleMap (  StyleMaps(..)                                            , hasStyleName                                            ) where -import Prelude  import           Text.XML.Light  import           Text.Pandoc.Readers.Docx.Util  import           Control.Monad.State diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index 2790c0d1a..891f107b0 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -5,7 +5,6 @@ module Text.Pandoc.Readers.Docx.Util (                                        , elemToNameSpaces                                        ) where -import Prelude  import Text.XML.Light  import Data.Maybe (mapMaybe) | 
