aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJesse Rosenthal <jrosenthal@jhu.edu>2014-07-30 12:46:03 -0400
committerJesse Rosenthal <jrosenthal@jhu.edu>2014-07-30 12:46:03 -0400
commit9ce229570056ddb67f95d6b5c7b58ccf6d0413fd (patch)
tree63cc5a0b26984d8ee36f2bec858d76d3564a0718 /src
parent02c79ea4f6c050e9e610825e8b462382beae2b5a (diff)
downloadpandoc-9ce229570056ddb67f95d6b5c7b58ccf6d0413fd.tar.gz
Docx reader: Make docx reader put image data in MediaBag.
Image data will not be put in a media bag map, which will be output along with the pandoc output.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs57
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs10
2 files changed, 30 insertions, 37 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 48a23dd3c..f5fb6565a 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -78,8 +78,6 @@ import Codec.Archive.Zip
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Builder (text, toList)
-import Text.Pandoc.MIME (getMimeType)
-import Text.Pandoc.UTF8 (toString)
import Text.Pandoc.Walk
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Readers.Docx.Lists
@@ -88,9 +86,7 @@ import Text.Pandoc.Readers.Docx.TexChar
import Text.Pandoc.Shared
import Data.Maybe (mapMaybe, fromMaybe)
import Data.List (delete, isPrefixOf, (\\), intercalate, intersect)
-import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Base64 (encode)
import qualified Data.Map as M
import Control.Monad.Reader
import Control.Monad.State
@@ -98,17 +94,24 @@ import Text.Printf (printf)
readDocx :: ReaderOptions
-> B.ByteString
- -> Pandoc
+ -> (Pandoc, MediaBag)
readDocx opts bytes =
case archiveToDocx (toArchive bytes) of
- Right docx -> Pandoc meta blks where
- (meta, blks) = (docxToMetaAndBlocks opts docx)
+ Right docx -> (Pandoc meta blks, mediaBag) where
+ (meta, blks, mediaBag) = (docxToOutput opts docx)
Left _ -> error $ "couldn't parse docx file"
data DState = DState { docxAnchorMap :: M.Map String String
+ , docxMediaBag :: MediaBag
, docxInHeaderBlock :: Bool
, docxInTexSubscript :: Bool }
+defaultDState :: DState
+defaultDState = DState { docxAnchorMap = M.empty
+ , docxMediaBag = M.empty
+ , docxInHeaderBlock = False
+ , docxInTexSubscript = False}
+
data DEnv = DEnv { docxOptions :: ReaderOptions
, docxDocument :: Docx}
@@ -319,13 +322,6 @@ runToInlines (Footnote bps) =
runToInlines (Endnote bps) =
concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
-makeDataUrl :: String -> B.ByteString -> Maybe String
-makeDataUrl fp bs =
- case getMimeType fp of
- Just mime -> Just $ "data:" ++ mime ++ ";base64," ++
- toString (encode $ BS.concat $ B.toChunks bs)
- Nothing -> Nothing
-
parPartToInlines :: ParPart -> DocxContext [Inline]
parPartToInlines (PlainRun r) = runToInlines r
parPartToInlines (Insertion _ author date runs) = do
@@ -372,11 +368,9 @@ parPartToInlines (BookMark _ anchor) =
modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}
return [Span (newAnchor, ["anchor"], []) []]
parPartToInlines (Drawing fp bs) = do
- return $ case True of -- TODO: add self-contained images
- True -> [Image [] (fp, "")]
- False -> case makeDataUrl fp bs of
- Just d -> [Image [] (d, "")]
- Nothing -> [Image [] ("", "")]
+ mediaBag <- gets docxMediaBag
+ modify $ \s -> s { docxMediaBag = M.insert fp bs mediaBag}
+ return [Image [] (fp, "")]
parPartToInlines (InternalHyperLink anchor runs) = do
ils <- concatMapM runToInlines runs
return [Link ils ('#' : anchor, "")]
@@ -675,26 +669,25 @@ rewriteLink l@(Link ils ('#':target, title)) = do
Nothing -> l
rewriteLink il = return il
-bodyToMetaAndBlocks :: Body -> DocxContext (Meta, [Block])
-bodyToMetaAndBlocks (Body bps) = do
+bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
+bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- concatMapM bodyPartToBlocks blkbps >>=
walkM rewriteLink
- return $
- (meta,
- blocksToDefinitions $
- blocksToBullets $ blks)
-
-docxToMetaAndBlocks :: ReaderOptions -> Docx -> (Meta, [Block])
-docxToMetaAndBlocks opts d@(Docx (Document _ body)) =
- let dState = DState { docxAnchorMap = M.empty
- , docxInHeaderBlock = False
- , docxInTexSubscript = False}
+ mediaBag <- gets docxMediaBag
+ return $ (meta,
+ blocksToDefinitions $ blocksToBullets $ blks,
+ mediaBag)
+
+docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
+docxToOutput opts d@(Docx (Document _ body)) =
+ let dState = defaultDState
dEnv = DEnv { docxOptions = opts
, docxDocument = d}
in
- evalDocxContext (bodyToMetaAndBlocks body) dEnv dState
+ evalDocxContext (bodyToOutput body) dEnv dState
+
ilToCode :: Inline -> String
ilToCode (Str s) = s
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 8541a1a3a..71938afe0 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -779,11 +779,11 @@ expandDrawingId :: String -> D ParPart
expandDrawingId s = do
target <- asks (lookupRelationship s . envRelationships)
case target of
- Just t -> do let filepath = combine "word" t
- bytes <- asks (lookup filepath . envMedia)
- case bytes of
- Just bs -> return $ Drawing filepath bs
- Nothing -> throwError DocxError
+ Just filepath -> do
+ bytes <- asks (lookup (combine "word" filepath) . envMedia)
+ case bytes of
+ Just bs -> return $ Drawing filepath bs
+ Nothing -> throwError DocxError
Nothing -> throwError DocxError
elemToParPart :: NameSpaces -> Element -> D ParPart