about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- pandoc.hs 3
-rw-r--r-- src/Text/Pandoc.hs 6
-rw-r--r-- src/Text/Pandoc/Readers/Docx.hs 57
-rw-r--r-- src/Text/Pandoc/Readers/Docx/Parse.hs 10
-rw-r--r-- src/Text/Pandoc/Shared.hs 11
-rw-r--r-- tests/Tests/Readers/Docx.hs 3
-rw-r--r-- tests/docx.image_no_embed.native 2
7 files changed, 49 insertions, 43 deletions
diff --git a/pandoc.hs b/pandoc.hs
index 908643a1f..629c16c86 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -1201,7 +1201,8 @@ main = do
readSources sources >>=
handleIncludes' . convertTabs . intercalate "\n" >>=
r readerOpts
- ByteStringReader r -> readFiles sources >>= r readerOpts
+ ByteStringReader r -> readFiles sources >>= r readerOpts >>=
+ (return . fst)
let doc0 = M.foldWithKey setMeta doc metadata
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index be34641a9..11553383c 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -160,7 +160,7 @@ import Text.Pandoc.Writers.Haddock
import Text.Pandoc.Writers.Custom
import Text.Pandoc.Templates
import Text.Pandoc.Options
-import Text.Pandoc.Shared (safeRead, warn)
+import Text.Pandoc.Shared (safeRead, warn, MediaBag)
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.List (intercalate)
@@ -204,12 +204,12 @@ markdown o s = do
return doc
data Reader = StringReader (ReaderOptions -> String -> IO Pandoc)
- | ByteStringReader (ReaderOptions -> BL.ByteString -> IO Pandoc)
+ | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag))
mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader
mkStringReader r = StringReader (\o s -> return $ r o s)
-mkBSReader :: (ReaderOptions -> BL.ByteString -> Pandoc) -> Reader
+mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader
mkBSReader r = ByteStringReader (\o s -> return $ r o s)
-- | Association list of formats and readers.
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 48a23dd3c..f5fb6565a 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -78,8 +78,6 @@ import Codec.Archive.Zip
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Builder (text, toList)
-import Text.Pandoc.MIME (getMimeType)
-import Text.Pandoc.UTF8 (toString)
import Text.Pandoc.Walk
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Readers.Docx.Lists
@@ -88,9 +86,7 @@ import Text.Pandoc.Readers.Docx.TexChar
import Text.Pandoc.Shared
import Data.Maybe (mapMaybe, fromMaybe)
import Data.List (delete, isPrefixOf, (\\), intercalate, intersect)
-import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Base64 (encode)
import qualified Data.Map as M
import Control.Monad.Reader
import Control.Monad.State
@@ -98,17 +94,24 @@ import Text.Printf (printf)
readDocx :: ReaderOptions
-> B.ByteString
- -> Pandoc
+ -> (Pandoc, MediaBag)
readDocx opts bytes =
case archiveToDocx (toArchive bytes) of
- Right docx -> Pandoc meta blks where
- (meta, blks) = (docxToMetaAndBlocks opts docx)
+ Right docx -> (Pandoc meta blks, mediaBag) where
+ (meta, blks, mediaBag) = (docxToOutput opts docx)
Left _ -> error $ "couldn't parse docx file"
data DState = DState { docxAnchorMap :: M.Map String String
+ , docxMediaBag :: MediaBag
, docxInHeaderBlock :: Bool
, docxInTexSubscript :: Bool }
+defaultDState :: DState
+defaultDState = DState { docxAnchorMap = M.empty
+ , docxMediaBag = M.empty
+ , docxInHeaderBlock = False
+ , docxInTexSubscript = False}
+
data DEnv = DEnv { docxOptions :: ReaderOptions
, docxDocument :: Docx}
@@ -319,13 +322,6 @@ runToInlines (Footnote bps) =
runToInlines (Endnote bps) =
concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
-makeDataUrl :: String -> B.ByteString -> Maybe String
-makeDataUrl fp bs =
- case getMimeType fp of
- Just mime -> Just $ "data:" ++ mime ++ ";base64," ++
- toString (encode $ BS.concat $ B.toChunks bs)
- Nothing -> Nothing
-
parPartToInlines :: ParPart -> DocxContext [Inline]
parPartToInlines (PlainRun r) = runToInlines r
parPartToInlines (Insertion _ author date runs) = do
@@ -372,11 +368,9 @@ parPartToInlines (BookMark _ anchor) =
modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}
return [Span (newAnchor, ["anchor"], []) []]
parPartToInlines (Drawing fp bs) = do
- return $ case True of -- TODO: add self-contained images
- True -> [Image [] (fp, "")]
- False -> case makeDataUrl fp bs of
- Just d -> [Image [] (d, "")]
- Nothing -> [Image [] ("", "")]
+ mediaBag <- gets docxMediaBag
+ modify $ \s -> s { docxMediaBag = M.insert fp bs mediaBag}
+ return [Image [] (fp, "")]
parPartToInlines (InternalHyperLink anchor runs) = do
ils <- concatMapM runToInlines runs
return [Link ils ('#' : anchor, "")]
@@ -675,26 +669,25 @@ rewriteLink l@(Link ils ('#':target, title)) = do
Nothing -> l
rewriteLink il = return il
-bodyToMetaAndBlocks :: Body -> DocxContext (Meta, [Block])
-bodyToMetaAndBlocks (Body bps) = do
+bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
+bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- concatMapM bodyPartToBlocks blkbps >>=
walkM rewriteLink
- return $
- (meta,
- blocksToDefinitions $
- blocksToBullets $ blks)
-
-docxToMetaAndBlocks :: ReaderOptions -> Docx -> (Meta, [Block])
-docxToMetaAndBlocks opts d@(Docx (Document _ body)) =
- let dState = DState { docxAnchorMap = M.empty
- , docxInHeaderBlock = False
- , docxInTexSubscript = False}
+ mediaBag <- gets docxMediaBag
+ return $ (meta,
+ blocksToDefinitions $ blocksToBullets $ blks,
+ mediaBag)
+
+docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
+docxToOutput opts d@(Docx (Document _ body)) =
+ let dState = defaultDState
dEnv = DEnv { docxOptions = opts
, docxDocument = d}
in
- evalDocxContext (bodyToMetaAndBlocks body) dEnv dState
+ evalDocxContext (bodyToOutput body) dEnv dState
+
ilToCode :: Inline -> String
ilToCode (Str s) = s
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 8541a1a3a..71938afe0 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -779,11 +779,11 @@ expandDrawingId :: String -> D ParPart
expandDrawingId s = do
target <- asks (lookupRelationship s . envRelationships)
case target of
- Just t -> do let filepath = combine "word" t
- bytes <- asks (lookup filepath . envMedia)
- case bytes of
- Just bs -> return $ Drawing filepath bs
- Nothing -> throwError DocxError
+ Just filepath -> do
+ bytes <- asks (lookup (combine "word" filepath) . envMedia)
+ case bytes of
+ Just bs -> return $ Drawing filepath bs
+ Nothing -> throwError DocxError
Nothing -> throwError DocxError
elemToParPart :: NameSpaces -> Element -> D ParPart
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 77180bd4b..0282eadf6 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -48,6 +48,8 @@ module Text.Pandoc.Shared (
toRomanNumeral,
escapeURI,
tabFilter,
+ -- * Media Handling
+ MediaBag,
-- * Date/time
normalizeDate,
-- * Pandoc block and inline list processing
@@ -114,6 +116,7 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Lazy as BL
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
@@ -280,6 +283,14 @@ tabFilter tabStop =
x : go (spsToNextStop - 1) xs
in go tabStop
+---
+--- Media handling
+---
+
+-- | A map of media paths to their binary representations.
+
+type MediaBag = M.Map String BL.ByteString
+
--
-- Date/time
--
diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs
index b1a966969..e8dd6b72e 100644
--- a/tests/Tests/Readers/Docx.hs
+++ b/tests/Tests/Readers/Docx.hs
@@ -37,7 +37,8 @@ compareOutput :: ReaderOptions
compareOutput opts docxFile nativeFile = do
df <- B.readFile docxFile
nf <- Prelude.readFile nativeFile
- return $ (noNorm (readDocx opts df), noNorm (readNative nf))
+ let (p, _) = readDocx opts df
+ return $ (noNorm p, noNorm (readNative nf))
testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO Test
testCompareWithOptsIO opts name docxFile nativeFile = do
diff --git a/tests/docx.image_no_embed.native b/tests/docx.image_no_embed.native
index 063958bc7..aa0f65d27 100644
--- a/tests/docx.image_no_embed.native
+++ b/tests/docx.image_no_embed.native
@@ -1,2 +1,2 @@
[Header 2 ("an-image",[],[]) [Str "An",Space,Str "image"]
-,Para [Image [] ("word/media/image1.jpeg","")]]
+,Para [Image [] ("media/image1.jpeg","")]]