aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-07-30 10:55:29 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-07-30 10:55:29 -0700
commitb12d2ea20a8b914ed2d6ee111eee5a25ad989597 (patch)
treeccf89e4668affc8ee0bf16089f6eadc2c3a427ad /src/Text
parent02c79ea4f6c050e9e610825e8b462382beae2b5a (diff)
parentf733b50150c3e7bf445d0d7c71e1f34e62ddf61a (diff)
downloadpandoc-b12d2ea20a8b914ed2d6ee111eee5a25ad989597.tar.gz
Merge pull request #1468 from jkr/mediabag
Mediabag
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc.hs6
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs57
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs10
-rw-r--r--src/Text/Pandoc/Shared.hs11
4 files changed, 44 insertions, 40 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index be34641a9..11553383c 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -160,7 +160,7 @@ import Text.Pandoc.Writers.Haddock
import Text.Pandoc.Writers.Custom
import Text.Pandoc.Templates
import Text.Pandoc.Options
-import Text.Pandoc.Shared (safeRead, warn)
+import Text.Pandoc.Shared (safeRead, warn, MediaBag)
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.List (intercalate)
@@ -204,12 +204,12 @@ markdown o s = do
return doc
data Reader = StringReader (ReaderOptions -> String -> IO Pandoc)
- | ByteStringReader (ReaderOptions -> BL.ByteString -> IO Pandoc)
+ | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag))
mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader
mkStringReader r = StringReader (\o s -> return $ r o s)
-mkBSReader :: (ReaderOptions -> BL.ByteString -> Pandoc) -> Reader
+mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader
mkBSReader r = ByteStringReader (\o s -> return $ r o s)
-- | Association list of formats and readers.
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 48a23dd3c..f5fb6565a 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -78,8 +78,6 @@ import Codec.Archive.Zip
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Builder (text, toList)
-import Text.Pandoc.MIME (getMimeType)
-import Text.Pandoc.UTF8 (toString)
import Text.Pandoc.Walk
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Readers.Docx.Lists
@@ -88,9 +86,7 @@ import Text.Pandoc.Readers.Docx.TexChar
import Text.Pandoc.Shared
import Data.Maybe (mapMaybe, fromMaybe)
import Data.List (delete, isPrefixOf, (\\), intercalate, intersect)
-import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Base64 (encode)
import qualified Data.Map as M
import Control.Monad.Reader
import Control.Monad.State
@@ -98,17 +94,24 @@ import Text.Printf (printf)
readDocx :: ReaderOptions
-> B.ByteString
- -> Pandoc
+ -> (Pandoc, MediaBag)
readDocx opts bytes =
case archiveToDocx (toArchive bytes) of
- Right docx -> Pandoc meta blks where
- (meta, blks) = (docxToMetaAndBlocks opts docx)
+ Right docx -> (Pandoc meta blks, mediaBag) where
+ (meta, blks, mediaBag) = (docxToOutput opts docx)
Left _ -> error $ "couldn't parse docx file"
data DState = DState { docxAnchorMap :: M.Map String String
+ , docxMediaBag :: MediaBag
, docxInHeaderBlock :: Bool
, docxInTexSubscript :: Bool }
+defaultDState :: DState
+defaultDState = DState { docxAnchorMap = M.empty
+ , docxMediaBag = M.empty
+ , docxInHeaderBlock = False
+ , docxInTexSubscript = False}
+
data DEnv = DEnv { docxOptions :: ReaderOptions
, docxDocument :: Docx}
@@ -319,13 +322,6 @@ runToInlines (Footnote bps) =
runToInlines (Endnote bps) =
concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
-makeDataUrl :: String -> B.ByteString -> Maybe String
-makeDataUrl fp bs =
- case getMimeType fp of
- Just mime -> Just $ "data:" ++ mime ++ ";base64," ++
- toString (encode $ BS.concat $ B.toChunks bs)
- Nothing -> Nothing
-
parPartToInlines :: ParPart -> DocxContext [Inline]
parPartToInlines (PlainRun r) = runToInlines r
parPartToInlines (Insertion _ author date runs) = do
@@ -372,11 +368,9 @@ parPartToInlines (BookMark _ anchor) =
modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}
return [Span (newAnchor, ["anchor"], []) []]
parPartToInlines (Drawing fp bs) = do
- return $ case True of -- TODO: add self-contained images
- True -> [Image [] (fp, "")]
- False -> case makeDataUrl fp bs of
- Just d -> [Image [] (d, "")]
- Nothing -> [Image [] ("", "")]
+ mediaBag <- gets docxMediaBag
+ modify $ \s -> s { docxMediaBag = M.insert fp bs mediaBag}
+ return [Image [] (fp, "")]
parPartToInlines (InternalHyperLink anchor runs) = do
ils <- concatMapM runToInlines runs
return [Link ils ('#' : anchor, "")]
@@ -675,26 +669,25 @@ rewriteLink l@(Link ils ('#':target, title)) = do
Nothing -> l
rewriteLink il = return il
-bodyToMetaAndBlocks :: Body -> DocxContext (Meta, [Block])
-bodyToMetaAndBlocks (Body bps) = do
+bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
+bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- concatMapM bodyPartToBlocks blkbps >>=
walkM rewriteLink
- return $
- (meta,
- blocksToDefinitions $
- blocksToBullets $ blks)
-
-docxToMetaAndBlocks :: ReaderOptions -> Docx -> (Meta, [Block])
-docxToMetaAndBlocks opts d@(Docx (Document _ body)) =
- let dState = DState { docxAnchorMap = M.empty
- , docxInHeaderBlock = False
- , docxInTexSubscript = False}
+ mediaBag <- gets docxMediaBag
+ return $ (meta,
+ blocksToDefinitions $ blocksToBullets $ blks,
+ mediaBag)
+
+docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
+docxToOutput opts d@(Docx (Document _ body)) =
+ let dState = defaultDState
dEnv = DEnv { docxOptions = opts
, docxDocument = d}
in
- evalDocxContext (bodyToMetaAndBlocks body) dEnv dState
+ evalDocxContext (bodyToOutput body) dEnv dState
+
ilToCode :: Inline -> String
ilToCode (Str s) = s
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 8541a1a3a..71938afe0 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -779,11 +779,11 @@ expandDrawingId :: String -> D ParPart
expandDrawingId s = do
target <- asks (lookupRelationship s . envRelationships)
case target of
- Just t -> do let filepath = combine "word" t
- bytes <- asks (lookup filepath . envMedia)
- case bytes of
- Just bs -> return $ Drawing filepath bs
- Nothing -> throwError DocxError
+ Just filepath -> do
+ bytes <- asks (lookup (combine "word" filepath) . envMedia)
+ case bytes of
+ Just bs -> return $ Drawing filepath bs
+ Nothing -> throwError DocxError
Nothing -> throwError DocxError
elemToParPart :: NameSpaces -> Element -> D ParPart
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 77180bd4b..0282eadf6 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -48,6 +48,8 @@ module Text.Pandoc.Shared (
toRomanNumeral,
escapeURI,
tabFilter,
+ -- * Media Handling
+ MediaBag,
-- * Date/time
normalizeDate,
-- * Pandoc block and inline list processing
@@ -114,6 +116,7 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Lazy as BL
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
@@ -280,6 +283,14 @@ tabFilter tabStop =
x : go (spsToNextStop - 1) xs
in go tabStop
+---
+--- Media handling
+---
+
+-- | A map of media paths to their binary representations.
+
+type MediaBag = M.Map String BL.ByteString
+
--
-- Date/time
--