diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-07-30 10:55:29 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-07-30 10:55:29 -0700 |
commit | b12d2ea20a8b914ed2d6ee111eee5a25ad989597 (patch) | |
tree | ccf89e4668affc8ee0bf16089f6eadc2c3a427ad /src/Text/Pandoc/Readers/Docx.hs | |
parent | 02c79ea4f6c050e9e610825e8b462382beae2b5a (diff) | |
parent | f733b50150c3e7bf445d0d7c71e1f34e62ddf61a (diff) | |
download | pandoc-b12d2ea20a8b914ed2d6ee111eee5a25ad989597.tar.gz |
Merge pull request #1468 from jkr/mediabag
Mediabag
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 57 |
1 files changed, 25 insertions, 32 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 48a23dd3c..f5fb6565a 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -78,8 +78,6 @@ import Codec.Archive.Zip import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Builder (text, toList) -import Text.Pandoc.MIME (getMimeType) -import Text.Pandoc.UTF8 (toString) import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists @@ -88,9 +86,7 @@ import Text.Pandoc.Readers.Docx.TexChar import Text.Pandoc.Shared import Data.Maybe (mapMaybe, fromMaybe) import Data.List (delete, isPrefixOf, (\\), intercalate, intersect) -import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B -import Data.ByteString.Base64 (encode) import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State @@ -98,17 +94,24 @@ import Text.Printf (printf) readDocx :: ReaderOptions -> B.ByteString - -> Pandoc + -> (Pandoc, MediaBag) readDocx opts bytes = case archiveToDocx (toArchive bytes) of - Right docx -> Pandoc meta blks where - (meta, blks) = (docxToMetaAndBlocks opts docx) + Right docx -> (Pandoc meta blks, mediaBag) where + (meta, blks, mediaBag) = (docxToOutput opts docx) Left _ -> error $ "couldn't parse docx file" data DState = DState { docxAnchorMap :: M.Map String String + , docxMediaBag :: MediaBag , docxInHeaderBlock :: Bool , docxInTexSubscript :: Bool } +defaultDState :: DState +defaultDState = DState { docxAnchorMap = M.empty + , docxMediaBag = M.empty + , docxInHeaderBlock = False + , docxInTexSubscript = False} + data DEnv = DEnv { docxOptions :: ReaderOptions , docxDocument :: Docx} @@ -319,13 +322,6 @@ runToInlines (Footnote bps) = runToInlines (Endnote bps) = concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks]) -makeDataUrl :: String -> B.ByteString -> Maybe String -makeDataUrl fp bs = - case getMimeType fp of - Just mime -> Just $ "data:" ++ mime ++ ";base64," ++ - toString (encode $ BS.concat $ B.toChunks bs) - Nothing -> Nothing - parPartToInlines :: ParPart -> DocxContext [Inline] parPartToInlines (PlainRun r) = runToInlines r parPartToInlines (Insertion _ author date runs) = do @@ -372,11 +368,9 @@ parPartToInlines (BookMark _ anchor) = modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap} return [Span (newAnchor, ["anchor"], []) []] parPartToInlines (Drawing fp bs) = do - return $ case True of -- TODO: add self-contained images - True -> [Image [] (fp, "")] - False -> case makeDataUrl fp bs of - Just d -> [Image [] (d, "")] - Nothing -> [Image [] ("", "")] + mediaBag <- gets docxMediaBag + modify $ \s -> s { docxMediaBag = M.insert fp bs mediaBag} + return [Image [] (fp, "")] parPartToInlines (InternalHyperLink anchor runs) = do ils <- concatMapM runToInlines runs return [Link ils ('#' : anchor, "")] @@ -675,26 +669,25 @@ rewriteLink l@(Link ils ('#':target, title)) = do Nothing -> l rewriteLink il = return il -bodyToMetaAndBlocks :: Body -> DocxContext (Meta, [Block]) -bodyToMetaAndBlocks (Body bps) = do +bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag) +bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps blks <- concatMapM bodyPartToBlocks blkbps >>= walkM rewriteLink - return $ - (meta, - blocksToDefinitions $ - blocksToBullets $ blks) - -docxToMetaAndBlocks :: ReaderOptions -> Docx -> (Meta, [Block]) -docxToMetaAndBlocks opts d@(Docx (Document _ body)) = - let dState = DState { docxAnchorMap = M.empty - , docxInHeaderBlock = False - , docxInTexSubscript = False} + mediaBag <- gets docxMediaBag + return $ (meta, + blocksToDefinitions $ blocksToBullets $ blks, + mediaBag) + +docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) +docxToOutput opts d@(Docx (Document _ body)) = + let dState = defaultDState dEnv = DEnv { docxOptions = opts , docxDocument = d} in - evalDocxContext (bodyToMetaAndBlocks body) dEnv dState + evalDocxContext (bodyToOutput body) dEnv dState + ilToCode :: Inline -> String ilToCode (Str s) = s |