aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pandoc.cabal4
-rw-r--r--pandoc.hs152
-rw-r--r--tests/Tests/Readers/Docx.hs59
-rw-r--r--tests/docx.image.docxbin109656 -> 36942 bytes
-rw-r--r--tests/docx.image1.jpegbin46626 -> 0 bytes
-rw-r--r--tests/docx.image_no_embed.native4
6 files changed, 124 insertions, 95 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index 47bdb6587..1c74933ab 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -386,7 +386,6 @@ Test-Suite test-pandoc
syb >= 0.1 && < 0.5,
pandoc,
pandoc-types >= 1.12.3.3 && < 1.13,
- base64-bytestring >= 0.1 && < 1.1,
bytestring >= 0.9 && < 0.11,
text >= 0.11 && < 1.2,
directory >= 1 && < 1.3,
@@ -400,7 +399,8 @@ Test-Suite test-pandoc
QuickCheck >= 2.4 && < 2.8,
HUnit >= 1.2 && < 1.3,
containers >= 0.1 && < 0.6,
- ansi-terminal >= 0.5 && < 0.7
+ ansi-terminal >= 0.5 && < 0.7,
+ zip-archive >= 0.2.3.2 && < 0.3
Other-Modules: Tests.Old
Tests.Helpers
Tests.Arbitrary
diff --git a/pandoc.hs b/pandoc.hs
index 2563afc9c..70f192086 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -38,13 +38,13 @@ import Text.Pandoc.Readers.LaTeX (handleIncludes)
import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile,
safeRead, headerShift, normalize, err, warn,
openURL )
-import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag )
+import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag )
import Text.Pandoc.XML ( toEntities )
import Text.Pandoc.SelfContained ( makeSelfContained )
import Text.Pandoc.Process (pipeProcess)
import Text.Highlighting.Kate ( languages, Style, tango, pygments,
espresso, zenburn, kate, haddock, monochrome )
-import System.Environment ( getArgs, getProgName )
+import System.Environment ( getArgs, getProgName, getEnvironment )
import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath
import System.Console.GetOpt
@@ -71,6 +71,8 @@ import Control.Applicative ((<$>))
import Text.Pandoc.Readers.Txt2Tags (getT2TMeta)
import Data.Monoid
+type Transform = Pandoc -> Pandoc
+
copyrightMessage :: String
copyrightMessage = "\nCopyright (C) 2006-2014 John MacFarlane\n" ++
"Web: http://johnmacfarlane.net/pandoc\n" ++
@@ -101,7 +103,10 @@ isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","ep
externalFilter :: FilePath -> [String] -> Pandoc -> IO Pandoc
externalFilter f args' d = do
- mbexe <- if '/' `elem` f -- don't check PATH if filter name it has a path
+ mbPath <- lookup "PATH" <$> getEnvironment
+ mbexe <- if '/' `elem` f || mbPath == Nothing
+ -- don't check PATH if filter name has a path, or
+ -- if the PATH is not set
then return Nothing
else findExecutable f
(f', args'') <- case mbexe of
@@ -141,7 +146,7 @@ data Opt = Opt
, optWriter :: String -- ^ Writer format
, optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX
, optTableOfContents :: Bool -- ^ Include table of contents
- , optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply
+ , optTransforms :: [Transform] -- ^ Doc transforms to apply
, optTemplate :: Maybe FilePath -- ^ Custom template
, optVariables :: [(String,String)] -- ^ Template variables to set
, optMetadata :: M.Map String MetaValue -- ^ Metadata fields to set
@@ -934,6 +939,31 @@ defaultWriterName x =
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"
+-- Transformations of a Pandoc document post-parsing:
+
+extractMedia :: MediaBag -> FilePath -> Pandoc -> IO Pandoc
+extractMedia media dir d =
+ case [fp | (fp, _, _) <- mediaDirectory media] of
+ [] -> return d
+ fps -> do
+ extractMediaBag True dir media
+ return $ walk (adjustImagePath dir fps) d
+
+adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
+adjustImagePath dir paths (Image lab (src, tit))
+ | src `elem` paths = Image lab (dir ++ "/" ++ src, tit)
+adjustImagePath _ _ x = x
+
+adjustMetadata :: M.Map String MetaValue -> Pandoc -> IO Pandoc
+adjustMetadata metadata d = return $ M.foldWithKey setMeta d metadata
+
+applyTransforms :: [Transform] -> Pandoc -> IO Pandoc
+applyTransforms transforms d = return $ foldr ($) d transforms
+
+applyFilters :: [FilePath] -> [String] -> Pandoc -> IO Pandoc
+applyFilters filters args d =
+ foldrM ($) d $ map (flip externalFilter args) filters
+
main :: IO ()
main = do
@@ -1026,7 +1056,6 @@ main = do
all (\f -> takeBaseName f /= "pandoc-citeproc")
filters -> "pandoc-citeproc" : filters
_ -> filters
- let plugins = map externalFilter filters'
let sources = if ignoreArgs then [] else args
@@ -1143,6 +1172,40 @@ main = do
, readerTrackChanges = trackChanges
}
+ when (not (isTextFormat writerName') && outputFile == "-") $
+ err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++
+ "Specify an output file using the -o option."
+
+ let readSources [] = mapM readSource ["-"]
+ readSources srcs = mapM readSource srcs
+ readSource "-" = UTF8.getContents
+ readSource src = case parseURI src of
+ Just u | uriScheme u `elem` ["http:","https:"] ->
+ readURI src
+ _ -> UTF8.readFile src
+ readURI src = do
+ res <- openURL src
+ case res of
+ Left e -> throwIO e
+ Right (bs,_) -> return $ UTF8.toString bs
+
+ let readFiles [] = error "Cannot read archive from stdin"
+ readFiles (x:_) = B.readFile x
+
+ let convertTabs = tabFilter (if (preserveTabs || readerName' == "t2t") then 0 else tabStop)
+
+ let handleIncludes' = if readerName' == "latex" || readerName' == "latex+lhs"
+ then handleIncludes
+ else return
+
+ (doc, media) <-
+ case reader of
+ StringReader r-> (, mempty) <$>
+ ( readSources >=>
+ handleIncludes' . convertTabs . intercalate "\n" >=>
+ r readerOpts ) sources
+ ByteStringReader r -> readFiles sources >>= r readerOpts
+
let writerOptions = def { writerStandalone = standalone',
writerTemplate = templ,
writerVariables = variables'',
@@ -1178,70 +1241,15 @@ main = do
writerEpubChapterLevel = epubChapterLevel,
writerTOCDepth = epubTOCDepth,
writerReferenceODT = referenceODT,
- writerReferenceDocx = referenceDocx
+ writerReferenceDocx = referenceDocx,
+ writerMediaBag = media
}
- when (not (isTextFormat writerName') && outputFile == "-") $
- err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++
- "Specify an output file using the -o option."
-
- let readSources [] = mapM readSource ["-"]
- readSources srcs = mapM readSource srcs
- readSource "-" = UTF8.getContents
- readSource src = case parseURI src of
- Just u | uriScheme u `elem` ["http:","https:"] ->
- readURI src
- _ -> UTF8.readFile src
- readURI src = do
- res <- openURL src
- case res of
- Left e -> throwIO e
- Right (bs,_) -> return $ UTF8.toString bs
-
- let readFiles [] = error "Cannot read archive from stdin"
- readFiles (x:_) = B.readFile x
-
- let convertTabs = tabFilter (if (preserveTabs || readerName' == "t2t") then 0 else tabStop)
-
- let handleIncludes' = if readerName' == "latex" || readerName' == "latex+lhs"
- then handleIncludes
- else return
-
- let adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
- adjustImagePath dir paths (Image lab (src, tit))
- | src `elem` paths = Image lab (dir ++ "/" ++ src, tit)
- adjustImagePath _ _ x = x
-
- (doc, media) <-
- case reader of
- StringReader r-> (, mempty) <$>
- ( readSources >=>
- handleIncludes' . convertTabs . intercalate "\n" >=>
- r readerOpts ) sources
- ByteStringReader r -> readFiles sources >>= r readerOpts
-
- let writerOptions' = writerOptions{ writerMediaBag = media }
-
- let extractMedia d = do
- case mbExtractMedia of
- Just dir -> do
- case [fp | (fp, _, _) <- mediaDirectory media] of
- [] -> return d
- fps -> do
- extractMediaBag True dir media
- return $ walk (adjustImagePath dir fps) d
- _ -> return d
-
- let adjustMetadata d = return $ M.foldWithKey setMeta d metadata
-
- let applyTransforms d = return $ foldr ($) d transforms
-
- let applyPlugins d = foldrM ($) d $ map ($ [writerName']) plugins
- doc' <- (extractMedia >=>
- adjustMetadata >=>
- applyTransforms >=>
- applyPlugins) doc
+ doc' <- (maybe return (extractMedia media) mbExtractMedia >=>
+ adjustMetadata metadata >=>
+ applyTransforms transforms >=>
+ applyFilters filters' [writerName']) doc
let writeBinary :: B.ByteString -> IO ()
writeBinary = B.writeFile (UTF8.encodePath outputFile)
@@ -1251,8 +1259,8 @@ main = do
writerFn f = UTF8.writeFile f
case writer of
- IOStringWriter f -> f writerOptions' doc' >>= writerFn outputFile
- IOByteStringWriter f -> f writerOptions' doc' >>= writeBinary
+ IOStringWriter f -> f writerOptions doc' >>= writerFn outputFile
+ IOByteStringWriter f -> f writerOptions doc' >>= writeBinary
PureStringWriter f
| pdfOutput -> do
-- make sure writer is latex or beamer
@@ -1266,14 +1274,14 @@ main = do
err 41 $ latexEngine ++ " not found. " ++
latexEngine ++ " is needed for pdf output."
- res <- makePDF latexEngine f writerOptions' doc'
+ res <- makePDF latexEngine f writerOptions doc'
case res of
Right pdf -> writeBinary pdf
Left err' -> do
B.hPutStr stderr $ err'
B.hPut stderr $ B.pack [10]
err 43 "Error producing PDF from TeX source"
- | otherwise -> selfcontain (f writerOptions' doc' ++
+ | otherwise -> selfcontain (f writerOptions doc' ++
['\n' | not standalone'])
>>= writerFn outputFile . handleEntities
where htmlFormat = writerName' `elem`
@@ -1281,8 +1289,8 @@ main = do
"s5","slidy","slideous","dzslides","revealjs"]
selfcontain = if selfContained && htmlFormat
then makeSelfContained
- (writerMediaBag writerOptions')
- (writerUserDataDir writerOptions')
+ (writerMediaBag writerOptions)
+ (writerUserDataDir writerOptions)
else return
handleEntities = if htmlFormat && ascii
then toEntities
diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs
index 85a02debd..efc520dba 100644
--- a/tests/Tests/Readers/Docx.hs
+++ b/tests/Tests/Readers/Docx.hs
@@ -5,14 +5,15 @@ import Text.Pandoc.Readers.Native
import Text.Pandoc.Definition
import Tests.Helpers
import Test.Framework
-import qualified Data.ByteString as BS
+import Test.HUnit (assertBool)
+import Test.Framework.Providers.HUnit
import qualified Data.ByteString.Lazy as B
-import qualified Data.ByteString.Char8 as B8
-import qualified Data.ByteString.Base64 as B64
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Writers.Native (writeNative)
import qualified Data.Map as M
-import Text.Pandoc.MediaBag (lookupMedia)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Codec.Archive.Zip
+import System.FilePath (combine)
-- We define a wrapper around pandoc that doesn't normalize in the
-- tests. Since we do our own normalization, we want to make sure
@@ -56,22 +57,44 @@ testCompareWithOpts opts name docxFile nativeFile =
testCompare :: String -> FilePath -> FilePath -> Test
testCompare = testCompareWithOpts def
-testCompareMediaIO :: String -> FilePath -> FilePath -> FilePath -> IO Test
-testCompareMediaIO name docxFile mediaPath mediaFile = do
+getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString)
+getMedia archivePath mediaPath = do
+ zf <- B.readFile archivePath >>= return . toArchive
+ return $ findEntryByPath (combine "word" mediaPath) zf >>= (Just . fromEntry)
+
+compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool
+compareMediaPathIO mediaPath mediaBag docxPath = do
+ docxMedia <- getMedia docxPath mediaPath
+ let mbBS = case lookupMedia mediaPath mediaBag of
+ Just (_, bs) -> bs
+ Nothing -> error ("couldn't find " ++
+ mediaPath ++
+ " in media bag")
+ docxBS = case docxMedia of
+ Just bs -> bs
+ Nothing -> error ("couldn't find " ++
+ mediaPath ++
+ " in media bag")
+ return $ mbBS == docxBS
+
+compareMediaBagIO :: FilePath -> IO Bool
+compareMediaBagIO docxFile = do
df <- B.readFile docxFile
- mf <- B.readFile mediaFile
let (_, mb) = readDocx def df
- dBytes = case lookupMedia mediaPath mb of
- Just (_,bs) -> bs
- Nothing -> error "Media file not found"
- d64 = B8.unpack $ B64.encode $ BS.concat $ B.toChunks dBytes
- m64 = B8.unpack $ B64.encode $ BS.concat $ B.toChunks mf
- return $ test id name (d64, m64)
+ bools <- mapM
+ (\(fp, _, _) -> compareMediaPathIO fp mb docxFile)
+ (mediaDirectory mb)
+ return $ and bools
-testCompareMedia :: String -> FilePath -> FilePath -> FilePath -> Test
-testCompareMedia name docxFile mediaPath mediaFile =
- buildTest $ testCompareMediaIO name docxFile mediaPath mediaFile
+testMediaBagIO :: String -> FilePath -> IO Test
+testMediaBagIO name docxFile = do
+ outcome <- compareMediaBagIO docxFile
+ return $ testCase name (assertBool
+ ("Media didn't match media bag in file " ++ docxFile)
+ outcome)
+testMediaBag :: String -> FilePath -> Test
+testMediaBag name docxFile = buildTest $ testMediaBagIO name docxFile
tests :: [Test]
tests = [ testGroup "inlines"
@@ -186,11 +209,9 @@ tests = [ testGroup "inlines"
"docx.track_changes_deletion_all.native"
]
, testGroup "media"
- [ testCompareMedia
+ [ testMediaBag
"image extraction"
"docx.image.docx"
- "media/image1.jpeg"
- "docx.image1.jpeg"
]
, testGroup "metadata"
[ testCompareWithOpts def{readerStandalone=True}
diff --git a/tests/docx.image.docx b/tests/docx.image.docx
index 060f2b204..06e4efd1a 100644
--- a/tests/docx.image.docx
+++ b/tests/docx.image.docx
Binary files differ
diff --git a/tests/docx.image1.jpeg b/tests/docx.image1.jpeg
deleted file mode 100644
index 423dff48b..000000000
--- a/tests/docx.image1.jpeg
+++ /dev/null
Binary files differ
diff --git a/tests/docx.image_no_embed.native b/tests/docx.image_no_embed.native
index aa0f65d27..95c73610e 100644
--- a/tests/docx.image_no_embed.native
+++ b/tests/docx.image_no_embed.native
@@ -1,2 +1,2 @@
-[Header 2 ("an-image",[],[]) [Str "An",Space,Str "image"]
-,Para [Image [] ("media/image1.jpeg","")]]
+[Para [Str "An",Space,Str "image:"]
+,Para [Image [] ("media/image1.jpg","")]]