aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/MediaBag.hs107
-rw-r--r--src/Text/Pandoc/Options.hs6
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs9
-rw-r--r--src/Text/Pandoc/SelfContained.hs26
-rw-r--r--src/Text/Pandoc/Shared.hs22
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs3
6 files changed, 137 insertions, 36 deletions
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
new file mode 100644
index 000000000..667089f55
--- /dev/null
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -0,0 +1,107 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-
+Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.MediaBag
+ Copyright : Copyright (C) 2014 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definition of a MediaBag object to hold binary resources, and an
+interface for interacting with it.
+-}
+module Text.Pandoc.MediaBag (
+ MediaBag,
+ lookupMedia,
+ insertMedia,
+ mediaDirectory,
+ extractMediaBag
+ ) where
+import System.FilePath
+import System.Directory (createDirectoryIfMissing)
+import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as BL
+import Data.Monoid (Monoid)
+import Control.Monad (when, MonadPlus(..))
+import Text.Pandoc.MIME (getMimeType)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Maybe (fromMaybe)
+import System.IO (stderr)
+
+-- | A container for a collection of binary resources, with names and
+-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty'
+-- can be used for an empty 'MediaBag', and '<>' can be used to append
+-- two 'MediaBag's.
+newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString))
+ deriving (Monoid)
+
+instance Show MediaBag where
+ show bag = "MediaBag " ++ show (mediaDirectory bag)
+
+-- | Insert a media item into a 'MediaBag', replacing any existing
+-- value with the same name.
+insertMedia :: FilePath -- ^ relative path and canonical name of resource
+ -> Maybe String -- ^ mime type (Nothing = determine from extension)
+ -> BL.ByteString -- ^ contents of resource
+ -> MediaBag
+ -> MediaBag
+insertMedia fp mbMime contents (MediaBag mediamap) =
+ MediaBag (M.insert fp (mime, contents) mediamap)
+ where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback)
+ fallback = case takeExtension fp of
+ ".gz" -> getMimeType $ dropExtension fp
+ _ -> getMimeType fp
+
+-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
+lookupMedia :: FilePath
+ -> MediaBag
+ -> Maybe (String, BL.ByteString)
+lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap
+
+-- | Get a list of the file paths stored in a 'MediaBag', with
+-- their corresponding mime types and the lengths in bytes of the contents.
+mediaDirectory :: MediaBag -> [(String, String, Int)]
+mediaDirectory (MediaBag mediamap) =
+ M.foldWithKey (\fp (mime,contents) ->
+ ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
+
+-- | Extract contents of MediaBag to a given directory. Print informational
+-- messages if 'verbose' is true.
+extractMediaBag :: Bool
+ -> FilePath
+ -> MediaBag
+ -> IO ()
+extractMediaBag verbose dir (MediaBag mediamap) = do
+ sequence_ $ M.foldWithKey
+ (\fp (_ ,contents) ->
+ ((writeMedia verbose dir (fp, contents)):)) [] mediamap
+
+writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
+writeMedia verbose dir (subpath, bs) = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> joinPath (splitPath subpath)
+ createDirectoryIfMissing True $ takeDirectory fullpath
+ when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath
+ BL.writeFile fullpath bs
+
+
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 2de1a9e35..85a6a3096 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -49,8 +49,8 @@ import Data.Set (Set)
import qualified Data.Set as Set
import Data.Default
import Text.Pandoc.Highlighting (Style, pygments)
-import Text.Pandoc.Shared (MediaBag)
-import qualified Data.Map as M
+import Text.Pandoc.MediaBag (MediaBag)
+import Data.Monoid
-- | Individually selectable syntax extensions.
data Extension =
@@ -359,7 +359,7 @@ instance Default WriterOptions where
, writerTOCDepth = 3
, writerReferenceODT = Nothing
, writerReferenceDocx = Nothing
- , writerMediaBag = M.empty
+ , writerMediaBag = mempty
}
-- | Returns True if the given extension is enabled.
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index f5fb6565a..86ce62ced 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -84,8 +84,10 @@ import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Readers.Docx.TexChar
import Text.Pandoc.Shared
+import Text.Pandoc.MediaBag (insertMedia, MediaBag)
import Data.Maybe (mapMaybe, fromMaybe)
import Data.List (delete, isPrefixOf, (\\), intercalate, intersect)
+import Data.Monoid
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
import Control.Monad.Reader
@@ -108,7 +110,7 @@ data DState = DState { docxAnchorMap :: M.Map String String
defaultDState :: DState
defaultDState = DState { docxAnchorMap = M.empty
- , docxMediaBag = M.empty
+ , docxMediaBag = mempty
, docxInHeaderBlock = False
, docxInTexSubscript = False}
@@ -217,7 +219,8 @@ runStyleToContainers rPr =
, if isStrike rPr then (Just Strikeout) else Nothing
, if isSuperScript rPr then (Just Superscript) else Nothing
, if isSubScript rPr then (Just Subscript) else Nothing
- , rUnderline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
+ , rUnderline rPr >>=
+ (\f -> if f == "single" then (Just Emph) else Nothing)
]
in
classContainers ++ formatters
@@ -369,7 +372,7 @@ parPartToInlines (BookMark _ anchor) =
return [Span (newAnchor, ["anchor"], []) []]
parPartToInlines (Drawing fp bs) = do
mediaBag <- gets docxMediaBag
- modify $ \s -> s { docxMediaBag = M.insert fp bs mediaBag}
+ modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
return [Image [] (fp, "")]
parPartToInlines (InternalHyperLink anchor runs) = do
ils <- concatMapM runToInlines runs
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 777da3551..adb2c0014 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -41,11 +41,11 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, MediaBag)
+import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.UTF8 (toString, fromString)
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)
-import qualified Data.Map as M
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
@@ -110,16 +110,20 @@ getItem media userdata f =
-- this is needed for things like cmunrm.eot?#iefix,
-- which is used to get old versions of IE to work with web fonts.
let f' = takeWhile (\c -> c /= '?' && c /= '#') f
- let mime = case takeExtension f' of
- ".gz" -> getMimeType $ dropExtension f'
- x -> getMimeType x
+ let mbMime = case takeExtension f' of
+ ".gz" -> getMimeType $ dropExtension f'
+ x -> getMimeType x
exists <- doesFileExist f'
- cont <- if exists
- then B.readFile f'
- else case M.lookup f media of
- Just bs -> return $ BS.concat $ L.toChunks bs
- Nothing -> readDataFile userdata f'
- return (cont, mime)
+ if exists
+ then do
+ cont <- B.readFile f'
+ return (cont, mbMime)
+ else case lookupMedia f media of
+ Just (mime,bs) -> return (BS.concat $ L.toChunks bs,
+ Just mime)
+ Nothing -> do
+ cont <- readDataFile userdata f'
+ return (cont, mbMime)
where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
getRaw :: MediaBag -> Maybe FilePath -> String -> String
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 81aa6cf5a..d5769c1ab 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -48,8 +48,6 @@ module Text.Pandoc.Shared (
toRomanNumeral,
escapeURI,
tabFilter,
- -- * Media Handling
- MediaBag,
-- * Date/time
normalizeDate,
-- * Pandoc block and inline list processing
@@ -92,6 +90,7 @@ module Text.Pandoc.Shared (
import Text.Pandoc.Definition
import Text.Pandoc.Walk
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
@@ -106,7 +105,7 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
import qualified Data.Set as Set
import System.Directory
import Text.Pandoc.MIME (getMimeType)
-import System.FilePath ( (</>), takeExtension, dropExtension )
+import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
@@ -120,7 +119,6 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
-import qualified Data.ByteString.Lazy as BL
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
@@ -287,14 +285,6 @@ tabFilter tabStop =
x : go (spsToNextStop - 1) xs
in go tabStop
----
---- Media handling
----
-
--- | A map of media paths to their binary representations.
-
-type MediaBag = M.Map String BL.ByteString
-
--
-- Date/time
--
@@ -796,13 +786,9 @@ fetchItem sourceURL s
fetchItem' :: MediaBag -> Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
fetchItem' media sourceURL s = do
- case M.lookup s media of
+ case lookupMedia s media of
Nothing -> fetchItem sourceURL s
- Just bs -> do
- let mime = case takeExtension s of
- ".gz" -> getMimeType $ dropExtension s
- x -> getMimeType x
- return $ Right (BS.concat $ toChunks bs, mime)
+ Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 1f222b8b8..770b6f244 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -61,6 +61,7 @@ import Text.Pandoc.MIME (getMimeType)
import qualified Control.Exception as E
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
import Text.HTML.TagSoup
+import Data.Monoid
-- A Chapter includes a list of blocks and maybe a section
-- number offset. Note, some chapters are unnumbered. The section
@@ -793,7 +794,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
- raw <- makeSelfContained M.empty Nothing $ writeHtmlInline opts x
+ raw <- makeSelfContained mempty Nothing $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
transformInline opts mediaRef (RawInline fmt raw)
| fmt == Format "html" = do