diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/MediaBag.hs | 107 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 26 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 3 |
6 files changed, 137 insertions, 36 deletions
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs new file mode 100644 index 000000000..667089f55 --- /dev/null +++ b/src/Text/Pandoc/MediaBag.hs @@ -0,0 +1,107 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{- +Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.MediaBag + Copyright : Copyright (C) 2014 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Definition of a MediaBag object to hold binary resources, and an +interface for interacting with it. +-} +module Text.Pandoc.MediaBag ( + MediaBag, + lookupMedia, + insertMedia, + mediaDirectory, + extractMediaBag + ) where +import System.FilePath +import System.Directory (createDirectoryIfMissing) +import qualified Data.Map as M +import qualified Data.ByteString.Lazy as BL +import Data.Monoid (Monoid) +import Control.Monad (when, MonadPlus(..)) +import Text.Pandoc.MIME (getMimeType) +import qualified Text.Pandoc.UTF8 as UTF8 +import Data.Maybe (fromMaybe) +import System.IO (stderr) + +-- | A container for a collection of binary resources, with names and +-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' +-- can be used for an empty 'MediaBag', and '<>' can be used to append +-- two 'MediaBag's. +newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString)) + deriving (Monoid) + +instance Show MediaBag where + show bag = "MediaBag " ++ show (mediaDirectory bag) + +-- | Insert a media item into a 'MediaBag', replacing any existing +-- value with the same name. +insertMedia :: FilePath -- ^ relative path and canonical name of resource + -> Maybe String -- ^ mime type (Nothing = determine from extension) + -> BL.ByteString -- ^ contents of resource + -> MediaBag + -> MediaBag +insertMedia fp mbMime contents (MediaBag mediamap) = + MediaBag (M.insert fp (mime, contents) mediamap) + where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback) + fallback = case takeExtension fp of + ".gz" -> getMimeType $ dropExtension fp + _ -> getMimeType fp + +-- | Lookup a media item in a 'MediaBag', returning mime type and contents. +lookupMedia :: FilePath + -> MediaBag + -> Maybe (String, BL.ByteString) +lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap + +-- | Get a list of the file paths stored in a 'MediaBag', with +-- their corresponding mime types and the lengths in bytes of the contents. +mediaDirectory :: MediaBag -> [(String, String, Int)] +mediaDirectory (MediaBag mediamap) = + M.foldWithKey (\fp (mime,contents) -> + ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap + +-- | Extract contents of MediaBag to a given directory. Print informational +-- messages if 'verbose' is true. +extractMediaBag :: Bool + -> FilePath + -> MediaBag + -> IO () +extractMediaBag verbose dir (MediaBag mediamap) = do + sequence_ $ M.foldWithKey + (\fp (_ ,contents) -> + ((writeMedia verbose dir (fp, contents)):)) [] mediamap + +writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () +writeMedia verbose dir (subpath, bs) = do + -- we join and split to convert a/b/c to a\b\c on Windows; + -- in zip containers all paths use / + let fullpath = dir </> joinPath (splitPath subpath) + createDirectoryIfMissing True $ takeDirectory fullpath + when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath + BL.writeFile fullpath bs + + diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 2de1a9e35..85a6a3096 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -49,8 +49,8 @@ import Data.Set (Set) import qualified Data.Set as Set import Data.Default import Text.Pandoc.Highlighting (Style, pygments) -import Text.Pandoc.Shared (MediaBag) -import qualified Data.Map as M +import Text.Pandoc.MediaBag (MediaBag) +import Data.Monoid -- | Individually selectable syntax extensions. data Extension = @@ -359,7 +359,7 @@ instance Default WriterOptions where , writerTOCDepth = 3 , writerReferenceODT = Nothing , writerReferenceDocx = Nothing - , writerMediaBag = M.empty + , writerMediaBag = mempty } -- | Returns True if the given extension is enabled. diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index f5fb6565a..86ce62ced 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -84,8 +84,10 @@ import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Readers.Docx.TexChar import Text.Pandoc.Shared +import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Data.Maybe (mapMaybe, fromMaybe) import Data.List (delete, isPrefixOf, (\\), intercalate, intersect) +import Data.Monoid import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import Control.Monad.Reader @@ -108,7 +110,7 @@ data DState = DState { docxAnchorMap :: M.Map String String defaultDState :: DState defaultDState = DState { docxAnchorMap = M.empty - , docxMediaBag = M.empty + , docxMediaBag = mempty , docxInHeaderBlock = False , docxInTexSubscript = False} @@ -217,7 +219,8 @@ runStyleToContainers rPr = , if isStrike rPr then (Just Strikeout) else Nothing , if isSuperScript rPr then (Just Superscript) else Nothing , if isSubScript rPr then (Just Subscript) else Nothing - , rUnderline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) + , rUnderline rPr >>= + (\f -> if f == "single" then (Just Emph) else Nothing) ] in classContainers ++ formatters @@ -369,7 +372,7 @@ parPartToInlines (BookMark _ anchor) = return [Span (newAnchor, ["anchor"], []) []] parPartToInlines (Drawing fp bs) = do mediaBag <- gets docxMediaBag - modify $ \s -> s { docxMediaBag = M.insert fp bs mediaBag} + modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } return [Image [] (fp, "")] parPartToInlines (InternalHyperLink anchor runs) = do ils <- concatMapM runToInlines runs diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 777da3551..adb2c0014 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -41,11 +41,11 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, MediaBag) +import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia) import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) -import qualified Data.Map as M isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c @@ -110,16 +110,20 @@ getItem media userdata f = -- this is needed for things like cmunrm.eot?#iefix, -- which is used to get old versions of IE to work with web fonts. let f' = takeWhile (\c -> c /= '?' && c /= '#') f - let mime = case takeExtension f' of - ".gz" -> getMimeType $ dropExtension f' - x -> getMimeType x + let mbMime = case takeExtension f' of + ".gz" -> getMimeType $ dropExtension f' + x -> getMimeType x exists <- doesFileExist f' - cont <- if exists - then B.readFile f' - else case M.lookup f media of - Just bs -> return $ BS.concat $ L.toChunks bs - Nothing -> readDataFile userdata f' - return (cont, mime) + if exists + then do + cont <- B.readFile f' + return (cont, mbMime) + else case lookupMedia f media of + Just (mime,bs) -> return (BS.concat $ L.toChunks bs, + Just mime) + Nothing -> do + cont <- readDataFile userdata f' + return (cont, mbMime) where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e getRaw :: MediaBag -> Maybe FilePath -> String -> String diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 81aa6cf5a..d5769c1ab 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -48,8 +48,6 @@ module Text.Pandoc.Shared ( toRomanNumeral, escapeURI, tabFilter, - -- * Media Handling - MediaBag, -- * Date/time normalizeDate, -- * Pandoc block and inline list processing @@ -92,6 +90,7 @@ module Text.Pandoc.Shared ( import Text.Pandoc.Definition import Text.Pandoc.Walk +import Text.Pandoc.MediaBag (MediaBag, lookupMedia) import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 @@ -106,7 +105,7 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, import qualified Data.Set as Set import System.Directory import Text.Pandoc.MIME (getMimeType) -import System.FilePath ( (</>), takeExtension, dropExtension ) +import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import qualified Control.Exception as E @@ -120,7 +119,6 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 -import qualified Data.ByteString.Lazy as BL import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) @@ -287,14 +285,6 @@ tabFilter tabStop = x : go (spsToNextStop - 1) xs in go tabStop ---- ---- Media handling ---- - --- | A map of media paths to their binary representations. - -type MediaBag = M.Map String BL.ByteString - -- -- Date/time -- @@ -796,13 +786,9 @@ fetchItem sourceURL s fetchItem' :: MediaBag -> Maybe String -> String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) fetchItem' media sourceURL s = do - case M.lookup s media of + case lookupMedia s media of Nothing -> fetchItem sourceURL s - Just bs -> do - let mime = case takeExtension s of - ".gz" -> getMimeType $ dropExtension s - x -> getMimeType x - return $ Right (BS.concat $ toChunks bs, mime) + Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime) -- | Read from a URL and return raw data and maybe mime type. openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 1f222b8b8..770b6f244 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -61,6 +61,7 @@ import Text.Pandoc.MIME (getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup +import Data.Monoid -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -793,7 +794,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do - raw <- makeSelfContained M.empty Nothing $ writeHtmlInline opts x + raw <- makeSelfContained mempty Nothing $ writeHtmlInline opts x return $ RawInline (Format "html") raw transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do |