Introduce pure versions of IO Writers.

Using Text.Pandoc.Free, introduce pure versions of Docx, EPUB, ICML, and ODT writers. Each of the pure versions is exported along with the IO version (produced by running `runIO` on the pure reader). Ideally, this should make the writers easier to test.
author: Jesse Rosenthal <jrosenthal@jhu.edu> 2016-09-24 17:52:25 -0400
committer: John MacFarlane <jgm@berkeley.edu> 2017-01-25 17:07:39 +0100
commit: 32c68dada92eb142949c5be5224a3ddf20fcf484 (patch)
tree: 41ba1aaf202d0f6093218ab1ceadaf3b159c5a83 /src/Text/Pandoc/Writers/EPUB.hs
parent: 0ab4af2f03f4226714a39c959c161def679d9d57 (diff)
download: pandoc-32c68dada92eb142949c5be5224a3ddf20fcf484.tar.gz
1 files changed, 44 insertions, 37 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 00bf4a81c..4a93d52e2 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -28,26 +28,23 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Conversion of 'Pandoc' documents to EPUB.
 -}
-module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
-import Data.IORef ( IORef, newIORef, readIORef, modifyIORef )
+module Text.Pandoc.Writers.EPUB ( writeEPUB, writeEPUBPure ) where
+import Data.IORef ( IORef )
 import qualified Data.Map as M
 import qualified Data.Set as Set
 import Data.Maybe ( fromMaybe, catMaybes )
 import Data.List ( isPrefixOf, isInfixOf, intercalate )
-import System.Environment ( getEnv )
 import Text.Printf (printf)
 import System.FilePath ( takeExtension, takeFileName )
-import System.FilePath.Glob ( namesMatching )
 import Network.HTTP ( urlEncode )
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Char8 as B8
 import qualified Text.Pandoc.UTF8 as UTF8
 import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry                         , Entry, toEntry, fromArchive)
-import Data.Time.Clock.POSIX ( getPOSIXTime )
 import Text.Pandoc.Compat.Time
 import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim
-                          , normalizeDate, readDataFile, stringify, warn
-                          , hierarchicalize, fetchItem' )
+                          , normalizeDate, stringify
+                          , hierarchicalize )
 import qualified Text.Pandoc.Shared as S (Element(..))
 import Text.Pandoc.Builder (fromList, setMeta)
 import Text.Pandoc.Options ( WriterOptions(..)
@@ -58,17 +55,19 @@ import Text.Pandoc.Options ( WriterOptions(..)
 import Text.Pandoc.Definition
 import Text.Pandoc.Walk (walk, walkM, query)
 import Control.Monad.State (modify, get, State, put, evalState)
-import Control.Monad (mplus, liftM, when)
+import Control.Monad (mplus, when)
 import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
                       , strContent, lookupAttr, Node(..), QName(..), parseXML
                       , onlyElems, node, ppElement)
-import Text.Pandoc.UUID (getRandomUUID)
 import Text.Pandoc.Writers.HTML ( writeHtml )
 import Data.Char ( toLower, isDigit, isAlphaNum )
 import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
-import qualified Control.Exception as E
 import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
 import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
+import Text.Pandoc.Free (PandocAction, runIO)
+import qualified Text.Pandoc.Free as P
+
+type EPUBAction = PandocAction [(FilePath, (FilePath, Maybe Entry))]
 
 -- A Chapter includes a list of blocks and maybe a section
 -- number offset.  Note, some chapters are unnumbered. The section
@@ -143,7 +142,7 @@ removeNote :: Inline -> Inline
 removeNote (Note _) = Str ""
 removeNote x        = x
 
-getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata
+getEPUBMetadata :: WriterOptions -> Meta -> EPUBAction EPUBMetadata
 getEPUBMetadata opts meta = do
   let md = metadataFromMeta opts meta
   let elts = onlyElems $ parseXML $ writerEpubMetadata opts
@@ -151,7 +150,7 @@ getEPUBMetadata opts meta = do
   let addIdentifier m =
        if null (epubIdentifier m)
           then do
-            randomId <- fmap show getRandomUUID
+            randomId <- fmap show P.newUUID
             return $ m{ epubIdentifier = [Identifier randomId Nothing] }
           else return m
   let addLanguage m =
@@ -159,16 +158,19 @@ getEPUBMetadata opts meta = do
           then case lookup "lang" (writerVariables opts) of
                      Just x  -> return m{ epubLanguage = x }
                      Nothing -> do
-                       localeLang <- E.catch (liftM
-                            (map (\c -> if c == '_' then '-' else c) .
-                             takeWhile (/='.')) $ getEnv "LANG")
-                          (\e -> let _ = (e :: E.SomeException) in return "en-US")
+                       mLang <- P.lookupEnv "LANG"
+                       let localeLang =
+                             case mLang of
+                               Just lang ->
+                                 map (\c -> if c == '_' then '-' else c) $
+                                 takeWhile (/='.') lang
+                               Nothing -> "en-US"
                        return m{ epubLanguage = localeLang }
           else return m
   let fixDate m =
        if null (epubDate m)
           then do
-            currentTime <- getCurrentTime
+            currentTime <- P.getCurrentTime
             return $ m{ epubDate = [ Date{
                              dateText = showDateTimeISO8601 currentTime
                            , dateEvent = Nothing } ] }
@@ -333,10 +335,15 @@ metadataFromMeta opts meta = EPUBMetadata{
 writeEPUB :: WriterOptions  -- ^ Writer options
           -> Pandoc         -- ^ Document to convert
           -> IO B.ByteString
-writeEPUB opts doc@(Pandoc meta _) = do
+writeEPUB opts doc = runIO $ writeEPUBPure opts doc
+
+writeEPUBPure :: WriterOptions  -- ^ Writer options
+              -> Pandoc         -- ^ Document to convert
+              -> EPUBAction B.ByteString
+writeEPUBPure opts doc@(Pandoc meta _) = do
   let version = fromMaybe EPUB2 (writerEpubVersion opts)
   let epub3 = version == EPUB3
-  epochtime <- floor `fmap` getPOSIXTime
+  epochtime <- floor <$> P.getPOSIXTime
   let mkEntry path content = toEntry path epochtime content
   let vars = ("epub3", if epub3 then "true" else "false")
            : ("css", "stylesheet.css")
@@ -361,7 +368,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
                        let cpContent = renderHtml $ writeHtml
                             opts'{ writerVariables = ("coverpage","true"):vars }
                             (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
-                       imgContent <- B.readFile img
+                       imgContent <- P.readFileLazy img
                        return ( [mkEntry "cover.xhtml" cpContent]
                               , [mkEntry coverImage imgContent] )
 
@@ -372,18 +379,18 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let tpEntry = mkEntry "title_page.xhtml" tpContent
 
   -- handle pictures
-  mediaRef <- newIORef []
+  mediaRef <- P.newIORef []
   Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>=
                      walkM (transformBlock opts' mediaRef)
-  picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef
+  picEntries <- (catMaybes . map (snd . snd)) <$> P.readIORef mediaRef
 
   -- handle fonts
   let matchingGlob f = do
-        xs <- namesMatching f
+        xs <- P.namesMatching f
         when (null xs) $
-          warn $ f ++ " did not match any font files."
+          P.warn $ f ++ " did not match any font files."
         return xs
-  let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
+  let mkFontEntry f = mkEntry (takeFileName f) `fmap` P.readFileLazy f
   fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts')
   fontEntries <- mapM mkFontEntry fontFiles
 
@@ -520,7 +527,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
   let uuid = case epubIdentifier metadata of
                   (x:_) -> identifierText x  -- use first identifier as UUID
                   []    -> error "epubIdentifier is null"  -- shouldn't happen
-  currentTime <- getCurrentTime
+  currentTime <- P.getCurrentTime
   let contentsData = UTF8.fromStringLazy $ ppTopElement $
         unode "package" ! [("version", case version of
                                              EPUB2 -> "2.0"
@@ -692,10 +699,10 @@ writeEPUB opts doc@(Pandoc meta _) = do
 
   -- stylesheet
   stylesheet <- case epubStylesheet metadata of
-                   Just (StylesheetPath fp)    -> UTF8.readFile fp
+                   Just (StylesheetPath fp)    -> P.readFileUTF8 fp
                    Just (StylesheetContents s) -> return s
                    Nothing -> UTF8.toString `fmap`
-                              readDataFile (writerUserDataDir opts) "epub.css"
+                              P.readDataFile (writerUserDataDir opts) "epub.css"
   let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet
 
   -- construct archive
@@ -814,7 +821,7 @@ showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
 transformTag :: WriterOptions
              -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
              -> Tag String
-             -> IO (Tag String)
+             -> EPUBAction (Tag String)
 transformTag opts mediaRef tag@(TagOpen name attr)
   | name `elem` ["video", "source", "img", "audio"] &&
     lookup "data-external" attr == Nothing = do
@@ -831,34 +838,34 @@ transformTag _ _ tag = return tag
 modifyMediaRef :: WriterOptions
                -> IORef [(FilePath, (FilePath, Maybe Entry))]
                -> FilePath
-               -> IO FilePath
+               -> EPUBAction FilePath
 modifyMediaRef _ _ "" = return ""
 modifyMediaRef opts mediaRef oldsrc = do
-  media <- readIORef mediaRef
+  media <- P.readIORef mediaRef
   case lookup oldsrc media of
          Just (n,_) -> return n
          Nothing    -> do
-           res <- fetchItem' (writerMediaBag opts)
+           res <- P.fetchItem' (writerMediaBag opts)
                     (writerSourceURL opts) oldsrc
            (new, mbEntry) <-
                 case res of
                       Left _        -> do
-                        warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
+                        P.warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
                         return (oldsrc, Nothing)
                       Right (img,mbMime) -> do
                         let new = "media/file" ++ show (length media) ++
                                fromMaybe (takeExtension (takeWhile (/='?') oldsrc))
                                  (('.':) <$> (mbMime >>= extensionFromMimeType))
-                        epochtime <- floor `fmap` getPOSIXTime
+                        epochtime <- floor `fmap` P.getPOSIXTime
                         let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img
                         return (new, Just entry)
-           modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): )
+           P.modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): )
            return new
 
 transformBlock  :: WriterOptions
                 -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media
                 -> Block
-                -> IO Block
+                -> EPUBAction Block
 transformBlock opts mediaRef (RawBlock fmt raw)
   | fmt == Format "html" = do
   let tags = parseTags raw
@@ -869,7 +876,7 @@ transformBlock _ _ b = return b
 transformInline  :: WriterOptions
                  -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media
                  -> Inline
-                 -> IO Inline
+                 -> EPUBAction Inline
 transformInline opts mediaRef (Image attr lab (src,tit)) = do
     newsrc <- modifyMediaRef opts mediaRef src
     return $ Image attr lab (newsrc, tit)
author	Jesse Rosenthal <jrosenthal@jhu.edu>	2016-09-24 17:52:25 -0400
committer	John MacFarlane <jgm@berkeley.edu>	2017-01-25 17:07:39 +0100
commit	32c68dada92eb142949c5be5224a3ddf20fcf484 (patch)
tree	41ba1aaf202d0f6093218ab1ceadaf3b159c5a83 /src/Text/Pandoc/Writers/EPUB.hs
parent	0ab4af2f03f4226714a39c959c161def679d9d57 (diff)
download	pandoc-32c68dada92eb142949c5be5224a3ddf20fcf484.tar.gz