aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2015-05-27 09:39:05 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2015-05-27 09:39:05 -0700
commitdbf2a63669bb40e3ecfcd4ae4317d495d50ec0a2 (patch)
treefe6ef5112855e66d0f5f92756bec118c0a3400a2 /src
parent1daa26468cd221c390cfd47dd48eb65e8b425433 (diff)
downloadpandoc-dbf2a63669bb40e3ecfcd4ae4317d495d50ec0a2.tar.gz
EPUB writer: Improved chapter splitting and internal link rewriting.
Closes #1887. Closes #2163. Closes #2162.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs87
1 files changed, 34 insertions, 53 deletions
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 4ce7857ac..cfbec7bfa 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -56,10 +56,10 @@ import Text.Pandoc.Options ( WriterOptions(..)
, EPUBVersion(..)
, ObfuscationMethod(NoObfuscation) )
import Text.Pandoc.Definition
-import Text.Pandoc.Walk (walk, walkM)
+import Text.Pandoc.Walk (walk, walkM, query)
import Data.Default
import Text.Pandoc.Writers.Markdown (writePlain)
-import Control.Monad.State (modify, get, execState, State, put, evalState)
+import Control.Monad.State (modify, get, State, put, evalState)
import Control.Monad (mplus, liftM, when)
import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
, strContent, lookupAttr, Node(..), QName(..), parseXML
@@ -408,11 +408,6 @@ writeEPUB opts doc@(Pandoc meta _) = do
(docTitle' meta) : blocks
let chapterHeaderLevel = writerEpubChapterLevel opts
- -- internal reference IDs change when we chunk the file,
- -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
- -- the next two lines fix that:
- let reftable = correlateRefs chapterHeaderLevel blocks'
- let blocks'' = replaceRefs reftable blocks'
let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel
isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) =
@@ -443,7 +438,37 @@ writeEPUB opts doc@(Pandoc meta _) = do
let (xs,ys) = break isChapterHeader bs
(Chapter Nothing (b:xs) :) `fmap` toChapters ys
- let chapters = evalState (toChapters blocks'') []
+ let chapters' = evalState (toChapters blocks') []
+
+ let extractLinkURL' :: Int -> Inline -> [(String, String)]
+ extractLinkURL' num (Span (ident, _, _) _)
+ | not (null ident) = [(ident, showChapter num ++ ('#':ident))]
+ extractLinkURL' _ _ = []
+
+ let extractLinkURL :: Int -> Block -> [(String, String)]
+ extractLinkURL num (Div (ident, _, _) _)
+ | not (null ident) = [(ident, showChapter num ++ ('#':ident))]
+ extractLinkURL num (Header _ (ident, _, _) _)
+ | not (null ident) = [(ident, showChapter num ++ ('#':ident))]
+ extractLinkURL num b = query (extractLinkURL' num) b
+
+ let reftable = concat $ zipWith (\(Chapter _ bs) num ->
+ query (extractLinkURL num) bs)
+ chapters' [1..]
+
+ let fixInternalReferences :: Inline -> Inline
+ fixInternalReferences (Link lab ('#':xs, tit)) =
+ case lookup xs reftable of
+ Just ys -> Link lab (ys, tit)
+ Nothing -> Link lab ('#':xs, tit)
+ fixInternalReferences x = x
+
+ -- internal reference IDs change when we chunk the file,
+ -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
+ -- this fixes that:
+ let chapters = map (\(Chapter mbnum bs) ->
+ Chapter mbnum $ walk fixInternalReferences bs)
+ chapters'
let chapToEntry :: Int -> Chapter -> Entry
chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
@@ -549,7 +574,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
let contentsEntry = mkEntry "content.opf" contentsData
-- toc.ncx
- let secs = hierarchicalize blocks''
+ let secs = hierarchicalize blocks'
let tocLevel = writerTOCDepth opts
@@ -889,11 +914,6 @@ mediaTypeOf x =
Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y
_ -> Nothing
-data IdentState = IdentState{
- chapterNumber :: Int,
- identTable :: [(String,String)]
- } deriving (Read, Show)
-
-- Returns filename for chapter number.
showChapter :: Int -> String
showChapter = printf "ch%03d.xhtml"
@@ -910,45 +930,6 @@ addIdentifiers bs = evalState (mapM go bs) []
return $ Header n (ident',classes,kvs) ils
go x = return x
--- Go through a block list and construct a table
--- correlating the automatically constructed references
--- that would be used in a normal pandoc document with
--- new URLs to be used in the EPUB. For example, what
--- was "header-1" might turn into "ch006.xhtml#header".
-correlateRefs :: Int -> [Block] -> [(String,String)]
-correlateRefs chapterHeaderLevel bs =
- identTable $ execState (walkM goBlock bs >>= walkM goInline)
- IdentState{ chapterNumber = 0
- , identTable = [] }
- where goBlock :: Block -> State IdentState Block
- goBlock x@(Header n (ident,_,_) _) = x <$ addIdentifier (Just n) ident
- goBlock x@(Div (ident,_,_) _) = x <$ addIdentifier Nothing ident
- goBlock x = return x
- goInline :: Inline -> State IdentState Inline
- goInline x@(Span (ident,_,_) _) = x <$ addIdentifier Nothing ident
- goInline x = return x
- addIdentifier mbHeaderLevel ident = do
- case mbHeaderLevel of
- Just n | n <= chapterHeaderLevel ->
- modify $ \s -> s{ chapterNumber = chapterNumber s + 1 }
- _ -> return ()
- st <- get
- let chapterid = showChapter (chapterNumber st) ++
- case mbHeaderLevel of
- Just n | n <= chapterHeaderLevel -> ""
- _ -> '#' : ident
- modify $ \s -> s{ identTable = (ident, chapterid) : identTable st }
-
--- Replace internal link references using the table produced
--- by correlateRefs.
-replaceRefs :: [(String,String)] -> [Block] -> [Block]
-replaceRefs refTable = walk replaceOneRef
- where replaceOneRef x@(Link lab ('#':xs,tit)) =
- case lookup xs refTable of
- Just url -> Link lab (url,tit)
- Nothing -> x
- replaceOneRef x = x
-
-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
normalizeDate' :: String -> Maybe String
normalizeDate' xs =