aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/ImageSize.hs233
-rw-r--r--src/Text/Pandoc/MIME.hs4
-rw-r--r--src/Text/Pandoc/Options.hs2
-rw-r--r--src/Text/Pandoc/PDF.hs29
-rw-r--r--src/Text/Pandoc/Parsing.hs49
-rw-r--r--src/Text/Pandoc/Pretty.hs14
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs11
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs24
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs11
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs125
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs30
-rw-r--r--src/Text/Pandoc/Readers/Org.hs552
-rw-r--r--src/Text/Pandoc/Readers/RST.hs116
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs2
-rw-r--r--src/Text/Pandoc/SelfContained.hs18
-rw-r--r--src/Text/Pandoc/Shared.hs32
-rw-r--r--src/Text/Pandoc/Templates.hs1
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs8
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs47
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs814
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs6
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs49
-rw-r--r--src/Text/Pandoc/Writers/ICML.hs525
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs84
-rw-r--r--src/Text/Pandoc/Writers/Man.hs2
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs38
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs2
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs53
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs26
-rw-r--r--src/Text/Pandoc/Writers/Org.hs2
-rw-r--r--src/Text/Pandoc/Writers/RST.hs10
-rw-r--r--src/Text/Pandoc/Writers/Shared.hs30
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs6
-rw-r--r--src/Text/Pandoc/Writers/Textile.hs2
34 files changed, 2569 insertions, 388 deletions
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 9b0850efb..3c9623b3c 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -32,9 +32,14 @@ module Text.Pandoc.ImageSize ( ImageType(..), imageType, imageSize,
sizeInPixels, sizeInPoints ) where
import Data.ByteString (ByteString, unpack)
import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Lazy as BL
+import Control.Applicative
import Control.Monad
import Data.Bits
+import Data.Binary
+import Data.Binary.Get
import Text.Pandoc.Shared (safeRead)
+import qualified Data.Map as M
-- quick and dirty functions to get image sizes
-- algorithms borrowed from wwwis.pl
@@ -53,7 +58,8 @@ imageType :: ByteString -> Maybe ImageType
imageType img = case B.take 4 img of
"\x89\x50\x4e\x47" -> return Png
"\x47\x49\x46\x38" -> return Gif
- "\xff\xd8\xff\xe0" -> return Jpeg
+ "\xff\xd8\xff\xe0" -> return Jpeg -- JFIF
+ "\xff\xd8\xff\xe1" -> return Jpeg -- Exif
"%PDF" -> return Pdf
"%!PS"
| (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
@@ -139,8 +145,14 @@ gifSize img = do
jpegSize :: ByteString -> Maybe ImageSize
jpegSize img = do
let (hdr, rest) = B.splitAt 4 img
- guard $ hdr == "\xff\xd8\xff\xe0"
guard $ B.length rest >= 14
+ case hdr of
+ "\xff\xd8\xff\xe0" -> jfifSize rest
+ "\xff\xd8\xff\xe1" -> exifSize $ B.takeWhile (/= '\xff') rest
+ _ -> mzero
+
+jfifSize :: ByteString -> Maybe ImageSize
+jfifSize rest = do
let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral
$ unpack $ B.take 5 $ B.drop 9 $ rest
let factor = case dpiDensity of
@@ -149,11 +161,11 @@ jpegSize img = do
_ -> const 72
let dpix = factor (shift dpix1 8 + dpix2)
let dpiy = factor (shift dpiy1 8 + dpiy2)
- (w,h) <- findJpegSize rest
+ (w,h) <- findJfifSize rest
return $ ImageSize { pxX = w, pxY = h, dpiX = dpix, dpiY = dpiy }
-findJpegSize :: ByteString -> Maybe (Integer,Integer)
-findJpegSize bs = do
+findJfifSize :: ByteString -> Maybe (Integer,Integer)
+findJfifSize bs = do
let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs
case B.uncons bs' of
Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do
@@ -165,8 +177,217 @@ findJpegSize bs = do
[c1,c2] -> do
let len = shift c1 8 + c2
-- skip variables
- findJpegSize $ B.drop len bs''
+ findJfifSize $ B.drop len bs''
_ -> fail "JPEG parse error"
Nothing -> fail "Did not find length record"
+exifSize :: ByteString -> Maybe ImageSize
+exifSize bs = runGet (Just <$> exifHeader bl) bl
+ where bl = BL.fromChunks [bs]
+-- NOTE: It would be nicer to do
+-- runGet ((Just <$> exifHeader) <|> return Nothing)
+-- which would prevent pandoc from raising an error when an exif header can't
+-- be parsed. But we only get an Alternative instance for Get in binary 0.6,
+-- and binary 0.5 ships with ghc 7.6.
+
+exifHeader :: BL.ByteString -> Get ImageSize
+exifHeader hdr = do
+ _app1DataSize <- getWord16be
+ exifHdr <- getWord32be
+ unless (exifHdr == 0x45786966) $ fail "Did not find exif header"
+ zeros <- getWord16be
+ unless (zeros == 0) $ fail "Expected zeros after exif header"
+ -- beginning of tiff header -- we read whole thing to use
+ -- in getting data from offsets:
+ let tiffHeader = BL.drop 8 hdr
+ byteAlign <- getWord16be
+ let bigEndian = byteAlign == 0x4d4d
+ let (getWord16, getWord32, getWord64) =
+ if bigEndian
+ then (getWord16be, getWord32be, getWord64be)
+ else (getWord16le, getWord32le, getWord64le)
+ let getRational = do
+ num <- getWord32
+ den <- getWord32
+ return $ fromIntegral num / fromIntegral den
+ tagmark <- getWord16
+ unless (tagmark == 0x002a) $ fail "Failed alignment sanity check"
+ ifdOffset <- getWord32
+ skip (fromIntegral ifdOffset - 8) -- skip to IDF
+ numentries <- getWord16
+ let ifdEntry = do
+ tag <- getWord16 >>= \t ->
+ maybe (fail $ "Unknown tag type " ++ show t) return
+ (M.lookup t tagTypeTable)
+ dataFormat <- getWord16
+ numComponents <- getWord32
+ (fmt, bytesPerComponent) <-
+ case dataFormat of
+ 1 -> return (UnsignedByte . runGet getWord8, 1)
+ 2 -> return (AsciiString, 1)
+ 3 -> return (UnsignedShort . runGet getWord16, 2)
+ 4 -> return (UnsignedLong . runGet getWord32, 4)
+ 5 -> return (UnsignedRational . runGet getRational, 8)
+ 6 -> return (SignedByte . runGet getWord8, 1)
+ 7 -> return (Undefined . runGet getWord8, 1)
+ 8 -> return (SignedShort . runGet getWord16, 2)
+ 9 -> return (SignedLong . runGet getWord32, 4)
+ 10 -> return (SignedRational . runGet getRational, 8)
+ 11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4)
+ 12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8)
+ _ -> fail $ "Unknown data format " ++ show dataFormat
+ let totalBytes = fromIntegral $ numComponents * bytesPerComponent
+ payload <- if totalBytes <= 4 -- data is right here
+ then fmt <$>
+ (getLazyByteString (fromIntegral totalBytes) <*
+ skip (4 - totalBytes))
+ else do -- get data from offset
+ offs <- getWord32
+ return $ fmt $ BL.take (fromIntegral totalBytes) $
+ BL.drop (fromIntegral offs) tiffHeader
+ return (tag, payload)
+ entries <- sequence $ replicate (fromIntegral numentries) ifdEntry
+ subentries <- case lookup ExifOffset entries of
+ Just (UnsignedLong offset) -> do
+ pos <- bytesRead
+ skip (fromIntegral offset - (fromIntegral pos - 8))
+ numsubentries <- getWord16
+ sequence $
+ replicate (fromIntegral numsubentries) ifdEntry
+ _ -> return []
+ let allentries = entries ++ subentries
+ (width, height) <- case (lookup ExifImageWidth allentries,
+ lookup ExifImageHeight allentries) of
+ (Just (UnsignedLong w), Just (UnsignedLong h)) ->
+ return (fromIntegral w, fromIntegral h)
+ _ -> fail "Could not determine image width, height"
+ let resfactor = case lookup ResolutionUnit allentries of
+ Just (UnsignedShort 1) -> (100 / 254)
+ _ -> 1
+ let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
+ $ lookup XResolution allentries
+ let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor)
+ $ lookup YResolution allentries
+ return $ ImageSize{
+ pxX = width
+ , pxY = height
+ , dpiX = xres
+ , dpiY = yres }
+
+data DataFormat = UnsignedByte Word8
+ | AsciiString BL.ByteString
+ | UnsignedShort Word16
+ | UnsignedLong Word32
+ | UnsignedRational Rational
+ | SignedByte Word8
+ | Undefined Word8
+ | SignedShort Word16
+ | SignedLong Word32
+ | SignedRational Rational
+ | SingleFloat Word32
+ | DoubleFloat Word64
+ deriving (Show)
+
+data TagType = ImageDescription
+ | Make
+ | Model
+ | Orientation
+ | XResolution
+ | YResolution
+ | ResolutionUnit
+ | Software
+ | DateTime
+ | WhitePoint
+ | PrimaryChromaticities
+ | YCbCrCoefficients
+ | YCbCrPositioning
+ | ReferenceBlackWhite
+ | Copyright
+ | ExifOffset
+ | ExposureTime
+ | FNumber
+ | ExposureProgram
+ | ISOSpeedRatings
+ | ExifVersion
+ | DateTimeOriginal
+ | DateTimeDigitized
+ | ComponentConfiguration
+ | CompressedBitsPerPixel
+ | ShutterSpeedValue
+ | ApertureValue
+ | BrightnessValue
+ | ExposureBiasValue
+ | MaxApertureValue
+ | SubjectDistance
+ | MeteringMode
+ | LightSource
+ | Flash
+ | FocalLength
+ | MakerNote
+ | UserComment
+ | FlashPixVersion
+ | ColorSpace
+ | ExifImageWidth
+ | ExifImageHeight
+ | RelatedSoundFile
+ | ExifInteroperabilityOffset
+ | FocalPlaneXResolution
+ | FocalPlaneYResolution
+ | FocalPlaneResolutionUnit
+ | SensingMethod
+ | FileSource
+ | SceneType
+ deriving (Show, Eq, Ord)
+tagTypeTable :: M.Map Word16 TagType
+tagTypeTable = M.fromList
+ [ (0x010e, ImageDescription)
+ , (0x010f, Make)
+ , (0x0110, Model)
+ , (0x0112, Orientation)
+ , (0x011a, XResolution)
+ , (0x011b, YResolution)
+ , (0x0128, ResolutionUnit)
+ , (0x0131, Software)
+ , (0x0132, DateTime)
+ , (0x013e, WhitePoint)
+ , (0x013f, PrimaryChromaticities)
+ , (0x0211, YCbCrCoefficients)
+ , (0x0213, YCbCrPositioning)
+ , (0x0214, ReferenceBlackWhite)
+ , (0x8298, Copyright)
+ , (0x8769, ExifOffset)
+ , (0x829a, ExposureTime)
+ , (0x829d, FNumber)
+ , (0x8822, ExposureProgram)
+ , (0x8827, ISOSpeedRatings)
+ , (0x9000, ExifVersion)
+ , (0x9003, DateTimeOriginal)
+ , (0x9004, DateTimeDigitized)
+ , (0x9101, ComponentConfiguration)
+ , (0x9102, CompressedBitsPerPixel)
+ , (0x9201, ShutterSpeedValue)
+ , (0x9202, ApertureValue)
+ , (0x9203, BrightnessValue)
+ , (0x9204, ExposureBiasValue)
+ , (0x9205, MaxApertureValue)
+ , (0x9206, SubjectDistance)
+ , (0x9207, MeteringMode)
+ , (0x9208, LightSource)
+ , (0x9209, Flash)
+ , (0x920a, FocalLength)
+ , (0x927c, MakerNote)
+ , (0x9286, UserComment)
+ , (0xa000, FlashPixVersion)
+ , (0xa001, ColorSpace)
+ , (0xa002, ExifImageWidth)
+ , (0xa003, ExifImageHeight)
+ , (0xa004, RelatedSoundFile)
+ , (0xa005, ExifInteroperabilityOffset)
+ , (0xa20e, FocalPlaneXResolution)
+ , (0xa20f, FocalPlaneYResolution)
+ , (0xa210, FocalPlaneResolutionUnit)
+ , (0xa217, SensingMethod)
+ , (0xa300, FileSource)
+ , (0xa301, SceneType)
+ ]
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 1f5f6f862..977cb576b 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -147,6 +147,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("dxr","application/x-director")
,("emb","chemical/x-embl-dl-nucleotide")
,("embl","chemical/x-embl-dl-nucleotide")
+ ,("emf","image/x-emf")
,("eml","message/rfc822")
,("ent","chemical/x-ncbi-asn1-ascii")
,("eot","application/vnd.ms-fontobject")
@@ -220,6 +221,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("jnlp","application/x-java-jnlp-file")
,("jpe","image/jpeg")
,("jpeg","image/jpeg")
+ ,("jfif","image/jpeg")
,("jpg","image/jpeg")
,("js","application/x-javascript")
,("kar","audio/midi")
@@ -244,6 +246,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("lzx","application/x-lzx")
,("m3u","audio/mpegurl")
,("m4a","audio/mpeg")
+ ,("m4v","video/x-m4v")
,("maker","application/x-maker")
,("man","application/x-troff-man")
,("mcif","chemical/x-mmcif")
@@ -464,6 +467,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("wm","video/x-ms-wm")
,("wma","audio/x-ms-wma")
,("wmd","application/x-ms-wmd")
+ ,("wmf","image/x-wmf")
,("wml","text/vnd.wap.wml")
,("wmlc","application/vnd.wap.wmlc")
,("wmls","text/vnd.wap.wmlscript")
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 5f65abdde..38220f542 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -210,6 +210,7 @@ data ReaderOptions = ReaderOptions{
, readerIndentedCodeClasses :: [String] -- ^ Default classes for
-- indented code blocks
, readerDefaultImageExtension :: String -- ^ Default extension for images
+ , readerTrace :: Bool -- ^ Print debugging info
} deriving (Show, Read)
instance Default ReaderOptions
@@ -225,6 +226,7 @@ instance Default ReaderOptions
, readerApplyMacros = True
, readerIndentedCodeClasses = []
, readerDefaultImageExtension = ""
+ , readerTrace = False
}
--
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index e8683b98f..608cad2e9 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -41,6 +41,7 @@ import System.Directory
import System.Environment
import Control.Monad (unless)
import Data.List (isInfixOf)
+import Data.Maybe (fromMaybe)
import qualified Data.ByteString.Base64 as B64
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Definition
@@ -50,6 +51,9 @@ import Text.Pandoc.Options (WriterOptions(..))
import Text.Pandoc.MIME (extensionFromMimeType)
import Text.Pandoc.Process (pipeProcess)
import qualified Data.ByteString.Lazy as BL
+#ifdef _WINDOWS
+import Data.List (intercalate)
+#endif
withTempDir :: String -> (FilePath -> IO a) -> IO a
withTempDir =
@@ -59,6 +63,11 @@ withTempDir =
withSystemTempDirectory
#endif
+#ifdef _WINDOWS
+changePathSeparators :: FilePath -> FilePath
+changePathSeparators = intercalate "/" . splitDirectories
+#endif
+
makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex)
-> (WriterOptions -> Pandoc -> String) -- ^ writer
-> WriterOptions -- ^ options
@@ -87,7 +96,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do
res <- fetchItem baseURL src
case res of
Right (contents, Just mime) -> do
- let ext = maybe (takeExtension src) id $
+ let ext = fromMaybe (takeExtension src) $
extensionFromMimeType mime
let basename = UTF8.toString $ B64.encode $ UTF8.fromString src
let fname = tmpdir </> basename <.> ext
@@ -107,7 +116,6 @@ tex2pdf' tmpDir program source = do
then 3 -- to get page numbers
else 2 -- 1 run won't give you PDF bookmarks
(exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source
- let msg = "Error producing PDF from TeX source.\n"
case (exit, mbPdf) of
(ExitFailure _, _) -> do
let logmsg = extractMsg log'
@@ -116,8 +124,8 @@ tex2pdf' tmpDir program source = do
x | "! Package inputenc Error" `BC.isPrefixOf` x ->
"\nTry running pandoc with --latex-engine=xelatex."
_ -> ""
- return $ Left $ msg <> logmsg <> extramsg
- (ExitSuccess, Nothing) -> return $ Left msg
+ return $ Left $ logmsg <> extramsg
+ (ExitSuccess, Nothing) -> return $ Left ""
(ExitSuccess, Just pdf) -> return $ Right pdf
(<>) :: ByteString -> ByteString -> ByteString
@@ -145,10 +153,19 @@ runTeXProgram program runsLeft tmpDir source = do
let file = tmpDir </> "input.tex"
exists <- doesFileExist file
unless exists $ UTF8.writeFile file source
+#ifdef _WINDOWS
+ -- note: we want / even on Windows, for TexLive
+ let tmpDir' = changePathSeparators tmpDir
+ let file' = changePathSeparators file
+#else
+ let tmpDir' = tmpDir
+ let file' = file
+#endif
let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
- "-output-directory", tmpDir, file]
+ "-output-directory", tmpDir', file']
env' <- getEnvironment
- let texinputs = maybe (tmpDir ++ ":") ((tmpDir ++ ":") ++)
+ let sep = searchPathSeparator:[]
+ let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++)
$ lookup "TEXINPUTS" env'
let env'' = ("TEXINPUTS", texinputs) :
[(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 9687d7712..883a560d0 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -48,6 +48,8 @@ module Text.Pandoc.Parsing ( (>>~),
romanNumeral,
emailAddress,
uri,
+ mathInline,
+ mathDisplay,
withHorizDisplacement,
withRaw,
escaped,
@@ -172,13 +174,13 @@ import Text.Pandoc.Asciify (toAsciiChar)
import Data.Default
import qualified Data.Set as Set
import Control.Monad.Reader
-import Control.Applicative ((*>), (<*), (<$), liftA2)
+import Control.Applicative ((*>), (<*), (<$), liftA2, Applicative)
import Data.Monoid
import Data.Maybe (catMaybes)
type Parser t s = Parsec t s
-newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Functor)
+newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
runF :: F a -> ParserState -> a
runF = runReader . unF
@@ -269,7 +271,7 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-- | Parses a nonspace, nonnewline character.
nonspaceChar :: Parser [Char] st Char
-nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r'
+nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r']
-- | Skips zero or more spaces or tabs.
skipSpaces :: Parser [Char] st ()
@@ -455,6 +457,39 @@ uri = try $ do
let uri' = scheme ++ ":" ++ fromEntities str'
return (uri', escapeURI uri')
+mathInlineWith :: String -> String -> Parser [Char] st String
+mathInlineWith op cl = try $ do
+ string op
+ notFollowedBy space
+ words' <- many1Till (count 1 (noneOf "\n\\")
+ <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
+ <|> count 1 newline <* notFollowedBy' blankline
+ *> return " ")
+ (try $ string cl)
+ notFollowedBy digit -- to prevent capture of $5
+ return $ concat words'
+
+mathDisplayWith :: String -> String -> Parser [Char] st String
+mathDisplayWith op cl = try $ do
+ string op
+ many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
+
+mathDisplay :: Parser [Char] ParserState String
+mathDisplay =
+ (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
+ <|> (guardEnabled Ext_tex_math_single_backslash >>
+ mathDisplayWith "\\[" "\\]")
+ <|> (guardEnabled Ext_tex_math_double_backslash >>
+ mathDisplayWith "\\\\[" "\\\\]")
+
+mathInline :: Parser [Char] ParserState String
+mathInline =
+ (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
+ <|> (guardEnabled Ext_tex_math_single_backslash >>
+ mathInlineWith "\\(" "\\)")
+ <|> (guardEnabled Ext_tex_math_double_backslash >>
+ mathInlineWith "\\\\(" "\\\\)")
+
-- | Applies a parser, returns tuple of its results and its horizontal
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement
@@ -818,6 +853,11 @@ data ParserState = ParserState
stateHasChapters :: Bool, -- ^ True if \chapter encountered
stateMacros :: [Macro], -- ^ List of macros defined so far
stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
+ stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles
+ -- Triple represents: 1) Base role, 2) Optional format (only for :raw:
+ -- roles), 3) Source language annotation for code (could be used to
+ -- annotate role classes too).
+
stateWarnings :: [String] -- ^ Warnings generated by the parser
}
@@ -880,6 +920,7 @@ defaultParserState =
stateHasChapters = False,
stateMacros = [],
stateRstDefaultRole = "title-reference",
+ stateRstCustomRoles = M.empty,
stateWarnings = []}
getOption :: (ReaderOptions -> a) -> Parser s ParserState a
@@ -1027,7 +1068,7 @@ doubleQuoteStart :: Parser [Char] ParserState ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
try $ do charOrRef "\"\8220\147"
- notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n'))
+ notFollowedBy . satisfy $ flip elem [' ', '\t', '\n']
doubleQuoteEnd :: Parser [Char] st ()
doubleQuoteEnd = do
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index faf2a6797..5331587ce 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -60,6 +60,7 @@ module Text.Pandoc.Pretty (
, hsep
, vcat
, vsep
+ , nestle
, chomp
, inside
, braces
@@ -72,7 +73,7 @@ module Text.Pandoc.Pretty (
)
where
-import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex)
+import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..))
import Data.Foldable (toList)
import Data.List (intercalate)
import Data.Monoid
@@ -80,8 +81,7 @@ import Data.String
import Control.Monad.State
import Data.Char (isSpace)
-data Monoid a =>
- RenderState a = RenderState{
+data RenderState a = RenderState{
output :: [a] -- ^ In reverse order
, prefix :: String
, usePrefix :: Bool
@@ -186,6 +186,14 @@ vcat = foldr ($$) empty
vsep :: [Doc] -> Doc
vsep = foldr ($+$) empty
+-- | Removes leading blank lines from a 'Doc'.
+nestle :: Doc -> Doc
+nestle (Doc d) = Doc $ go d
+ where go x = case viewl x of
+ (BlankLine :< rest) -> go rest
+ (NewLine :< rest) -> go rest
+ _ -> x
+
-- | Chomps trailing blank space off of a 'Doc'.
chomp :: Doc -> Doc
chomp d = Doc (fromList dl')
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index fc29988d5..56cb16b20 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -12,6 +12,7 @@ import Data.Char (isSpace)
import Control.Monad.State
import Control.Applicative ((<$>))
import Data.List (intersperse)
+import Data.Maybe (fromMaybe)
{-
@@ -683,7 +684,7 @@ parseBlock (Elem e) =
"lowerroman" -> LowerRoman
"upperroman" -> UpperRoman
_ -> Decimal
- let start = maybe 1 id $
+ let start = fromMaybe 1 $
(attrValue "override" <$> filterElement (named "listitem") e)
>>= safeRead
orderedListWith (start,listStyle,DefaultDelim)
@@ -779,7 +780,7 @@ parseBlock (Elem e) =
caption <- case filterChild isCaption e of
Just t -> getInlines t
Nothing -> return mempty
- let e' = maybe e id $ filterChild (named "tgroup") e
+ let e' = fromMaybe e $ filterChild (named "tgroup") e
let isColspec x = named "colspec" x || named "col" x
let colspecs = case filterChild (named "colgroup") e' of
Just c -> filterChildren isColspec c
@@ -801,12 +802,14 @@ parseBlock (Elem e) =
Just "center" -> AlignCenter
_ -> AlignDefault
let toWidth c = case findAttr (unqual "colwidth") c of
- Just w -> maybe 0 id
+ Just w -> fromMaybe 0
$ safeRead $ '0': filter (\x ->
(x >= '0' && x <= '9')
|| x == '.') w
Nothing -> 0 :: Double
- let numrows = maximum $ map length bodyrows
+ let numrows = case bodyrows of
+ [] -> 0
+ xs -> maximum $ map length xs
let aligns = case colspecs of
[] -> replicate numrows AlignDefault
cs -> map toAlignment cs
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index d691c9878..d1e4d0024 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -76,9 +76,18 @@ pBody :: TagParser [Block]
pBody = pInTags "body" block
pHead :: TagParser [Block]
-pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag)
+pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag)
where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces
setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t))
+ pMetaTag = do
+ mt <- pSatisfy (~== TagOpen "meta" [])
+ let name = fromAttrib "name" mt
+ if null name
+ then return []
+ else do
+ let content = fromAttrib "content" mt
+ updateState $ B.setMeta name (B.text content)
+ return []
block :: TagParser [Block]
block = choice
@@ -207,7 +216,7 @@ pHeader = try $ do
let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
let level = read (drop 1 tagtype)
contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof)
- let ident = maybe "" id $ lookup "id" attr
+ let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
return $ if bodyTitle
@@ -257,7 +266,7 @@ pCol = try $ do
skipMany pBlank
return $ case lookup "width" attribs of
Just x | not (null x) && last x == '%' ->
- maybe 0.0 id $ safeRead ('0':'.':init x)
+ fromMaybe 0.0 $ safeRead ('0':'.':init x)
_ -> 0.0
pColgroup :: TagParser [Double]
@@ -467,7 +476,13 @@ pBlank = try $ do
pTagContents :: Parser [Char] ParserState Inline
pTagContents =
- pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
+ Math DisplayMath `fmap` mathDisplay
+ <|> Math InlineMath `fmap` mathInline
+ <|> pStr
+ <|> pSpace
+ <|> smartPunctuation pTagContents
+ <|> pSymbol
+ <|> pBad
pStr :: Parser [Char] ParserState Inline
pStr = do
@@ -482,6 +497,7 @@ isSpecial '"' = True
isSpecial '\'' = True
isSpecial '.' = True
isSpecial '-' = True
+isSpecial '$' = True
isSpecial '\8216' = True
isSpecial '\8217' = True
isSpecial '\8220' = True
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 75e29ebb9..51271edc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,7 +38,8 @@ import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Shared
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding ((<|>), many, optional, space)
+import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
+ mathDisplay, mathInline)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Char ( chr, ord )
import Control.Monad
@@ -172,6 +173,8 @@ double_quote :: LP Inlines
double_quote =
( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
<|> quoted' doubleQuoted (string "“") (void $ char '”')
+ -- the following is used by babel for localized quotes:
+ <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
<|> quoted' doubleQuoted (string "\"") (void $ char '"')
)
@@ -454,6 +457,7 @@ inlineCommands = M.fromList $
, ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("verb", doverb)
, ("lstinline", doverb)
+ , ("Verb", doverb)
, ("texttt", (code . stringify . toList) <$> tok)
, ("url", (unescapeURL <$> braced) >>= \url ->
pure (link url "" (str url)))
@@ -870,9 +874,8 @@ verbatimEnv = do
(_,r) <- withRaw $ do
controlSeq "begin"
name <- braced
- guard $ name == "verbatim" || name == "Verbatim" ||
- name == "lstlisting" || name == "minted" ||
- name == "alltt"
+ guard $ name `elem` ["verbatim", "Verbatim", "lstlisting",
+ "minted", "alltt"]
verbEnv name
rest <- getInput
return (r,rest)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 33d1a9620..aa0252266 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -60,6 +60,8 @@ import System.FilePath (takeExtension, addExtension)
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)
import qualified Data.Set as Set
+import Text.Printf (printf)
+import Debug.Trace (trace)
type MarkdownParser = Parser [Char] ParserState
@@ -215,10 +217,10 @@ pandocTitleBlock = try $ do
author' <- author
date' <- date
return $
- ( if B.isNull title' then id else B.setMeta "title" title'
- . if null author' then id else B.setMeta "author" author'
- . if B.isNull date' then id else B.setMeta "date" date' )
- nullMeta
+ (if B.isNull title' then id else B.setMeta "title" title')
+ . (if null author' then id else B.setMeta "author" author')
+ . (if B.isNull date' then id else B.setMeta "date" date')
+ $ nullMeta
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
yamlMetaBlock :: MarkdownParser (F Blocks)
@@ -354,7 +356,7 @@ referenceKey = try $ do
tit <- option "" referenceTitle
-- currently we just ignore MMD-style link/image attributes
_kvs <- option [] $ guardEnabled Ext_link_attributes
- >> many (spnl >> keyValAttr)
+ >> many (try $ spnl >> keyValAttr)
blanklines
let target = (escapeURI $ trimr src, tit)
st <- getState
@@ -440,7 +442,10 @@ parseBlocks :: MarkdownParser (F Blocks)
parseBlocks = mconcat <$> manyTill block eof
block :: MarkdownParser (F Blocks)
-block = choice [ mempty <$ blanklines
+block = do
+ tr <- getOption readerTrace
+ pos <- getPosition
+ res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
, guardEnabled Ext_latex_macros *> (macro >>= return . return)
@@ -465,6 +470,11 @@ block = choice [ mempty <$ blanklines
, para
, plain
] <?> "block"
+ when tr $ do
+ st <- getState
+ trace (printf "line %d: %s" (sourceLine pos)
+ (take 60 $ show $ B.toList $ runF res st)) (return ())
+ return res
--
-- header blocks
@@ -730,9 +740,14 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ())
listLine :: MarkdownParser String
listLine = try $ do
notFollowedBy' (do indentSpaces
- many (spaceChar)
+ many spaceChar
listStart)
- chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline
+ notFollowedBy' $ htmlTag (~== TagClose "div")
+ chunks <- manyTill
+ ( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
+ <|> liftM snd (htmlTag isCommentTag)
+ <|> count 1 anyChar
+ ) newline
return $ concat chunks
-- parse raw text for one list item, excluding start marker and continuations
@@ -759,6 +774,7 @@ listContinuationLine :: MarkdownParser String
listContinuationLine = try $ do
notFollowedBy blankline
notFollowedBy' listStart
+ notFollowedBy' $ htmlTag (~== TagClose "div")
optional indentSpaces
result <- anyLine
return $ result ++ "\n"
@@ -783,8 +799,8 @@ listItem start = try $ do
orderedList :: MarkdownParser (F Blocks)
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
- unless ((style == DefaultStyle || style == Decimal || style == Example) &&
- (delim == DefaultDelim || delim == Period)) $
+ unless (style `elem` [DefaultStyle, Decimal, Example] &&
+ delim `elem` [DefaultDelim, Period]) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
items <- fmap sequence $ many1 $ listItem
@@ -896,7 +912,9 @@ plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
--
htmlElement :: MarkdownParser String
-htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
+htmlElement = rawVerbatimBlock
+ <|> strictHtmlBlock
+ <|> liftM snd (htmlTag isBlockTag)
htmlBlock :: MarkdownParser (F Blocks)
htmlBlock = do
@@ -917,8 +935,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag)
rawVerbatimBlock :: MarkdownParser String
rawVerbatimBlock = try $ do
- (TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
- t == "pre" || t == "style" || t == "script")
+ (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem
+ ["pre", "style", "script"])
(const True))
contents <- manyTill anyChar (htmlTag (~== TagClose tag))
return $ open ++ contents ++ renderTags [TagClose tag]
@@ -936,6 +954,8 @@ rawHtmlBlocks = do
htmlBlocks <- many1 $ try $ do
s <- rawVerbatimBlock <|> try (
do (t,raw) <- htmlTag isBlockTag
+ guard $ t ~/= TagOpen "div" [] &&
+ t ~/= TagClose "div"
exts <- getOption readerExtensions
-- if open tag, need markdown="1" if
-- markdown_attributes extension is set
@@ -1118,12 +1138,12 @@ multilineTableHeader headless = try $ do
then liftM (map (:[]) . tail .
splitStringByIndices (init indices)) $ lookAhead anyLine
else return $ transpose $ map
- (\ln -> tail $ splitStringByIndices (init indices) ln)
+ (tail . splitStringByIndices (init indices))
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
then replicate (length dashes) ""
- else map unwords rawHeadsList
+ else map (unlines . map trim) rawHeadsList
heads <- fmap sequence $
mapM (parseFromString (mconcat <$> many plain)) $
map trim rawHeads
@@ -1180,7 +1200,7 @@ gridTableHeader headless = try $ do
-- RST does not have a notion of alignments
let rawHeads = if headless
then replicate (length dashes) ""
- else map unwords $ transpose
+ else map (unlines . map trim) $ transpose
$ map (gridTableSplitLine indices) rawContent
heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
return (heads, aligns, indices)
@@ -1366,7 +1386,7 @@ ltSign :: MarkdownParser (F Inlines)
ltSign = do
guardDisabled Ext_raw_html
<|> guardDisabled Ext_markdown_in_html_blocks
- <|> (notFollowedBy' rawHtmlBlocks >> return ())
+ <|> (notFollowedBy' (htmlTag isBlockTag) >> return ())
char '<'
return $ return $ B.str "<"
@@ -1406,39 +1426,6 @@ math :: MarkdownParser (F Inlines)
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
<|> (return . B.math <$> (mathInline >>= applyMacros'))
-mathDisplay :: MarkdownParser String
-mathDisplay =
- (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathDisplayWith "\\[" "\\]")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathDisplayWith "\\\\[" "\\\\]")
-
-mathDisplayWith :: String -> String -> MarkdownParser String
-mathDisplayWith op cl = try $ do
- string op
- many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
-
-mathInline :: MarkdownParser String
-mathInline =
- (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
- <|> (guardEnabled Ext_tex_math_single_backslash >>
- mathInlineWith "\\(" "\\)")
- <|> (guardEnabled Ext_tex_math_double_backslash >>
- mathInlineWith "\\\\(" "\\\\)")
-
-mathInlineWith :: String -> String -> MarkdownParser String
-mathInlineWith op cl = try $ do
- string op
- notFollowedBy space
- words' <- many1Till (count 1 (noneOf "\n\\")
- <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
- <|> count 1 newline <* notFollowedBy' blankline
- *> return " ")
- (try $ string cl)
- notFollowedBy digit -- to prevent capture of $5
- return $ concat words'
-
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
enclosure :: Char
@@ -1565,16 +1552,14 @@ endline :: MarkdownParser (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
+ -- parse potential list-starts differently if in a list:
+ st <- getState
+ when (stateParserContext st == ListItemState) $ notFollowedBy listStart
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
- guardEnabled Ext_backtick_code_blocks >>
+ guardDisabled Ext_backtick_code_blocks <|>
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
- -- parse potential list-starts differently if in a list:
- st <- getState
- when (stateParserContext st == ListItemState) $ do
- notFollowedBy' bulletListStart
- notFollowedBy' anyOrderedListStart
(guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
<|> (return $ return B.space)
@@ -1747,7 +1732,7 @@ spanHtml = try $ do
guardEnabled Ext_markdown_in_html_blocks
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
- let ident = maybe "" id $ lookup "id" attrs
+ let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.spanWith (ident, classes, keyvals) <$> contents
@@ -1755,12 +1740,19 @@ spanHtml = try $ do
divHtml :: MarkdownParser (F Blocks)
divHtml = try $ do
guardEnabled Ext_markdown_in_html_blocks
- (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" [])
- contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div"))
- let ident = maybe "" id $ lookup "id" attrs
- let classes = maybe [] words $ lookup "class" attrs
- let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ B.divWith (ident, classes, keyvals) <$> contents
+ (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
+ bls <- option "" (blankline >> option "" blanklines)
+ contents <- mconcat <$>
+ many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
+ closed <- option False (True <$ htmlTag (~== TagClose "div"))
+ if closed
+ then do
+ let ident = fromMaybe "" $ lookup "id" attrs
+ let classes = maybe [] words $ lookup "class" attrs
+ let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+ return $ B.divWith (ident, classes, keyvals) <$> contents
+ else -- avoid backtracing
+ return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
rawHtmlInline :: MarkdownParser (F Inlines)
rawHtmlInline = do
@@ -1836,9 +1828,10 @@ citeKey = try $ do
guard $ lastStrPos /= Just pos
suppress_author <- option False (char '-' >> return True)
char '@'
- first <- letter
- let internal p = try $ p >>~ lookAhead (letter <|> digit)
- rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/")
+ first <- letter <|> char '_'
+ let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+ let internal p = try $ p >>~ lookAhead regchar
+ rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
let key = first:rest
return (suppress_author, key)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 7f99af528..7ac2f33ba 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -54,6 +54,7 @@ import Data.Sequence (viewl, ViewL(..), (<|))
import qualified Data.Foldable as F
import qualified Data.Map as M
import Data.Char (isDigit, isSpace)
+import Data.Maybe (fromMaybe)
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: ReaderOptions -- ^ Reader options
@@ -148,9 +149,16 @@ inlinesInTags tag = try $ do
blocksInTags :: String -> MWParser Blocks
blocksInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
+ let closer = if tag == "li"
+ then htmlTag (~== TagClose "li")
+ <|> lookAhead (
+ htmlTag (~== TagOpen "li" [])
+ <|> htmlTag (~== TagClose "ol")
+ <|> htmlTag (~== TagClose "ul"))
+ else htmlTag (~== TagClose tag)
if '/' `elem` raw -- self-closing tag
then return mempty
- else mconcat <$> manyTill block (htmlTag (~== TagClose tag))
+ else mconcat <$> manyTill block closer
charsInTags :: String -> MWParser [Char]
charsInTags tag = try $ do
@@ -204,7 +212,7 @@ table = do
tableStart
styles <- option [] parseAttrs <* blankline
let tableWidth = case lookup "width" styles of
- Just w -> maybe 1.0 id $ parseWidth w
+ Just w -> fromMaybe 1.0 $ parseWidth w
Nothing -> 1.0
caption <- option mempty tableCaption
optional rowsep
@@ -285,7 +293,7 @@ tableCell = try $ do
Just "center" -> AlignCenter
_ -> AlignDefault
let width = case lookup "width" attrs of
- Just xs -> maybe 0.0 id $ parseWidth xs
+ Just xs -> fromMaybe 0.0 $ parseWidth xs
Nothing -> 0.0
return ((align, width), bs)
@@ -380,15 +388,13 @@ bulletList = B.bulletList <$>
orderedList :: MWParser Blocks
orderedList =
(B.orderedList <$> many1 (listItem '#'))
- <|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *>
- many (listItem '#' <|> li) <*
- optional (htmlTag (~== TagClose "ul"))))
- <|> do (tag,_) <- htmlTag (~== TagOpen "ol" [])
- spaces
- items <- many (listItem '#' <|> li)
- optional (htmlTag (~== TagClose "ol"))
- let start = maybe 1 id $ safeRead $ fromAttrib "start" tag
- return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items
+ <|> try
+ (do (tag,_) <- htmlTag (~== TagOpen "ol" [])
+ spaces
+ items <- many (listItem '#' <|> li)
+ optional (htmlTag (~== TagClose "ol"))
+ let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
+ return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items)
definitionList :: MWParser Blocks
definitionList = B.definitionList <$> many1 defListItem
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
new file mode 100644
index 000000000..5dc250f04
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -0,0 +1,552 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org
+ Copyright : Copyright (C) 2014 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de>
+
+Conversion of Org-Mode to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Org ( readOrg ) where
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (orderedListMarker)
+import Text.Pandoc.Shared (compactify')
+
+import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
+import Control.Monad (guard, mzero)
+import Data.Char (toLower)
+import Data.List (foldl')
+import Data.Maybe (listToMaybe, fromMaybe)
+import Data.Monoid (mconcat, mempty, mappend)
+
+-- | Parse org-mode string and return a Pandoc document.
+readOrg :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Pandoc
+readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n")
+
+type OrgParser = Parser [Char] ParserState
+
+parseOrg:: OrgParser Pandoc
+parseOrg = do
+ blocks' <- B.toList <$> parseBlocks
+ st <- getState
+ let meta = stateMeta st
+ return $ Pandoc meta $ filter (/= Null) blocks'
+
+--
+-- parsing blocks
+--
+
+parseBlocks :: OrgParser Blocks
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: OrgParser Blocks
+block = choice [ mempty <$ blanklines
+ , orgBlock
+ , example
+ , drawer
+ , specialLine
+ , header
+ , hline
+ , list
+ , table
+ , paraOrPlain
+ ] <?> "block"
+
+--
+-- Org Blocks (#+BEGIN_... / #+END_...)
+--
+
+orgBlock :: OrgParser Blocks
+orgBlock = try $ do
+ (indent, blockType, args) <- blockHeader
+ blockStr <- rawBlockContent indent blockType
+ let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
+ case blockType of
+ "comment" -> return mempty
+ "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr
+ _ -> B.divWith ("", [blockType], [])
+ <$> (parseFromString parseBlocks blockStr)
+
+blockHeader :: OrgParser (Int, String, [String])
+blockHeader = (,,) <$> blockIndent
+ <*> blockType
+ <*> (skipSpaces *> blockArgs)
+ where blockIndent = length <$> many spaceChar
+ blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
+ blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
+
+rawBlockContent :: Int -> String -> OrgParser String
+rawBlockContent indent blockType =
+ unlines . map commaEscaped <$> manyTill indentedLine blockEnder
+ where
+ indentedLine = try $ choice [ blankline *> pure "\n"
+ , indentWith indent *> anyLine
+ ]
+ blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith :: Int -> OrgParser String
+indentWith num = do
+ tabStop <- getOption readerTabStop
+ if (num < tabStop)
+ then count num (char ' ')
+ else choice [ try (count num (char ' '))
+ , try (char '\t' >> count (num - tabStop) (char ' ')) ]
+
+translateLang :: String -> String
+translateLang "sh" = "bash"
+translateLang cs = cs
+
+commaEscaped :: String -> String
+commaEscaped (',':cs@('*':_)) = cs
+commaEscaped (',':cs@('#':'+':_)) = cs
+commaEscaped cs = cs
+
+example :: OrgParser Blocks
+example = try $
+ B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine
+
+exampleLine :: OrgParser String
+exampleLine = try $ string ": " *> anyLine
+
+-- Drawers for properties or a logbook
+drawer :: OrgParser Blocks
+drawer = try $ do
+ drawerStart
+ manyTill drawerLine (try drawerEnd)
+ return mempty
+
+drawerStart :: OrgParser String
+drawerStart = try $
+ skipSpaces *> drawerName <* skipSpaces <* newline
+ where drawerName = try $ char ':' *> validDrawerName <* char ':'
+ validDrawerName = stringAnyCase "PROPERTIES"
+ <|> stringAnyCase "LOGBOOK"
+
+drawerLine :: OrgParser String
+drawerLine = try $ anyLine
+
+drawerEnd :: OrgParser String
+drawerEnd = try $
+ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
+
+
+-- Comments, Options and Metadata
+specialLine :: OrgParser Blocks
+specialLine = try $ metaLine <|> commentLine
+
+metaLine :: OrgParser Blocks
+metaLine = try $ metaLineStart *> declarationLine
+
+commentLine :: OrgParser Blocks
+commentLine = try $ commentLineStart *> anyLine *> pure mempty
+
+-- The order, in which blocks are tried, makes sure that we're not looking at
+-- the beginning of a block, so we don't need to check for it
+metaLineStart :: OrgParser String
+metaLineStart = try $ mappend <$> many spaceChar <*> string "#+"
+
+commentLineStart :: OrgParser String
+commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
+
+declarationLine :: OrgParser Blocks
+declarationLine = try $ do
+ meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
+ updateState $ \st -> st { stateMeta = stateMeta st <> meta' }
+ return mempty
+
+metaValue :: OrgParser MetaValue
+metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine
+
+metaKey :: OrgParser [Char]
+metaKey = map toLower <$> many1 (noneOf ": \n\r")
+ <* char ':'
+ <* skipSpaces
+
+-- | Headers
+header :: OrgParser Blocks
+header = try $
+ B.header <$> headerStart
+ <*> (trimInlines <$> restOfLine)
+
+headerStart :: OrgParser Int
+headerStart = try $
+ (length <$> many1 (char '*')) <* many1 (char ' ')
+
+-- Horizontal Line (five dashes or more)
+hline :: OrgParser Blocks
+hline = try $ do
+ skipSpaces
+ string "-----"
+ many (char '-')
+ skipSpaces
+ newline
+ return B.horizontalRule
+
+--
+-- Tables
+--
+
+data OrgTableRow = OrgContentRow [Blocks]
+ | OrgAlignRow [Alignment]
+ | OrgHlineRow
+ deriving (Eq, Show)
+
+type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]])
+
+table :: OrgParser Blocks
+table = try $ do
+ lookAhead tableStart
+ (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows
+ return $ B.table "" (zip aligns widths) heads lns
+
+tableStart :: OrgParser Char
+tableStart = try $ skipSpaces *> char '|'
+
+tableRows :: OrgParser [OrgTableRow]
+tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
+
+tableContentRow :: OrgParser OrgTableRow
+tableContentRow = try $
+ OrgContentRow <$> (tableStart *> manyTill tableContentCell newline)
+
+tableContentCell :: OrgParser Blocks
+tableContentCell = try $
+ B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell)
+
+endOfCell :: OrgParser Char
+-- endOfCell = char '|' <|> newline
+endOfCell = try $ char '|' <|> lookAhead newline
+
+tableAlignRow :: OrgParser OrgTableRow
+tableAlignRow = try $
+ OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
+
+tableAlignCell :: OrgParser Alignment
+tableAlignCell =
+ choice [ try $ emptyCell *> return (AlignDefault)
+ , try $ skipSpaces
+ *> char '<'
+ *> tableAlignFromChar
+ <* many digit
+ <* char '>'
+ <* emptyCell
+ ] <?> "alignment info"
+ where emptyCell = try $ skipSpaces *> endOfCell
+
+tableAlignFromChar :: OrgParser Alignment
+tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
+ , char 'c' *> return AlignCenter
+ , char 'r' *> return AlignRight
+ ]
+
+tableHline :: OrgParser OrgTableRow
+tableHline = try $
+ OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
+
+tableContent :: [OrgTableRow]
+ -> OrgTableContent
+tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty)
+
+normalizeTable :: OrgTableContent
+ -> OrgTableContent
+normalizeTable (cols, aligns, widths, heads, lns) =
+ let aligns' = fillColumns aligns AlignDefault
+ widths' = fillColumns widths 0.0
+ heads' = if heads == mempty
+ then heads
+ else fillColumns heads (B.plain mempty)
+ lns' = map (flip fillColumns (B.plain mempty)) lns
+ fillColumns base padding = take cols $ base ++ repeat padding
+ in (cols, aligns', widths', heads', lns')
+
+
+-- One or more horizontal rules after the first content line mark the previous
+-- line as a header. All other horizontal lines are discarded.
+rowToContent :: OrgTableRow
+ -> OrgTableContent
+ -> OrgTableContent
+rowToContent OrgHlineRow = maybeBodyToHeader
+rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs
+rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as
+
+setLongestRow :: [a]
+ -> OrgTableContent
+ -> OrgTableContent
+setLongestRow r (cols, aligns, widths, heads, lns) =
+ (max cols (length r), aligns, widths, heads, lns)
+
+maybeBodyToHeader :: OrgTableContent
+ -> OrgTableContent
+maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, [])
+maybeBodyToHeader content = content
+
+appendToBody :: [Blocks]
+ -> OrgTableContent
+ -> OrgTableContent
+appendToBody r (cols, aligns, widths, heads, lns) =
+ (cols, aligns, widths, heads, lns ++ [r])
+
+setAligns :: [Alignment]
+ -> OrgTableContent
+ -> OrgTableContent
+setAligns aligns (cols, _, widths, heads, lns) =
+ (cols, aligns, widths, heads, lns)
+
+-- Paragraphs or Plain text
+paraOrPlain :: OrgParser Blocks
+paraOrPlain = try $
+ trimInlines . mconcat
+ <$> many1 inline
+ <**> option B.plain
+ (try $ newline *> pure B.para)
+
+restOfLine :: OrgParser Inlines
+restOfLine = mconcat <$> manyTill inline newline
+
+
+--
+-- list blocks
+--
+
+list :: OrgParser Blocks
+list = choice [ bulletList, orderedList ] <?> "list"
+
+bulletList :: OrgParser Blocks
+bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart)
+
+orderedList :: OrgParser Blocks
+orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
+
+genericListStart :: OrgParser String
+ -> OrgParser Int
+genericListStart listMarker = try $
+ (+) <$> (length <$> many spaceChar)
+ <*> (length <$> listMarker <* many1 spaceChar)
+
+-- parses bullet list start and returns its length (excl. following whitespace)
+bulletListStart :: OrgParser Int
+bulletListStart = genericListStart bulletListMarker
+ where bulletListMarker = pure <$> oneOf "*-+"
+
+orderedListStart :: OrgParser Int
+orderedListStart = genericListStart orderedListMarker
+ -- Ordered list markers allowed in org-mode
+ where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+
+listItem :: OrgParser Int
+ -> OrgParser Blocks
+listItem start = try $ do
+ (markerLength, first) <- try (start >>= rawListItem)
+ rest <- many (listContinuation markerLength)
+ parseFromString parseBlocks $ concat (first:rest)
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem :: Int
+ -> OrgParser (Int, String)
+rawListItem markerLength = try $ do
+ firstLine <- anyLine
+ restLines <- many (listLine markerLength)
+ return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int
+ -> OrgParser String
+listContinuation markerLength = try $
+ mappend <$> many blankline
+ <*> (concat <$> many1 (listLine markerLength))
+
+-- parse a line of a list item
+listLine :: Int
+ -> OrgParser String
+listLine markerLength = try $
+ indentWith markerLength *> anyLine
+ <**> pure (++ "\n")
+
+
+--
+-- inline
+--
+
+inline :: OrgParser Inlines
+inline = choice inlineParsers <?> "inline"
+ where inlineParsers = [ whitespace
+ , link
+ , str
+ , endline
+ , emph
+ , strong
+ , strikeout
+ , underline
+ , code
+ , verbatim
+ , subscript
+ , superscript
+ , symbol
+ ]
+
+-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
+specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
+
+
+whitespace :: OrgParser Inlines
+whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
+
+str :: OrgParser Inlines
+str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+ <* updateLastStrPos
+
+-- an endline character that can be treated as a space, not a structural break
+endline :: OrgParser Inlines
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ notFollowedBy' exampleLine
+ notFollowedBy' hline
+ notFollowedBy' tableStart
+ notFollowedBy' drawerStart
+ notFollowedBy' headerStart
+ notFollowedBy' metaLineStart
+ notFollowedBy' commentLineStart
+ notFollowedBy' bulletListStart
+ notFollowedBy' orderedListStart
+ return B.space
+
+link :: OrgParser Inlines
+link = explicitLink <|> selfLink <?> "link"
+
+explicitLink :: OrgParser Inlines
+explicitLink = try $ do
+ char '['
+ src <- enclosedRaw (char '[') (char ']')
+ title <- enclosedInlines (char '[') (char ']')
+ char ']'
+ return $ B.link src "" title
+
+selfLink :: OrgParser Inlines
+selfLink = try $ do
+ src <- enclosedRaw (string "[[") (string "]]")
+ return $ B.link src "" (B.str src)
+
+emph :: OrgParser Inlines
+emph = B.emph <$> inlinesEnclosedBy '/'
+
+strong :: OrgParser Inlines
+strong = B.strong <$> inlinesEnclosedBy '*'
+
+strikeout :: OrgParser Inlines
+strikeout = B.strikeout <$> inlinesEnclosedBy '+'
+
+-- There is no underline, so we use strong instead.
+underline :: OrgParser Inlines
+underline = B.strong <$> inlinesEnclosedBy '_'
+
+code :: OrgParser Inlines
+code = B.code <$> rawEnclosedBy '='
+
+verbatim :: OrgParser Inlines
+verbatim = B.rawInline "" <$> rawEnclosedBy '~'
+
+subscript :: OrgParser Inlines
+subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces)
+
+superscript :: OrgParser Inlines
+superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces)
+
+maybeGroupedByBraces :: OrgParser Inlines
+maybeGroupedByBraces = try $
+ choice [ try $ enclosedInlines (char '{') (char '}')
+ , B.str . (:"") <$> anyChar
+ ]
+
+symbol :: OrgParser Inlines
+symbol = B.str . (: "") <$> oneOf specialChars
+
+enclosedInlines :: OrgParser a
+ -> OrgParser b
+ -> OrgParser Inlines
+enclosedInlines start end = try $
+ trimInlines . mconcat <$> enclosed start end inline
+
+-- FIXME: This is a hack
+inlinesEnclosedBy :: Char
+ -> OrgParser Inlines
+inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
+ (atEnd $ char c)
+
+enclosedRaw :: OrgParser a
+ -> OrgParser b
+ -> OrgParser String
+enclosedRaw start end = try $
+ start *> (onSingleLine <|> spanningTwoLines)
+ where onSingleLine = try $ many1Till (noneOf "\n\r") end
+ spanningTwoLines = try $
+ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
+
+rawEnclosedBy :: Char
+ -> OrgParser String
+rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c)
+
+-- succeeds only if we're not right after a str (ie. in middle of word)
+atStart :: OrgParser a -> OrgParser a
+atStart p = do
+ pos <- getPosition
+ st <- getState
+ guard $ stateLastStrPos st /= Just pos
+ p
+
+-- | succeeds only if we're at the end of a word
+atEnd :: OrgParser a -> OrgParser a
+atEnd p = try $ p <* lookingAtEndOfWord
+ where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars
+
+postWordChars :: [Char]
+postWordChars = "\t\n\r !\"'),-.:?}"
+
+-- FIXME: These functions are hacks and should be replaced
+endsOnThisOrNextLine :: Char
+ -> OrgParser ()
+endsOnThisOrNextLine c = do
+ inp <- getInput
+ let doOtherwise = \rest -> endsOnThisLine rest c (const mzero)
+ endsOnThisLine inp c doOtherwise
+
+endsOnThisLine :: [Char]
+ -> Char
+ -> ([Char] -> OrgParser ())
+ -> OrgParser ()
+endsOnThisLine input c doOnOtherLines = do
+ case break (`elem` c:"\n") input of
+ (_,'\n':rest) -> doOnOtherLines rest
+ (_,_:rest@(n:_)) -> if n `elem` postWordChars
+ then return ()
+ else endsOnThisLine rest c doOnOtherLines
+ _ -> mzero
+
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 32893128a..127eae167 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -36,12 +36,13 @@ import Text.Pandoc.Builder (setMeta, fromList)
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Options
-import Control.Monad ( when, liftM, guard, mzero )
+import Control.Monad ( when, liftM, guard, mzero, mplus )
import Data.List ( findIndex, intersperse, intercalate,
transpose, sort, deleteFirstsBy, isSuffixOf )
+import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import Text.Printf ( printf )
-import Control.Applicative ((<$>), (<$), (<*), (*>))
+import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>))
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
import Data.Monoid (mconcat, mempty)
@@ -347,14 +348,25 @@ lhsCodeBlock = try $ do
getPosition >>= guard . (==1) . sourceColumn
guardEnabled Ext_literate_haskell
optional codeBlockStart
- lns <- many1 birdTrackLine
- -- if (as is normal) there is always a space after >, drop it
- let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
- then map (drop 1) lns
- else lns
+ lns <- latexCodeBlock <|> birdCodeBlock
blanklines
return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], [])
- $ intercalate "\n" lns'
+ $ intercalate "\n" lns
+
+latexCodeBlock :: Parser [Char] st [[Char]]
+latexCodeBlock = try $ do
+ try (latexBlockLine "\\begin{code}")
+ many1Till anyLine (try $ latexBlockLine "\\end{code}")
+ where
+ latexBlockLine s = skipMany spaceChar >> string s >> blankline
+
+birdCodeBlock :: Parser [Char] st [[Char]]
+birdCodeBlock = filterSpace <$> many1 birdTrackLine
+ where filterSpace lns =
+ -- if (as is normal) there is always a space after >, drop it
+ if all (\ln -> null ln || take 1 ln == " ") lns
+ then map (drop 1) lns
+ else lns
birdTrackLine :: Parser [Char] st [Char]
birdTrackLine = char '>' >> anyLine
@@ -519,7 +531,7 @@ directive' = do
let body' = body ++ "\n\n"
case label of
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
- "role" -> return mempty
+ "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
"container" -> parseFromString parseBlocks body'
"replace" -> B.para <$> -- consumed by substKey
parseFromString (trimInlines . mconcat <$> many inline)
@@ -569,7 +581,7 @@ directive' = do
"figure" -> do
(caption, legend) <- parseFromString extractCaption body'
let src = escapeURI $ trim top
- return $ B.para (B.image src "" caption) <> legend
+ return $ B.para (B.image src "fig:" caption) <> legend
"image" -> do
let src = escapeURI $ trim top
let alt = B.str $ maybe "image" trim $ lookup "alt" fields
@@ -580,7 +592,38 @@ directive' = do
Nothing -> B.image src "" alt
_ -> return mempty
--- Can contain haracter codes as decimal numbers or
+-- TODO:
+-- - Silently ignores illegal fields
+-- - Silently drops classes
+-- - Only supports :format: fields with a single format for :raw: roles,
+-- change Text.Pandoc.Definition.Format to fix
+addNewRole :: String -> [(String, String)] -> RSTParser Blocks
+addNewRole roleString fields = do
+ (role, parentRole) <- parseFromString inheritedRole roleString
+ customRoles <- stateRstCustomRoles <$> getState
+ baseRole <- case M.lookup parentRole customRoles of
+ Just (base, _, _) -> return base
+ Nothing -> return parentRole
+
+ let fmt = if baseRole == "raw" then lookup "format" fields else Nothing
+ annotate = maybe id addLanguage $
+ if baseRole == "code"
+ then lookup "language" fields
+ else Nothing
+
+ updateState $ \s -> s {
+ stateRstCustomRoles =
+ M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles
+ }
+
+ return $ B.singleton Null
+ where
+ addLanguage lang (ident, classes, keyValues) =
+ (ident, "sourceCode" : lang : classes, keyValues)
+ inheritedRole =
+ (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')')
+
+-- Can contain character codes as decimal numbers or
-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
-- or as XML-style hexadecimal character entities, e.g. &#x1a2b;
-- or text, which is used as-is. Comments start with ..
@@ -919,17 +962,56 @@ strong = B.strong . trimInlines . mconcat <$>
-- Note, this doesn't precisely implement the complex rule in
-- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
-- but it should be good enough for most purposes
+--
+-- TODO:
+-- - Classes are silently discarded in addNewRole
+-- - Lacks sensible implementation for title-reference (which is the default)
+-- - Allows direct use of the :raw: role, rST only allows inherited use.
interpretedRole :: RSTParser Inlines
interpretedRole = try $ do
(role, contents) <- roleBefore <|> roleAfter
- case role of
- "sup" -> return $ B.superscript $ B.str contents
- "sub" -> return $ B.subscript $ B.str contents
- "math" -> return $ B.math contents
- _ -> return $ B.str contents --unknown
+ renderRole contents Nothing role nullAttr
+
+renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines
+renderRole contents fmt role attr = case role of
+ "sup" -> return $ B.superscript $ B.str contents
+ "superscript" -> return $ B.superscript $ B.str contents
+ "sub" -> return $ B.subscript $ B.str contents
+ "subscript" -> return $ B.subscript $ B.str contents
+ "emphasis" -> return $ B.emph $ B.str contents
+ "strong" -> return $ B.strong $ B.str contents
+ "rfc-reference" -> return $ rfcLink contents
+ "RFC" -> return $ rfcLink contents
+ "pep-reference" -> return $ pepLink contents
+ "PEP" -> return $ pepLink contents
+ "literal" -> return $ B.str contents
+ "math" -> return $ B.math contents
+ "title-reference" -> titleRef contents
+ "title" -> titleRef contents
+ "t" -> titleRef contents
+ "code" -> return $ B.codeWith attr contents
+ "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
+ custom -> do
+ customRole <- stateRstCustomRoles <$> getState
+ case M.lookup custom customRole of
+ Just (_, newFmt, inherit) -> let
+ fmtStr = fmt `mplus` newFmt
+ (newRole, newAttr) = inherit attr
+ in renderRole contents fmtStr newRole newAttr
+ Nothing -> return $ B.str contents -- Undefined role
+ where
+ titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
+ rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
+ where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
+ pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
+ where padNo = replicate (4 - length pepNo) '0' ++ pepNo
+ pepUrl = "http://http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+
+roleNameEndingIn :: RSTParser Char -> RSTParser String
+roleNameEndingIn end = many1Till (letter <|> char '-') end
roleMarker :: RSTParser String
-roleMarker = char ':' *> many1Till (letter <|> char '-') (char ':')
+roleMarker = char ':' *> roleNameEndingIn (char ':')
roleBefore :: RSTParser (String,String)
roleBefore = try $ do
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 23e07f621..93658cdea 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -594,7 +594,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo
simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser
-> ([Inline] -> Inline) -- ^ Inline constructor
-> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly)
-simpleInline border construct = surrounded border (inlineWithAttribute) >>=
+simpleInline border construct = surrounded border inlineWithAttribute >>=
return . construct . normalizeSpaces
where inlineWithAttribute = (try $ optional attributes) >> inline
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 6112e764f..7fc9c2966 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -50,14 +50,16 @@ isOk c = isAscii c && isAlphaNum c
convertTag :: Maybe FilePath -> Tag String -> IO (Tag String)
convertTag userdata t@(TagOpen tagname as)
- | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] =
- case fromAttrib "src" t of
- [] -> return t
- src -> do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
- let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
- return $ TagOpen tagname
- (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+ | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
+ as' <- mapM processAttribute as
+ return $ TagOpen tagname as'
+ where processAttribute (x,y) =
+ if x == "src" || x == "href" || x == "poster"
+ then do
+ (raw, mime) <- getRaw userdata (fromAttrib "type" t) y
+ let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
+ return (x, enc)
+ else return (x,y)
convertTag userdata t@(TagOpen "script" as) =
case fromAttrib "src" t of
[] -> return t
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 8dcd88148..714402e42 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -91,7 +91,8 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate )
import qualified Data.Map as M
-import Network.URI ( escapeURIString, isURI, unEscapeString )
+import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
+ unEscapeString, parseURIReference )
import System.Directory
import Text.Pandoc.MIME (getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension )
@@ -121,6 +122,7 @@ import Data.ByteString.Lazy (toChunks)
import Network.HTTP.Conduit (httpLbs, parseUrl, withManager,
responseBody, responseHeaders)
import Network.HTTP.Types.Header ( hContentType)
+import Network (withSocketsDo)
#else
import Network.URI (parseURI)
import Network.HTTP (findHeader, rspBody,
@@ -270,7 +272,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
(msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
where parsetimeWith = parseTime defaultTimeLocale
formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
- "%d %B %Y", "%b. %d, %Y", "%B %d, %Y"]
+ "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"]
--
-- Pandoc block and inline list processing
@@ -531,7 +533,7 @@ headerShift n = walk shift
-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
-isTightList = and . map firstIsPlain
+isTightList = all firstIsPlain
where firstIsPlain (Plain _ : _) = True
firstIsPlain _ = False
@@ -563,14 +565,10 @@ makeMeta title authors date =
-- | Render HTML tags.
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions
- renderOptions{ optMinimize = \x ->
- let y = map toLower x
- in y == "hr" || y == "br" ||
- y == "img" || y == "meta" ||
- y == "link"
- , optRawTag = \x ->
- let y = map toLower x
- in y == "script" || y == "style" }
+ renderOptions{ optMinimize = matchTags ["hr", "br", "img",
+ "meta", "link"]
+ , optRawTag = matchTags ["script", "style"] }
+ where matchTags = \tags -> flip elem tags . map toLower
--
-- File handling
@@ -626,9 +624,13 @@ fetchItem :: Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
fetchItem sourceURL s
| isURI s = openURL s
- | otherwise = case sourceURL of
- Just u -> openURL (u ++ "/" ++ s)
- Nothing -> E.try readLocalFile
+ | otherwise =
+ case sourceURL >>= parseURIReference of
+ Just u -> case parseURIReference s of
+ Just s' -> openURL $ show $
+ s' `nonStrictRelativeTo` u
+ Nothing -> openURL $ show u ++ "/" ++ s
+ Nothing -> E.try readLocalFile
where readLocalFile = do
let mime = case takeExtension s of
".gz" -> getMimeType $ dropExtension s
@@ -644,7 +646,7 @@ openURL u
contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
in return $ Right (decodeLenient contents, Just mime)
#ifdef HTTP_CONDUIT
- | otherwise = E.try $ do
+ | otherwise = withSocketsDo $ E.try $ do
req <- parseUrl u
resp <- withManager $ httpLbs req
return (BS.concat $ toChunks $ responseBody resp,
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index ad8838f72..52625abf6 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -190,6 +190,7 @@ resolveVar var' val =
Just (String t) -> T.stripEnd t
Just (Number n) -> T.pack $ show n
Just (Bool True) -> "true"
+ Just (Object _) -> "true"
Just _ -> mempty
Nothing -> mempty
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 179d9bc5b..3095cf508 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -130,7 +130,7 @@ blockToConTeXt (Plain lst) = inlineListToConTeXt lst
-- title beginning with fig: indicates that the image is a figure
blockToConTeXt (Para [Image txt (src,'f':'i':'g':':':_)]) = do
capt <- inlineListToConTeXt txt
- return $ blankline $$ "\\placefigure[here]" <> braces capt <>
+ return $ blankline $$ "\\placefigure" <> braces capt <>
braces ("\\externalfigure" <> brackets (text src)) <> blankline
blockToConTeXt (Para lst) = do
contents <- inlineListToConTeXt lst
@@ -205,9 +205,9 @@ blockToConTeXt (Table caption aligns widths heads rows) = do
else liftM ($$ "\\HL") $ tableRowToConTeXt heads
captionText <- inlineListToConTeXt caption
rows' <- mapM tableRowToConTeXt rows
- return $ "\\placetable" <> brackets ("here" <> if null caption
- then ",none"
- else "")
+ return $ "\\placetable" <> (if null caption
+ then brackets "none"
+ else empty)
<> braces captionText $$
"\\starttable" <> brackets (text colDescriptors) $$
"\\HL" $$ headers $$
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 5c7341b69..2a834c2da 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -29,7 +29,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to docx.
-}
module Text.Pandoc.Writers.Docx ( writeDocx ) where
-import Data.List ( intercalate, groupBy )
+import Data.Maybe (fromMaybe)
+import Data.List ( intercalate, isPrefixOf, isSuffixOf )
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
@@ -42,6 +43,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.ImageSize
import Text.Pandoc.Shared hiding (Element)
+import Text.Pandoc.Writers.Shared (fixDisplayMath)
import Text.Pandoc.Options
import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Highlighting ( highlight )
@@ -130,7 +132,8 @@ writeDocx opts doc@(Pandoc meta _) = do
let mkOverrideNode (part', contentType') = mknode "Override"
[("PartName",part'),("ContentType",contentType')] ()
let mkImageOverride (_, imgpath, mbMimeType, _, _) =
- mkOverrideNode ("/word/" ++ imgpath, maybe "application/octet-stream" id mbMimeType)
+ mkOverrideNode ("/word/" ++ imgpath,
+ fromMaybe "application/octet-stream" mbMimeType)
let overrides = map mkOverrideNode
[("/word/webSettings.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
@@ -260,6 +263,12 @@ writeDocx opts doc@(Pandoc meta _) = do
fontTableEntry <- entryFromArchive "word/fontTable.xml"
settingsEntry <- entryFromArchive "word/settings.xml"
webSettingsEntry <- entryFromArchive "word/webSettings.xml"
+ let miscRels = [ f | f <- filesInArchive refArchive
+ , "word/_rels/" `isPrefixOf` f
+ , ".xml.rels" `isSuffixOf` f
+ , f /= "word/_rels/document.xml.rels"
+ , f /= "word/_rels/footnotes.xml.rels" ]
+ miscRelEntries <- mapM entryFromArchive miscRels
-- Create archive
let archive = foldr addEntryToArchive emptyArchive $
@@ -267,7 +276,7 @@ writeDocx opts doc@(Pandoc meta _) = do
footnoteRelEntry : numEntry : styleEntry : footnotesEntry :
docPropsEntry : docPropsAppEntry : themeEntry :
fontTableEntry : settingsEntry : webSettingsEntry :
- imageEntries
+ imageEntries ++ miscRelEntries
return $ fromArchive archive
styleToOpenXml :: Style -> [Element]
@@ -322,7 +331,7 @@ mkNum markers marker numid =
NumberMarker _ _ start ->
map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
$ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
- where absnumid = maybe 0 id $ M.lookup marker markers
+ where absnumid = fromMaybe 0 $ M.lookup marker markers
mkAbstractNum :: (ListMarker,Int) -> IO Element
mkAbstractNum (marker,numid) = do
@@ -807,30 +816,8 @@ br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ]
parseXml :: Archive -> String -> IO Element
parseXml refArchive relpath =
- case (findEntryByPath relpath refArchive >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of
- Just d -> return d
+ case findEntryByPath relpath refArchive of
+ Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of
+ Just d -> return d
+ Nothing -> fail $ relpath ++ " corrupt in reference docx"
Nothing -> fail $ relpath ++ " missing in reference docx"
-
-isDisplayMath :: Inline -> Bool
-isDisplayMath (Math DisplayMath _) = True
-isDisplayMath _ = False
-
-stripLeadingTrailingSpace :: [Inline] -> [Inline]
-stripLeadingTrailingSpace = go . reverse . go . reverse
- where go (Space:xs) = xs
- go xs = xs
-
-fixDisplayMath :: Block -> Block
-fixDisplayMath (Plain lst)
- | any isDisplayMath lst && not (all isDisplayMath lst) =
- -- chop into several paragraphs so each displaymath is its own
- Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $
- groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
- not (isDisplayMath x || isDisplayMath y)) lst
-fixDisplayMath (Para lst)
- | any isDisplayMath lst && not (all isDisplayMath lst) =
- -- chop into several paragraphs so each displaymath is its own
- Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $
- groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
- not (isDisplayMath x || isDisplayMath y)) lst
-fixDisplayMath x = x
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index ef4c7be23..dae45b90f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, CPP #-}
+{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-}
{-
Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
@@ -30,16 +30,18 @@ Conversion of 'Pandoc' documents to EPUB.
-}
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
import Data.IORef
-import Data.Maybe ( fromMaybe, isNothing )
+import qualified Data.Map as M
+import Data.Maybe ( fromMaybe )
import Data.List ( isInfixOf, intercalate )
import System.Environment ( getEnv )
import Text.Printf (printf)
import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName )
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
-import Text.Pandoc.UTF8 ( fromStringLazy, toString )
+import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.SelfContained ( makeSelfContained )
import Codec.Archive.Zip
+import Control.Applicative ((<$>))
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
@@ -54,7 +56,7 @@ import Text.XML.Light hiding (ppTopElement)
import Text.Pandoc.UUID
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.Markdown ( writePlain )
-import Data.Char ( toLower )
+import Data.Char ( toLower, isDigit )
import Network.URI ( unEscapeString )
import Text.Pandoc.MIME (getMimeType)
#if MIN_VERSION_base(4,6,0)
@@ -63,6 +65,7 @@ import Prelude hiding (catch)
#endif
import Control.Exception (catch, SomeException)
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
+import Text.HTML.TagSoup
-- A Chapter includes a list of blocks and maybe a section
-- number offset. Note, some chapters are unnumbered. The section
@@ -70,12 +73,237 @@ import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
-- in filenames, chapter0003.xhtml.
data Chapter = Chapter (Maybe [Int]) [Block]
+data EPUBMetadata = EPUBMetadata{
+ epubIdentifier :: [Identifier]
+ , epubTitle :: [Title]
+ , epubDate :: String
+ , epubLanguage :: String
+ , epubCreator :: [Creator]
+ , epubContributor :: [Creator]
+ , epubSubject :: [String]
+ , epubDescription :: Maybe String
+ , epubType :: Maybe String
+ , epubFormat :: Maybe String
+ , epubPublisher :: Maybe String
+ , epubSource :: Maybe String
+ , epubRelation :: Maybe String
+ , epubCoverage :: Maybe String
+ , epubRights :: Maybe String
+ , epubCoverImage :: Maybe String
+ , epubStylesheet :: Maybe Stylesheet
+ } deriving Show
+
+data Stylesheet = StylesheetPath FilePath
+ | StylesheetContents String
+ deriving Show
+
+data Creator = Creator{
+ creatorText :: String
+ , creatorRole :: Maybe String
+ , creatorFileAs :: Maybe String
+ } deriving Show
+
+data Identifier = Identifier{
+ identifierText :: String
+ , identifierScheme :: Maybe String
+ } deriving Show
+
+data Title = Title{
+ titleText :: String
+ , titleFileAs :: Maybe String
+ , titleType :: Maybe String
+ } deriving Show
+
+dcName :: String -> QName
+dcName n = QName n Nothing (Just "dc")
+
+dcNode :: Node t => String -> t -> Element
+dcNode = node . dcName
+
+opfName :: String -> QName
+opfName n = QName n Nothing (Just "opf")
+
+plainify :: [Inline] -> String
+plainify t =
+ trimr $ writePlain def{ writerStandalone = False }
+ $ Pandoc nullMeta [Plain $ walk removeNote t]
+
+removeNote :: Inline -> Inline
+removeNote (Note _) = Str ""
+removeNote x = x
+
+getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata
+getEPUBMetadata opts meta = do
+ let md = metadataFromMeta opts meta
+ let elts = onlyElems $ parseXML $ writerEpubMetadata opts
+ let md' = foldr addMetadataFromXML md elts
+ let addIdentifier m =
+ if null (epubIdentifier m)
+ then do
+ randomId <- fmap show getRandomUUID
+ return $ m{ epubIdentifier = [Identifier randomId Nothing] }
+ else return m
+ let addLanguage m =
+ if null (epubLanguage m)
+ then case lookup "lang" (writerVariables opts) of
+ Just x -> return m{ epubLanguage = x }
+ Nothing -> do
+ localeLang <- catch (liftM
+ (map (\c -> if c == '_' then '-' else c) .
+ takeWhile (/='.')) $ getEnv "LANG")
+ (\e -> let _ = (e :: SomeException) in return "en-US")
+ return m{ epubLanguage = localeLang }
+ else return m
+ let fixDate m =
+ if null (epubDate m)
+ then do
+ currentTime <- getCurrentTime
+ return $ m{ epubDate = showDateTimeISO8601 currentTime }
+ else return m
+ let addAuthor m =
+ if any (\c -> creatorRole c == Just "aut") $ epubCreator m
+ then return m
+ else do
+ let authors' = map plainify $ docAuthors meta
+ let toAuthor name = Creator{ creatorText = name
+ , creatorRole = Just "aut"
+ , creatorFileAs = Nothing }
+ return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m }
+ addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage
+
+addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata
+addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
+ | name == "identifier" = md{ epubIdentifier =
+ Identifier{ identifierText = strContent e
+ , identifierScheme = lookupAttr (opfName "scheme") attrs
+ } : epubIdentifier md }
+ | name == "title" = md{ epubTitle =
+ Title{ titleText = strContent e
+ , titleFileAs = getAttr "file-as"
+ , titleType = getAttr "type"
+ } : epubTitle md }
+ | name == "date" = md{ epubDate = fromMaybe "" $ normalizeDate'
+ $ strContent e }
+ | name == "language" = md{ epubLanguage = strContent e }
+ | name == "creator" = md{ epubCreator =
+ Creator{ creatorText = strContent e
+ , creatorRole = getAttr "role"
+ , creatorFileAs = getAttr "file-as"
+ } : epubCreator md }
+ | name == "contributor" = md{ epubContributor =
+ Creator { creatorText = strContent e
+ , creatorRole = getAttr "role"
+ , creatorFileAs = getAttr "file-as"
+ } : epubContributor md }
+ | name == "subject" = md{ epubSubject = strContent e : epubSubject md }
+ | name == "description" = md { epubDescription = Just $ strContent e }
+ | name == "type" = md { epubType = Just $ strContent e }
+ | name == "format" = md { epubFormat = Just $ strContent e }
+ | name == "type" = md { epubType = Just $ strContent e }
+ | name == "publisher" = md { epubPublisher = Just $ strContent e }
+ | name == "source" = md { epubSource = Just $ strContent e }
+ | name == "relation" = md { epubRelation = Just $ strContent e }
+ | name == "coverage" = md { epubCoverage = Just $ strContent e }
+ | name == "rights" = md { epubRights = Just $ strContent e }
+ | otherwise = md
+ where getAttr n = lookupAttr (opfName n) attrs
+addMetadataFromXML _ md = md
+
+metaValueToString :: MetaValue -> String
+metaValueToString (MetaString s) = s
+metaValueToString (MetaInlines ils) = plainify ils
+metaValueToString (MetaBlocks bs) = plainify $ query (:[]) bs
+metaValueToString (MetaBool b) = show b
+metaValueToString _ = ""
+
+getList :: String -> Meta -> (MetaValue -> a) -> [a]
+getList s meta handleMetaValue =
+ case lookupMeta s meta of
+ Just (MetaList xs) -> map handleMetaValue xs
+ Just mv -> [handleMetaValue mv]
+ Nothing -> []
+
+getIdentifier :: Meta -> [Identifier]
+getIdentifier meta = getList "identifier" meta handleMetaValue
+ where handleMetaValue (MetaMap m) =
+ Identifier{ identifierText = maybe "" metaValueToString
+ $ M.lookup "text" m
+ , identifierScheme = metaValueToString <$>
+ M.lookup "scheme" m }
+ handleMetaValue mv = Identifier (metaValueToString mv) Nothing
+
+getTitle :: Meta -> [Title]
+getTitle meta = getList "title" meta handleMetaValue
+ where handleMetaValue (MetaMap m) =
+ Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m
+ , titleFileAs = metaValueToString <$> M.lookup "file-as" m
+ , titleType = metaValueToString <$> M.lookup "type" m }
+ handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing
+
+getCreator :: String -> Meta -> [Creator]
+getCreator s meta = getList s meta handleMetaValue
+ where handleMetaValue (MetaMap m) =
+ Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m
+ , creatorFileAs = metaValueToString <$> M.lookup "file-as" m
+ , creatorRole = metaValueToString <$> M.lookup "role" m }
+ handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
+
+simpleList :: String -> Meta -> [String]
+simpleList s meta =
+ case lookupMeta s meta of
+ Just (MetaList xs) -> map metaValueToString xs
+ Just x -> [metaValueToString x]
+ Nothing -> []
+
+metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata
+metadataFromMeta opts meta = EPUBMetadata{
+ epubIdentifier = identifiers
+ , epubTitle = titles
+ , epubDate = date
+ , epubLanguage = language
+ , epubCreator = creators
+ , epubContributor = contributors
+ , epubSubject = subjects
+ , epubDescription = description
+ , epubType = epubtype
+ , epubFormat = format
+ , epubPublisher = publisher
+ , epubSource = source
+ , epubRelation = relation
+ , epubCoverage = coverage
+ , epubRights = rights
+ , epubCoverImage = coverImage
+ , epubStylesheet = stylesheet
+ }
+ where identifiers = getIdentifier meta
+ titles = getTitle meta
+ date = fromMaybe "" $
+ (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate'
+ language = maybe "" metaValueToString $
+ lookupMeta "language" meta `mplus` lookupMeta "lang" meta
+ creators = getCreator "creator" meta
+ contributors = getCreator "contributor" meta
+ subjects = simpleList "subject" meta
+ description = metaValueToString <$> lookupMeta "description" meta
+ epubtype = metaValueToString <$> lookupMeta "type" meta
+ format = metaValueToString <$> lookupMeta "format" meta
+ publisher = metaValueToString <$> lookupMeta "publisher" meta
+ source = metaValueToString <$> lookupMeta "source" meta
+ relation = metaValueToString <$> lookupMeta "relation" meta
+ coverage = metaValueToString <$> lookupMeta "coverage" meta
+ rights = metaValueToString <$> lookupMeta "rights" meta
+ coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus`
+ (metaValueToString <$> lookupMeta "cover-image" meta)
+ stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus`
+ ((StylesheetPath . metaValueToString) <$>
+ lookupMeta "stylesheet" meta)
+
-- | Produce an EPUB file from a Pandoc document.
writeEPUB :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO B.ByteString
writeEPUB opts doc@(Pandoc meta _) = do
- let version = maybe EPUB2 id (writerEpubVersion opts)
+ let version = fromMaybe EPUB2 (writerEpubVersion opts)
let epub3 = version == EPUB3
epochtime <- floor `fmap` getPOSIXTime
let mkEntry path content = toEntry path epochtime content
@@ -93,11 +321,11 @@ writeEPUB opts doc@(Pandoc meta _) = do
then MathML Nothing
else writerHTMLMathMethod opts
, writerWrapText = False }
- let mbCoverImage = lookup "epub-cover-image" vars
+ metadata <- getEPUBMetadata opts' meta
-- cover page
(cpgEntry, cpicEntry) <-
- case mbCoverImage of
+ case epubCoverImage metadata of
Nothing -> return ([],[])
Just img -> do
let coverImage = "cover-image" ++ takeExtension img
@@ -114,15 +342,15 @@ writeEPUB opts doc@(Pandoc meta _) = do
let tpEntry = mkEntry "title_page.xhtml" tpContent
-- handle pictures
- picsRef <- newIORef []
- Pandoc _ blocks <- walkM
- (transformInline opts' picsRef) doc
- pics <- readIORef picsRef
+ mediaRef <- newIORef []
+ Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>=
+ walkM (transformBlock opts' mediaRef)
+ pics <- readIORef mediaRef
let readPicEntry entries (oldsrc, newsrc) = do
res <- fetchItem (writerSourceURL opts') oldsrc
case res of
Left _ -> do
- warn $ "Could not find image `" ++ oldsrc ++ "', skipping..."
+ warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
return entries
Right (img,_) -> return $
(toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries
@@ -179,64 +407,60 @@ writeEPUB opts doc@(Pandoc meta _) = do
chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
$ renderHtml
$ writeHtml opts'{ writerNumberOffset =
- maybe [] id mbnum }
+ fromMaybe [] mbnum }
$ case bs of
(Header _ _ xs : _) ->
- Pandoc (setMeta "title" (fromList xs) nullMeta) bs
+ -- remove notes or we get doubled footnotes
+ Pandoc (setMeta "title" (walk removeNote $ fromList xs)
+ nullMeta) bs
_ ->
Pandoc nullMeta bs
let chapterEntries = zipWith chapToEntry [1..] chapters
-- incredibly inefficient (TODO):
- let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent)
+ let containsMathML ent = epub3 &&
+ "<math" `isInfixOf` (B8.unpack $ fromEntry ent)
+ let containsSVG ent = epub3 &&
+ "<svg" `isInfixOf` (B8.unpack $ fromEntry ent)
+ let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent]
-- contents.opf
- localeLang <- catch (liftM (map (\c -> if c == '_' then '-' else c) .
- takeWhile (/='.')) $ getEnv "LANG")
- (\e -> let _ = (e :: SomeException) in return "en-US")
- let lang = case lookup "lang" (writerVariables opts') of
- Just x -> x
- Nothing -> localeLang
- let userNodes = onlyElems $ parseXML $ writerEpubMetadata opts'
- let mbIdent = findElement (QName "identifier" Nothing (Just "dc"))
- $ unode "dummy" ! [] $ userNodes
- uuid <- case mbIdent of
- Just id' -> return $ trim $ strContent id'
- Nothing -> fmap show getRandomUUID
let chapterNode ent = unode "item" !
([("id", takeBaseName $ eRelativePath ent),
("href", eRelativePath ent),
("media-type", "application/xhtml+xml")]
- ++ [("properties","mathml") | epub3 &&
- containsMathML ent]) $ ()
+ ++ case props ent of
+ [] -> []
+ xs -> [("properties", unwords xs)])
+ $ ()
let chapterRefNode ent = unode "itemref" !
[("idref", takeBaseName $ eRelativePath ent)] $ ()
let pictureNode ent = unode "item" !
[("id", takeBaseName $ eRelativePath ent),
("href", eRelativePath ent),
("media-type", fromMaybe "application/octet-stream"
- $ imageTypeOf $ eRelativePath ent)] $ ()
+ $ mediaTypeOf $ eRelativePath ent)] $ ()
let fontNode ent = unode "item" !
[("id", takeBaseName $ eRelativePath ent),
("href", eRelativePath ent),
- ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ ()
- let plainify t = trimr $
- writePlain opts'{ writerStandalone = False } $
- Pandoc meta [Plain t]
- let plainTitle = plainify $ docTitle meta
- let plainAuthors = map plainify $ docAuthors meta
+ ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ ()
+ let plainTitle = case docTitle meta of
+ [] -> case epubTitle metadata of
+ [] -> "UNTITLED"
+ (x:_) -> titleText x
+ x -> plainify x
+ let uuid = case epubIdentifier metadata of
+ (x:_) -> identifierText x -- use first identifier as UUID
+ [] -> error "epubIdentifier is null" -- shouldn't happen
currentTime <- getCurrentTime
- let plainDate = maybe (showDateTimeISO8601 currentTime) id
- $ normalizeDate $ stringify $ docDate meta
- let contentsData = fromStringLazy $ ppTopElement $
+ let contentsData = UTF8.fromStringLazy $ ppTopElement $
unode "package" ! [("version", case version of
EPUB2 -> "2.0"
EPUB3 -> "3.0")
,("xmlns","http://www.idpf.org/2007/opf")
- ,("unique-identifier","BookId")] $
- [ metadataElement version userNodes
- uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage
+ ,("unique-identifier","epub-id-1")] $
+ [ metadataElement version metadata currentTime
, unode "manifest" $
[ unode "item" ! [("id","ncx"), ("href","toc.ncx")
,("media-type","application/x-dtbncx+xml")] $ ()
@@ -248,10 +472,15 @@ writeEPUB opts doc@(Pandoc meta _) = do
[("properties","nav") | epub3 ]) $ ()
] ++
map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
- map pictureNode (cpicEntry ++ picEntries) ++
+ (case cpicEntry of
+ [] -> []
+ (x:_) -> [add_attrs
+ [Attr (unqual "properties") "cover-image" | epub3]
+ (pictureNode x)]) ++
+ map pictureNode picEntries ++
map fontNode fontEntries
, unode "spine" ! [("toc","ncx")] $
- case mbCoverImage of
+ case epubCoverImage metadata of
Nothing -> []
Just _ -> [ unode "itemref" !
[("idref", "cover"),("linear","no")] $ () ]
@@ -266,10 +495,11 @@ writeEPUB opts doc@(Pandoc meta _) = do
map chapterRefNode chapterEntries)
, unode "guide" $
[ unode "reference" !
- [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ ()
+ [("type","toc"),("title",plainTitle),
+ ("href","nav.xhtml")] $ ()
] ++
[ unode "reference" !
- [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | mbCoverImage /= Nothing
+ [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing
]
]
let contentsEntry = mkEntry "content.opf" contentsData
@@ -312,7 +542,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
[ unode "navLabel" $ unode "text" (plainify $ docTitle meta)
, unode "content" ! [("src","title_page.xhtml")] $ () ]
- let tocData = fromStringLazy $ ppTopElement $
+ let tocData = UTF8.fromStringLazy $ ppTopElement $
unode "ncx" ! [("version","2005-1")
,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
[ unode "head" $
@@ -324,7 +554,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
,("content", "0")] $ ()
, unode "meta" ! [("name","dtb:maxPageNumber")
,("content", "0")] $ ()
- ] ++ case mbCoverImage of
+ ] ++ case epubCoverImage metadata of
Nothing -> []
Just _ -> [unode "meta" ! [("name","cover"),
("content","cover-image")] $ ()]
@@ -344,7 +574,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
(_:_) -> [unode "ol" ! [("class","toc")] $ subs]
let navtag = if epub3 then "nav" else "div"
- let navData = fromStringLazy $ ppTopElement $
+ let navData = UTF8.fromStringLazy $ ppTopElement $
unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml")
,("xmlns:epub","http://www.idpf.org/2007/ops")] $
[ unode "head" $
@@ -358,10 +588,10 @@ writeEPUB opts doc@(Pandoc meta _) = do
let navEntry = mkEntry "nav.xhtml" navData
-- mimetype
- let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip"
+ let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip"
-- container.xml
- let containerData = fromStringLazy $ ppTopElement $
+ let containerData = UTF8.fromStringLazy $ ppTopElement $
unode "container" ! [("version","1.0")
,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
unode "rootfiles" $
@@ -370,18 +600,19 @@ writeEPUB opts doc@(Pandoc meta _) = do
let containerEntry = mkEntry "META-INF/container.xml" containerData
-- com.apple.ibooks.display-options.xml
- let apple = fromStringLazy $ ppTopElement $
+ let apple = UTF8.fromStringLazy $ ppTopElement $
unode "display_options" $
unode "platform" ! [("name","*")] $
unode "option" ! [("name","specified-fonts")] $ "true"
let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
-- stylesheet
- stylesheet <- case writerEpubStylesheet opts of
- Just s -> return s
- Nothing -> toString `fmap`
+ stylesheet <- case epubStylesheet metadata of
+ Just (StylesheetPath fp) -> UTF8.readFile fp
+ Just (StylesheetContents s) -> return s
+ Nothing -> UTF8.toString `fmap`
readDataFile (writerUserDataDir opts) "epub.css"
- let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet
+ let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet
-- construct archive
let archive = foldr addEntryToArchive emptyArchive
@@ -390,51 +621,150 @@ writeEPUB opts doc@(Pandoc meta _) = do
(picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries))
return $ fromArchive archive
-metadataElement :: EPUBVersion -> [Element] -> String -> String -> String -> [String]
- -> String -> UTCTime -> Maybe a -> Element
-metadataElement version userNodes uuid lang title authors date currentTime mbCoverImage =
- let elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
- ,("xmlns:opf","http://www.idpf.org/2007/opf")] $
- filter isMetadataElement userNodes
- dublinElements = ["contributor","coverage","creator","date",
- "description","format","identifier","language","publisher",
- "relation","rights","source","subject","title","type"]
- isMetadataElement e = (qPrefix (elName e) == Just "dc" &&
- qName (elName e) `elem` dublinElements) ||
- (qPrefix (elName e) == Nothing &&
- qName (elName e) `elem` ["link","meta"])
- contains e n = not (null (findElements (QName n Nothing (Just "dc")) e))
- newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++
- [ unode "dc:language" lang | not (elt `contains` "language") ] ++
- [ unode "dc:identifier" ! [("id","BookId")] $ uuid |
- not (elt `contains` "identifier") ] ++
- [ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2]
- $ a | a <- authors, not (elt `contains` "creator") ] ++
- [ unode "dc:date" date | not (elt `contains` "date") ] ++
- [ unode "meta" ! [("property", "dcterms:modified")] $
- (showDateTimeISO8601 currentTime) | version == EPUB3] ++
- [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () |
- not (isNothing mbCoverImage) ]
- in elt{ elContent = elContent elt ++ map Elem newNodes }
+metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element
+metadataElement version md currentTime =
+ unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
+ ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes
+ where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes
+ ++ creatorNodes ++ contributorNodes ++ subjectNodes
+ ++ descriptionNodes ++ typeNodes ++ formatNodes
+ ++ publisherNodes ++ sourceNodes ++ relationNodes
+ ++ coverageNodes ++ rightsNodes ++ coverImageNodes
+ ++ modifiedNodes
+ withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
+ ([1..] :: [Int]))
+ identifierNodes = withIds "epub-id" toIdentifierNode $
+ epubIdentifier md
+ titleNodes = withIds "epub-title" toTitleNode $ epubTitle md
+ dateNodes = dcTag' "date" $ epubDate md
+ languageNodes = [dcTag "language" $ epubLanguage md]
+ creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
+ epubCreator md
+ contributorNodes = withIds "epub-contributor"
+ (toCreatorNode "contributor") $ epubContributor md
+ subjectNodes = map (dcTag "subject") $ epubSubject md
+ descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md
+ typeNodes = maybe [] (dcTag' "type") $ epubType md
+ formatNodes = maybe [] (dcTag' "format") $ epubFormat md
+ publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md
+ sourceNodes = maybe [] (dcTag' "source") $ epubSource md
+ relationNodes = maybe [] (dcTag' "relation") $ epubRelation md
+ coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md
+ rightsNodes = maybe [] (dcTag' "rights") $ epubRights md
+ coverImageNodes = maybe []
+ (const $ [unode "meta" ! [("name","cover"),
+ ("content","cover-image")] $ ()])
+ $ epubCoverImage md
+ modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
+ (showDateTimeISO8601 currentTime) | version == EPUB3 ]
+ dcTag n s = unode ("dc:" ++ n) s
+ dcTag' n s = [dcTag n s]
+ toIdentifierNode id' (Identifier txt scheme)
+ | version == EPUB2 = [dcNode "identifier" !
+ ([("id",id')] ++ maybe [] (\x -> [("opf:scheme", x)]) scheme) $
+ txt]
+ | otherwise = [dcNode "identifier" ! [("id",id')] $ txt] ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","identifier-type"),
+ ("scheme","onix:codelist5")] $ x])
+ (schemeToOnix `fmap` scheme)
+ toCreatorNode s id' creator
+ | version == EPUB2 = [dcNode s !
+ ([("id",id')] ++
+ maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++
+ maybe [] (\x -> [("opf:role",x)])
+ (creatorRole creator >>= toRelator)) $ creatorText creator]
+ | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","file-as")] $ x])
+ (creatorFileAs creator) ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","role"),
+ ("scheme","marc:relators")] $ x])
+ (creatorRole creator >>= toRelator)
+ toTitleNode id' title
+ | version == EPUB2 = [dcNode "title" !
+ ([("id",id')] ++
+ maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++
+ maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $
+ titleText title]
+ | otherwise = [dcNode "title" ! [("id",id')] $ titleText title]
+ ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","file-as")] $ x])
+ (titleFileAs title) ++
+ maybe [] (\x -> [unode "meta" !
+ [("refines",'#':id'),("property","title-type")] $ x])
+ (titleType title)
+ schemeToOnix "ISBN-10" = "02"
+ schemeToOnix "GTIN-13" = "03"
+ schemeToOnix "UPC" = "04"
+ schemeToOnix "ISMN-10" = "05"
+ schemeToOnix "DOI" = "06"
+ schemeToOnix "LCCN" = "13"
+ schemeToOnix "GTIN-14" = "14"
+ schemeToOnix "ISBN-13" = "15"
+ schemeToOnix "Legal deposit number" = "17"
+ schemeToOnix "URN" = "22"
+ schemeToOnix "OCLC" = "23"
+ schemeToOnix "ISMN-13" = "25"
+ schemeToOnix "ISBN-A" = "26"
+ schemeToOnix "JP" = "27"
+ schemeToOnix "OLCC" = "28"
+ schemeToOnix _ = "01"
showDateTimeISO8601 :: UTCTime -> String
showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
+transformTag :: WriterOptions
+ -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
+ -> Tag String
+ -> IO (Tag String)
+transformTag opts mediaRef tag@(TagOpen name attr)
+ | name == "video" || name == "source" || name == "img" = do
+ let src = fromAttrib "src" tag
+ let poster = fromAttrib "poster" tag
+ let oldsrc = maybe src (</> src) $ writerSourceURL opts
+ let oldposter = maybe poster (</> poster) $ writerSourceURL opts
+ newsrc <- modifyMediaRef mediaRef oldsrc
+ newposter <- modifyMediaRef mediaRef oldposter
+ let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
+ [("src", newsrc) | not (null newsrc)] ++
+ [("poster", newposter) | not (null newposter)]
+ return $ TagOpen name attr'
+transformTag _ _ tag = return tag
+
+modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath
+modifyMediaRef _ "" = return ""
+modifyMediaRef mediaRef oldsrc = do
+ media <- readIORef mediaRef
+ case lookup oldsrc media of
+ Just n -> return n
+ Nothing -> do
+ let new = "media/file" ++ show (length media) ++
+ takeExtension oldsrc
+ modifyIORef mediaRef ( (oldsrc, new): )
+ return new
+
+transformBlock :: WriterOptions
+ -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
+ -> Block
+ -> IO Block
+transformBlock opts mediaRef (RawBlock fmt raw)
+ | fmt == Format "html" = do
+ let tags = parseTags raw
+ tags' <- mapM (transformTag opts mediaRef) tags
+ return $ RawBlock fmt (renderTags tags')
+transformBlock _ _ b = return b
+
transformInline :: WriterOptions
- -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
+ -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
-> Inline
-> IO Inline
-transformInline opts picsRef (Image lab (src,tit)) = do
+transformInline opts mediaRef (Image lab (src,tit)) = do
let src' = unEscapeString src
- pics <- readIORef picsRef
let oldsrc = maybe src' (</> src) $ writerSourceURL opts
- let ext = takeExtension src'
- newsrc <- case lookup oldsrc pics of
- Just n -> return n
- Nothing -> do
- let new = "images/img" ++ show (length pics) ++ ext
- modifyIORef picsRef ( (oldsrc, new): )
- return new
+ newsrc <- modifyMediaRef mediaRef oldsrc
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
@@ -464,16 +794,12 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity .
Nothing -> '&':'#':unEntity xs
unEntity (x:xs) = x : unEntity xs
-imageTypeOf :: FilePath -> Maybe String
-imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
- "jpg" -> Just "image/jpeg"
- "jpeg" -> Just "image/jpeg"
- "jfif" -> Just "image/jpeg"
- "png" -> Just "image/png"
- "gif" -> Just "image/gif"
- "svg" -> Just "image/svg+xml"
- _ -> Nothing
-
+mediaTypeOf :: FilePath -> Maybe String
+mediaTypeOf x = case getMimeType x of
+ Just y@('i':'m':'a':'g':'e':_) -> Just y
+ Just y@('v':'i':'d':'e':'o':_) -> Just y
+ Just y@('a':'u':'d':'i':'o':_) -> Just y
+ _ -> Nothing
data IdentState = IdentState{
chapterNumber :: Int,
@@ -527,3 +853,287 @@ replaceRefs refTable = walk replaceOneRef
Just url -> Link lab (url,tit)
Nothing -> x
replaceOneRef x = x
+
+-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
+normalizeDate' :: String -> Maybe String
+normalizeDate' xs =
+ let xs' = trim xs in
+ case xs' of
+ [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY
+ [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM
+ -> Just xs'
+ _ -> normalizeDate xs'
+
+toRelator :: String -> Maybe String
+toRelator x
+ | x `elem` relators = Just x
+ | otherwise = lookup (map toLower x) relatorMap
+
+relators :: [String]
+relators = map snd relatorMap
+
+relatorMap :: [(String, String)]
+relatorMap =
+ [("abridger", "abr")
+ ,("actor", "act")
+ ,("adapter", "adp")
+ ,("addressee", "rcp")
+ ,("analyst", "anl")
+ ,("animator", "anm")
+ ,("annotator", "ann")
+ ,("appellant", "apl")
+ ,("appellee", "ape")
+ ,("applicant", "app")
+ ,("architect", "arc")
+ ,("arranger", "arr")
+ ,("art copyist", "acp")
+ ,("art director", "adi")
+ ,("artist", "art")
+ ,("artistic director", "ard")
+ ,("assignee", "asg")
+ ,("associated name", "asn")
+ ,("attributed name", "att")
+ ,("auctioneer", "auc")
+ ,("author", "aut")
+ ,("author in quotations or text abstracts", "aqt")
+ ,("author of afterword, colophon, etc.", "aft")
+ ,("author of dialog", "aud")
+ ,("author of introduction, etc.", "aui")
+ ,("autographer", "ato")
+ ,("bibliographic antecedent", "ant")
+ ,("binder", "bnd")
+ ,("binding designer", "bdd")
+ ,("blurb writer", "blw")
+ ,("book designer", "bkd")
+ ,("book producer", "bkp")
+ ,("bookjacket designer", "bjd")
+ ,("bookplate designer", "bpd")
+ ,("bookseller", "bsl")
+ ,("braille embosser", "brl")
+ ,("broadcaster", "brd")
+ ,("calligrapher", "cll")
+ ,("cartographer", "ctg")
+ ,("caster", "cas")
+ ,("censor", "cns")
+ ,("choreographer", "chr")
+ ,("cinematographer", "cng")
+ ,("client", "cli")
+ ,("collection registrar", "cor")
+ ,("collector", "col")
+ ,("collotyper", "clt")
+ ,("colorist", "clr")
+ ,("commentator", "cmm")
+ ,("commentator for written text", "cwt")
+ ,("compiler", "com")
+ ,("complainant", "cpl")
+ ,("complainant-appellant", "cpt")
+ ,("complainant-appellee", "cpe")
+ ,("composer", "cmp")
+ ,("compositor", "cmt")
+ ,("conceptor", "ccp")
+ ,("conductor", "cnd")
+ ,("conservator", "con")
+ ,("consultant", "csl")
+ ,("consultant to a project", "csp")
+ ,("contestant", "cos")
+ ,("contestant-appellant", "cot")
+ ,("contestant-appellee", "coe")
+ ,("contestee", "cts")
+ ,("contestee-appellant", "ctt")
+ ,("contestee-appellee", "cte")
+ ,("contractor", "ctr")
+ ,("contributor", "ctb")
+ ,("copyright claimant", "cpc")
+ ,("copyright holder", "cph")
+ ,("corrector", "crr")
+ ,("correspondent", "crp")
+ ,("costume designer", "cst")
+ ,("court governed", "cou")
+ ,("court reporter", "crt")
+ ,("cover designer", "cov")
+ ,("creator", "cre")
+ ,("curator", "cur")
+ ,("dancer", "dnc")
+ ,("data contributor", "dtc")
+ ,("data manager", "dtm")
+ ,("dedicatee", "dte")
+ ,("dedicator", "dto")
+ ,("defendant", "dfd")
+ ,("defendant-appellant", "dft")
+ ,("defendant-appellee", "dfe")
+ ,("degree granting institution", "dgg")
+ ,("delineator", "dln")
+ ,("depicted", "dpc")
+ ,("depositor", "dpt")
+ ,("designer", "dsr")
+ ,("director", "drt")
+ ,("dissertant", "dis")
+ ,("distribution place", "dbp")
+ ,("distributor", "dst")
+ ,("donor", "dnr")
+ ,("draftsman", "drm")
+ ,("dubious author", "dub")
+ ,("editor", "edt")
+ ,("editor of compilation", "edc")
+ ,("editor of moving image work", "edm")
+ ,("electrician", "elg")
+ ,("electrotyper", "elt")
+ ,("enacting jurisdiction", "enj")
+ ,("engineer", "eng")
+ ,("engraver", "egr")
+ ,("etcher", "etr")
+ ,("event place", "evp")
+ ,("expert", "exp")
+ ,("facsimilist", "fac")
+ ,("field director", "fld")
+ ,("film director", "fmd")
+ ,("film distributor", "fds")
+ ,("film editor", "flm")
+ ,("film producer", "fmp")
+ ,("filmmaker", "fmk")
+ ,("first party", "fpy")
+ ,("forger", "frg")
+ ,("former owner", "fmo")
+ ,("funder", "fnd")
+ ,("geographic information specialist", "gis")
+ ,("honoree", "hnr")
+ ,("host", "hst")
+ ,("host institution", "his")
+ ,("illuminator", "ilu")
+ ,("illustrator", "ill")
+ ,("inscriber", "ins")
+ ,("instrumentalist", "itr")
+ ,("interviewee", "ive")
+ ,("interviewer", "ivr")
+ ,("inventor", "inv")
+ ,("issuing body", "isb")
+ ,("judge", "jud")
+ ,("jurisdiction governed", "jug")
+ ,("laboratory", "lbr")
+ ,("laboratory director", "ldr")
+ ,("landscape architect", "lsa")
+ ,("lead", "led")
+ ,("lender", "len")
+ ,("libelant", "lil")
+ ,("libelant-appellant", "lit")
+ ,("libelant-appellee", "lie")
+ ,("libelee", "lel")
+ ,("libelee-appellant", "let")
+ ,("libelee-appellee", "lee")
+ ,("librettist", "lbt")
+ ,("licensee", "lse")
+ ,("licensor", "lso")
+ ,("lighting designer", "lgd")
+ ,("lithographer", "ltg")
+ ,("lyricist", "lyr")
+ ,("manufacture place", "mfp")
+ ,("manufacturer", "mfr")
+ ,("marbler", "mrb")
+ ,("markup editor", "mrk")
+ ,("metadata contact", "mdc")
+ ,("metal-engraver", "mte")
+ ,("moderator", "mod")
+ ,("monitor", "mon")
+ ,("music copyist", "mcp")
+ ,("musical director", "msd")
+ ,("musician", "mus")
+ ,("narrator", "nrt")
+ ,("onscreen presenter", "osp")
+ ,("opponent", "opn")
+ ,("organizer of meeting", "orm")
+ ,("originator", "org")
+ ,("other", "oth")
+ ,("owner", "own")
+ ,("panelist", "pan")
+ ,("papermaker", "ppm")
+ ,("patent applicant", "pta")
+ ,("patent holder", "pth")
+ ,("patron", "pat")
+ ,("performer", "prf")
+ ,("permitting agency", "pma")
+ ,("photographer", "pht")
+ ,("plaintiff", "ptf")
+ ,("plaintiff-appellant", "ptt")
+ ,("plaintiff-appellee", "pte")
+ ,("platemaker", "plt")
+ ,("praeses", "pra")
+ ,("presenter", "pre")
+ ,("printer", "prt")
+ ,("printer of plates", "pop")
+ ,("printmaker", "prm")
+ ,("process contact", "prc")
+ ,("producer", "pro")
+ ,("production company", "prn")
+ ,("production designer", "prs")
+ ,("production manager", "pmn")
+ ,("production personnel", "prd")
+ ,("production place", "prp")
+ ,("programmer", "prg")
+ ,("project director", "pdr")
+ ,("proofreader", "pfr")
+ ,("provider", "prv")
+ ,("publication place", "pup")
+ ,("publisher", "pbl")
+ ,("publishing director", "pbd")
+ ,("puppeteer", "ppt")
+ ,("radio director", "rdd")
+ ,("radio producer", "rpc")
+ ,("recording engineer", "rce")
+ ,("recordist", "rcd")
+ ,("redaktor", "red")
+ ,("renderer", "ren")
+ ,("reporter", "rpt")
+ ,("repository", "rps")
+ ,("research team head", "rth")
+ ,("research team member", "rtm")
+ ,("researcher", "res")
+ ,("respondent", "rsp")
+ ,("respondent-appellant", "rst")
+ ,("respondent-appellee", "rse")
+ ,("responsible party", "rpy")
+ ,("restager", "rsg")
+ ,("restorationist", "rsr")
+ ,("reviewer", "rev")
+ ,("rubricator", "rbr")
+ ,("scenarist", "sce")
+ ,("scientific advisor", "sad")
+ ,("screenwriter", "aus")
+ ,("scribe", "scr")
+ ,("sculptor", "scl")
+ ,("second party", "spy")
+ ,("secretary", "sec")
+ ,("seller", "sll")
+ ,("set designer", "std")
+ ,("setting", "stg")
+ ,("signer", "sgn")
+ ,("singer", "sng")
+ ,("sound designer", "sds")
+ ,("speaker", "spk")
+ ,("sponsor", "spn")
+ ,("stage director", "sgd")
+ ,("stage manager", "stm")
+ ,("standards body", "stn")
+ ,("stereotyper", "str")
+ ,("storyteller", "stl")
+ ,("supporting host", "sht")
+ ,("surveyor", "srv")
+ ,("teacher", "tch")
+ ,("technical director", "tcd")
+ ,("television director", "tld")
+ ,("television producer", "tlp")
+ ,("thesis advisor", "ths")
+ ,("transcriber", "trc")
+ ,("translator", "trl")
+ ,("type designer", "tyd")
+ ,("typographer", "tyg")
+ ,("university place", "uvp")
+ ,("videographer", "vdg")
+ ,("witness", "wit")
+ ,("wood engraver", "wde")
+ ,("woodcutter", "wdc")
+ ,("writer of accompanying material", "wam")
+ ,("writer of added commentary", "wac")
+ ,("writer of added lyrics", "wal")
+ ,("writer of added text", "wat")
+ ]
+
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index adbe948be..803617f95 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -44,7 +44,7 @@ import qualified Text.XML.Light.Cursor as XC
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
-import Text.Pandoc.Shared (orderedListMarkers)
+import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock)
import Text.Pandoc.Walk
-- | Data to be written at the end of the document:
@@ -157,9 +157,7 @@ renderSection level (ttl, body) = do
else cMapM blockToXml body
return $ el "section" (title ++ content)
where
- hasSubsections = any isHeader
- isHeader (Header _ _ _) = True
- isHeader _ = False
+ hasSubsections = any isHeaderBlock
-- | Only <p> and <empty-line> are allowed within <title> in FB2.
formatTitle :: [Inline] -> [Content]
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 641652276..e0385af25 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -45,7 +45,7 @@ import Numeric ( showHex )
import Data.Char ( ord, toLower )
import Data.List ( isPrefixOf, intersperse )
import Data.String ( fromString )
-import Data.Maybe ( catMaybes )
+import Data.Maybe ( catMaybes, fromMaybe )
import Control.Monad.State
import Text.Blaze.Html hiding(contents)
import Text.Blaze.Internal(preEscapedString)
@@ -118,7 +118,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
let stringifyHTML = escapeStringForXML . stringify
let authsMeta = map stringifyHTML $ docAuthors meta
let dateMeta = stringifyHTML $ docDate meta
- let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts
+ let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts
let sects = hierarchicalize $
if writerSlideVariant opts == NoSlides
then blocks
@@ -475,28 +475,22 @@ blockToHtml opts (BlockQuote blocks) =
else do
contents <- blockListToHtml opts blocks
return $ H.blockquote $ nl opts >> contents >> nl opts
-blockToHtml opts (Header level (ident,_,_) lst) = do
+blockToHtml opts (Header level (_,classes,_) lst) = do
contents <- inlineListToHtml opts lst
secnum <- liftM stSecNum get
let contents' = if writerNumberSections opts && not (null secnum)
+ && "unnumbered" `notElem` classes
then (H.span ! A.class_ "header-section-number" $ toHtml
$ showSecNum secnum) >> strToHtml " " >> contents
else contents
- let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides]
- let contents'' = if writerTableOfContents opts && not (null ident)
- then H.a ! A.href (toValue $
- '#' : revealSlash ++
- writerIdentifierPrefix opts ++
- ident) $ contents'
- else contents'
return $ case level of
- 1 -> H.h1 contents''
- 2 -> H.h2 contents''
- 3 -> H.h3 contents''
- 4 -> H.h4 contents''
- 5 -> H.h5 contents''
- 6 -> H.h6 contents''
- _ -> H.p contents''
+ 1 -> H.h1 contents'
+ 2 -> H.h2 contents'
+ 3 -> H.h3 contents'
+ 4 -> H.h4 contents'
+ 5 -> H.h5 contents'
+ 6 -> H.h6 contents'
+ _ -> H.p contents'
blockToHtml opts (BulletList lst) = do
contents <- mapM (blockListToHtml opts) lst
return $ unordList opts contents
@@ -524,7 +518,7 @@ blockToHtml opts (DefinitionList lst) = do
contents <- mapM (\(term, defs) ->
do term' <- if null term
then return mempty
- else liftM (H.dt) $ inlineListToHtml opts term
+ else liftM H.dt $ inlineListToHtml opts term
defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) .
blockListToHtml opts) defs
return $ mconcat $ nl opts : term' : nl opts :
@@ -539,11 +533,16 @@ blockToHtml opts (Table capt aligns widths headers rows') = do
let percent w = show (truncate (100*w) :: Integer) ++ "%"
let coltags = if all (== 0.0) widths
then mempty
- else mconcat $ map (\w ->
- if writerHtml5 opts
- then H.col ! A.style (toValue $ "width: " ++ percent w)
- else H.col ! A.width (toValue $ percent w) >> nl opts)
- widths
+ else do
+ H.colgroup $ do
+ nl opts
+ mapM_ (\w -> do
+ if writerHtml5 opts
+ then H.col ! A.style (toValue $ "width: " ++
+ percent w)
+ else H.col ! A.width (toValue $ percent w)
+ nl opts) widths
+ nl opts
head' <- if all null headers
then return mempty
else do
@@ -756,7 +755,9 @@ inlineToHtml opts inline =
else [A.title $ toValue tit])
return $ foldl (!) H5.embed attributes
-- note: null title included, as in Markdown.pl
- (Note contents) -> do
+ (Note contents)
+ | writerIgnoreNotes opts -> return mempty
+ | otherwise -> do
st <- get
let notes = stNotes st
let number = (length notes) + 1
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
new file mode 100644
index 000000000..19d486b25
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -0,0 +1,525 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+{- |
+ Module : Text.Pandoc.Writers.ICML
+ Copyright : Copyright (C) 2013 github.com/mb21
+ License : GNU GPL, version 2 or above
+
+ Stability : alpha
+
+Conversion of 'Pandoc' documents to Adobe InCopy ICML, a stand-alone XML format
+which is a subset of the zipped IDML format for which the documentation is
+available here: http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/sdk/cs6/idml/idml-specification.pdf
+InCopy is the companion word-processor to Adobe InDesign and ICML documents can be integrated
+into InDesign with File -> Place.
+-}
+module Text.Pandoc.Writers.ICML (writeICML) where
+import Text.Pandoc.Definition
+import Text.Pandoc.XML
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Shared (splitBy)
+import Text.Pandoc.Options
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Pretty
+import Data.List (isPrefixOf, isInfixOf, stripPrefix)
+import Data.Text as Text (breakOnAll, pack)
+import Data.Monoid (mappend)
+import Control.Monad.State
+import qualified Data.Set as Set
+
+type Style = [String]
+type Hyperlink = [(Int, String)]
+
+data WriterState = WriterState{
+ blockStyles :: Set.Set String
+ , inlineStyles :: Set.Set String
+ , links :: Hyperlink
+ , listDepth :: Int
+ , maxListDepth :: Int
+ }
+
+type WS a = State WriterState a
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{
+ blockStyles = Set.empty
+ , inlineStyles = Set.empty
+ , links = []
+ , listDepth = 1
+ , maxListDepth = 0
+ }
+
+-- inline names (appear in InDesign's character styles pane)
+emphName :: String
+strongName :: String
+strikeoutName :: String
+superscriptName :: String
+subscriptName :: String
+smallCapsName :: String
+codeName :: String
+linkName :: String
+emphName = "Italic"
+strongName = "Bold"
+strikeoutName = "Strikeout"
+superscriptName = "Superscript"
+subscriptName = "Subscript"
+smallCapsName = "SmallCaps"
+codeName = "Code"
+linkName = "Link"
+
+-- block element names (appear in InDesign's paragraph styles pane)
+paragraphName :: String
+codeBlockName :: String
+rawBlockName :: String
+blockQuoteName :: String
+orderedListName :: String
+bulletListName :: String
+defListTermName :: String
+defListDefName :: String
+headerName :: String
+tableName :: String
+tableHeaderName :: String
+tableCaptionName :: String
+alignLeftName :: String
+alignRightName :: String
+alignCenterName :: String
+firstListItemName :: String
+beginsWithName :: String
+lowerRomanName :: String
+upperRomanName :: String
+lowerAlphaName :: String
+upperAlphaName :: String
+subListParName :: String
+footnoteName :: String
+paragraphName = "Paragraph"
+codeBlockName = "CodeBlock"
+rawBlockName = "Rawblock"
+blockQuoteName = "Blockquote"
+orderedListName = "NumList"
+bulletListName = "BulList"
+defListTermName = "DefListTerm"
+defListDefName = "DefListDef"
+headerName = "Header"
+tableName = "TablePar"
+tableHeaderName = "TableHeader"
+tableCaptionName = "TableCaption"
+alignLeftName = "LeftAlign"
+alignRightName = "RightAlign"
+alignCenterName = "CenterAlign"
+firstListItemName = "first"
+beginsWithName = "beginsWith-"
+lowerRomanName = "lowerRoman"
+upperRomanName = "upperRoman"
+lowerAlphaName = "lowerAlpha"
+upperAlphaName = "upperAlpha"
+subListParName = "subParagraph"
+footnoteName = "Footnote"
+
+
+-- | Convert Pandoc document to string in ICML format.
+writeICML :: WriterOptions -> Pandoc -> String
+writeICML opts (Pandoc meta blocks) =
+ let colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ render' = render colwidth
+ renderMeta f s = Just $ render' $ fst $ runState (f opts [] s) defaultWriterState
+ Just metadata = metaToJSON opts
+ (renderMeta blocksToICML)
+ (renderMeta inlinesToICML)
+ meta
+ (doc, st) = runState (blocksToICML opts [] blocks) defaultWriterState
+ main = render' doc
+ context = defField "body" main
+ $ defField "charStyles" (render' $ charStylesToDoc st)
+ $ defField "parStyles" (render' $ parStylesToDoc st)
+ $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st)
+ $ metadata
+ in if writerStandalone opts
+ then renderTemplate' (writerTemplate opts) context
+ else main
+
+-- | Auxilary functions for parStylesToDoc and charStylesToDoc.
+contains :: String -> (String, (String, String)) -> [(String, String)]
+contains s rule =
+ if isInfixOf (fst rule) s
+ then [snd rule]
+ else []
+
+-- | The monospaced font to use as default.
+monospacedFont :: Doc
+monospacedFont = inTags False "AppliedFont" [("type", "string")] $ text "Courier New"
+
+-- | How much to indent blockquotes etc.
+defaultIndent :: Int
+defaultIndent = 20
+
+-- | How much to indent numbered lists before the number.
+defaultListIndent :: Int
+defaultListIndent = 10
+
+-- other constants
+lineSeparator :: String
+lineSeparator = "&#x2028;"
+
+-- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles.
+parStylesToDoc :: WriterState -> Doc
+parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st
+ where
+ makeStyle s =
+ let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str)
+ attrs = concat $ map (contains s) $ [
+ (defListTermName, ("BulletsAndNumberingListType", "BulletList"))
+ , (defListTermName, ("FontStyle", "Bold"))
+ , (tableHeaderName, ("FontStyle", "Bold"))
+ , (alignLeftName, ("Justification", "LeftAlign"))
+ , (alignRightName, ("Justification", "RightAlign"))
+ , (alignCenterName, ("Justification", "CenterAlign"))
+ , (headerName++"1", ("PointSize", "36"))
+ , (headerName++"2", ("PointSize", "30"))
+ , (headerName++"3", ("PointSize", "24"))
+ , (headerName++"4", ("PointSize", "18"))
+ , (headerName++"5", ("PointSize", "14"))
+ ]
+ -- what is the most nested list type, if any?
+ (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s
+ where
+ findList [] = (False, False)
+ findList (x:xs) | x == bulletListName = (True, False)
+ | x == orderedListName = (False, True)
+ | otherwise = findList xs
+ nBuls = countSubStrs bulletListName s
+ nOrds = countSubStrs orderedListName s
+ attrs' = numbering ++ listType ++ indent ++ attrs
+ where
+ numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)]
+ | otherwise = []
+ listType | isOrderedList && (not $ isInfixOf subListParName s)
+ = [("BulletsAndNumberingListType", "NumberedList")]
+ | isBulletList && (not $ isInfixOf subListParName s)
+ = [("BulletsAndNumberingListType", "BulletList")]
+ | otherwise = []
+ indent = [("LeftIndent", show indt)]
+ where
+ nBlockQuotes = countSubStrs blockQuoteName s
+ nDefLists = countSubStrs defListDefName s
+ indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists)
+ props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm)
+ where
+ font = if isInfixOf codeBlockName s
+ then monospacedFont
+ else empty
+ basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font
+ tabList = if isBulletList
+ then inTags True "TabList" [("type","list")] $ inTags True "ListItem" [("type","record")]
+ $ vcat [
+ inTags False "Alignment" [("type","enumeration")] $ text "LeftAlign"
+ , inTags False "AlignmentCharacter" [("type","string")] $ text "."
+ , selfClosingTag "Leader" [("type","string")]
+ , inTags False "Position" [("type","unit")] $ text
+ $ show $ defaultListIndent * (nBuls + nOrds)
+ ]
+ else empty
+ makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name)
+ numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..."
+ | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..."
+ | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..."
+ | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..."
+ | otherwise = empty
+ in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props
+
+-- | Convert a WriterState with its inline styles to the ICML listing of Character Styles.
+charStylesToDoc :: WriterState -> Doc
+charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st
+ where
+ makeStyle s =
+ let attrs = concat $ map (contains s) [
+ (strikeoutName, ("StrikeThru", "true"))
+ , (superscriptName, ("Position", "Superscript"))
+ , (subscriptName, ("Position", "Subscript"))
+ , (smallCapsName, ("Capitalization", "SmallCaps"))
+ ]
+ attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs
+ | isInfixOf strongName s = ("FontStyle", "Bold") : attrs
+ | isInfixOf emphName s = ("FontStyle", "Italic") : attrs
+ | otherwise = attrs
+ props = inTags True "Properties" [] $
+ inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font
+ where
+ font =
+ if isInfixOf codeName s
+ then monospacedFont
+ else empty
+ in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props
+
+-- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks.
+hyperlinksToDoc :: Hyperlink -> Doc
+hyperlinksToDoc [] = empty
+hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs
+ where
+ hyp (ident, url) = hdest $$ hlink
+ where
+ hdest = selfClosingTag "HyperlinkURLDestination"
+ [("Self", "HyperlinkURLDestination/"++url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")]
+ hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url),
+ ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")]
+ $ inTags True "Properties" []
+ $ inTags False "BorderColor" [("type","enumeration")] (text "Black")
+ $$ (inTags False "Destination" [("type","object")]
+ $ text $ "HyperlinkURLDestination/"++(escapeStringForXML url))
+
+
+-- | Convert a list of Pandoc blocks to ICML.
+blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc
+blocksToICML opts style lst = vcat `fmap` mapM (blockToICML opts style) lst
+
+-- | Convert a Pandoc block element to ICML.
+blockToICML :: WriterOptions -> Style -> Block -> WS Doc
+blockToICML opts style (Plain lst) = parStyle opts style lst
+blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst
+blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str]
+blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str]
+blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks
+blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst
+blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst
+blockToICML opts style (DefinitionList lst) = vcat `fmap` mapM (definitionListItemToICML opts style) lst
+blockToICML opts style (Header lvl _ lst) =
+ let stl = (headerName ++ show lvl):style
+ in parStyle opts stl lst
+blockToICML _ _ HorizontalRule = return empty -- we could insert a page break instead
+blockToICML opts style (Table caption aligns widths headers rows) =
+ let style' = tableName : style
+ noHeader = all null headers
+ nrHeaders = if noHeader
+ then "0"
+ else "1"
+ nrRows = length rows
+ nrCols = if null rows
+ then 0
+ else length $ head rows
+ rowsToICML [] _ = return empty
+ rowsToICML (col:rest) rowNr =
+ liftM2 ($$) (colsToICML col rowNr (0::Int)) $ rowsToICML rest (rowNr+1)
+ colsToICML [] _ _ = return empty
+ colsToICML (cell:rest) rowNr colNr = do
+ let stl = if rowNr == 0 && not noHeader
+ then tableHeaderName:style'
+ else style'
+ alig = aligns !! colNr
+ stl' | alig == AlignLeft = alignLeftName : stl
+ | alig == AlignRight = alignRightName : stl
+ | alig == AlignCenter = alignCenterName : stl
+ | otherwise = stl
+ c <- blocksToICML opts stl' cell
+ let cl = return $ inTags True "Cell"
+ [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c
+ liftM2 ($$) cl $ colsToICML rest rowNr (colNr+1)
+ in do
+ let tabl = if noHeader
+ then rows
+ else headers:rows
+ cells <- rowsToICML tabl (0::Int)
+ let colWidths w = if w > 0
+ then [("SingleColumnWidth",show $ 500 * w)]
+ else []
+ let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup)
+ let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths
+ let tableDoc = return $ inTags True "Table" [
+ ("AppliedTableStyle","TableStyle/Table")
+ , ("HeaderRowCount", nrHeaders)
+ , ("BodyRowCount", show nrRows)
+ , ("ColumnCount", show nrCols)
+ ] (colDescs $$ cells)
+ liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption
+blockToICML opts style (Div _ lst) = blocksToICML opts style lst
+blockToICML _ _ Null = return empty
+
+-- | Convert a list of lists of blocks to ICML list items.
+listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc
+listItemsToICML _ _ _ _ [] = return empty
+listItemsToICML opts listType style attribs (first:rest) = do
+ st <- get
+ put st{ listDepth = 1 + listDepth st}
+ let stl = listType:style
+ let f = listItemToICML opts stl True attribs first
+ let r = map (listItemToICML opts stl False attribs) rest
+ docs <- sequence $ f:r
+ s <- get
+ let maxD = max (maxListDepth s) (listDepth s)
+ put s{ listDepth = 1, maxListDepth = maxD }
+ return $ vcat docs
+
+-- | Convert a list of blocks to ICML list items.
+listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc
+listItemToICML opts style isFirst attribs item =
+ let makeNumbStart (Just (beginsWith, numbStl, _)) =
+ let doN DefaultStyle = []
+ doN LowerRoman = [lowerRomanName]
+ doN UpperRoman = [upperRomanName]
+ doN LowerAlpha = [lowerAlphaName]
+ doN UpperAlpha = [upperAlphaName]
+ doN _ = []
+ bw = if beginsWith > 1
+ then [beginsWithName ++ show beginsWith]
+ else []
+ in doN numbStl ++ bw
+ makeNumbStart Nothing = []
+ stl = if isFirst
+ then firstListItemName:style
+ else style
+ stl' = makeNumbStart attribs ++ stl
+ in if length item > 1
+ then do
+ let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst
+ insertTab block = blockToICML opts style block
+ f <- blockToICML opts stl' $ head item
+ r <- fmap vcat $ mapM insertTab $ tail item
+ return $ f $$ r
+ else blocksToICML opts stl' item
+
+definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc
+definitionListItemToICML opts style (term,defs) = do
+ term' <- parStyle opts (defListTermName:style) term
+ defs' <- vcat `fmap` mapM (blocksToICML opts (defListDefName:style)) defs
+ return $ term' $$ defs'
+
+
+-- | Convert a list of inline elements to ICML.
+inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc
+inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst)
+
+-- | Convert an inline element to ICML.
+inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc
+inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str
+inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst
+inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst
+inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst
+inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst
+inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst
+inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst
+inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"]
+inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"]
+inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst]
+inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str
+inlineToICML _ style Space = charStyle style space
+inlineToICML _ style LineBreak = charStyle style $ text lineSeparator
+inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math
+inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str
+inlineToICML opts style (Link lst (url, title)) = do
+ content <- inlinesToICML opts (linkName:style) lst
+ state $ \st ->
+ let ident = if null $ links st
+ then 1::Int
+ else 1 + (fst $ head $ links st)
+ newst = st{ links = (ident, url):(links st) }
+ cont = inTags True "HyperlinkTextSource"
+ [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content
+ in (cont, newst)
+inlineToICML opts style (Image alt target) = imageICML opts style alt target
+inlineToICML opts style (Note lst) = footnoteToICML opts style lst
+inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst
+
+-- | Convert a list of block elements to an ICML footnote.
+footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc
+footnoteToICML opts style lst =
+ let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls
+ insertTab block = blockToICML opts (footnoteName:style) block
+ in do
+ contents <- mapM insertTab lst
+ let number = inTags True "ParagraphStyleRange" [] $
+ inTags True "CharacterStyleRange" [] $ inTagsSimple "Content" "<?ACE 4?>"
+ return $ inTags True "CharacterStyleRange"
+ [("AppliedCharacterStyle","$ID/NormalCharacterStyle"), ("Position","Superscript")]
+ $ inTags True "Footnote" [] $ number $$ vcat contents
+
+-- | Auxiliary function to merge Space elements into the adjacent Strs.
+mergeSpaces :: [Inline] -> [Inline]
+mergeSpaces ((Str s):(Space:((Str s'):xs))) = mergeSpaces $ Str(s++" "++s') : xs
+mergeSpaces (Space:((Str s):xs)) = mergeSpaces $ Str (" "++s) : xs
+mergeSpaces ((Str s):(Space:xs)) = mergeSpaces $ Str (s++" ") : xs
+mergeSpaces (x:xs) = x : (mergeSpaces xs)
+mergeSpaces [] = []
+
+-- | Wrap a list of inline elements in an ICML Paragraph Style
+parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc
+parStyle opts style lst =
+ let slipIn x y = if null y
+ then x
+ else x ++ " > " ++ y
+ stlStr = foldr slipIn [] $ reverse style
+ stl = if null stlStr
+ then ""
+ else "ParagraphStyle/" ++ stlStr
+ attrs = ("AppliedParagraphStyle", stl)
+ attrs' = if firstListItemName `elem` style
+ then let ats = attrs : [("NumberingContinue", "false")]
+ begins = filter (isPrefixOf beginsWithName) style
+ in if null begins
+ then ats
+ else let i = maybe "" id $ stripPrefix beginsWithName $ head begins
+ in ("NumberingStartAt", i) : ats
+ else [attrs]
+ in do
+ content <- inlinesToICML opts [] lst
+ let cont = inTags True "ParagraphStyleRange" attrs'
+ $ mappend content $ selfClosingTag "Br" []
+ state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st })
+
+-- | Wrap a Doc in an ICML Character Style.
+charStyle :: Style -> Doc -> WS Doc
+charStyle style content =
+ let (stlStr, attrs) = styleToStrAttr style
+ doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content
+ in do
+ state $ \st ->
+ let styles = if null stlStr
+ then st
+ else st{ inlineStyles = Set.insert stlStr $ inlineStyles st }
+ in (doc, styles)
+
+-- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute.
+styleToStrAttr :: Style -> (String, [(String, String)])
+styleToStrAttr style =
+ let stlStr = unwords $ Set.toAscList $ Set.fromList style
+ stl = if null style
+ then "$ID/NormalCharacterStyle"
+ else "CharacterStyle/" ++ stlStr
+ attrs = [("AppliedCharacterStyle", stl)]
+ in (stlStr, attrs)
+
+-- | Assemble an ICML Image.
+imageICML :: WriterOptions -> Style -> [Inline] -> Target -> WS Doc
+imageICML _ style _ (linkURI, _) =
+ let imgWidth = 300::Int --TODO: set width, height dynamically as in Docx.hs
+ imgHeight = 200::Int
+ scaleFact = show (1::Double) --TODO: set scaling factor so image is scaled exactly to imgWidth x imgHeight
+ hw = show $ imgWidth `div` 2
+ hh = show $ imgHeight `div` 2
+ qw = show $ imgWidth `div` 4
+ qh = show $ imgHeight `div` 4
+ (stlStr, attrs) = styleToStrAttr style
+ props = inTags True "Properties" [] $ inTags True "PathGeometry" []
+ $ inTags True "GeometryPathType" [("PathOpen","false")]
+ $ inTags True "PathPointArray" []
+ $ vcat [
+ selfClosingTag "PathPointType" [("Anchor", "-"++qw++" -"++qh),
+ ("LeftDirection", "-"++qw++" -"++qh), ("RightDirection", "-"++qw++" -"++qh)]
+ , selfClosingTag "PathPointType" [("Anchor", "-"++qw++" "++qh),
+ ("LeftDirection", "-"++qw++" "++qh), ("RightDirection", "-"++qw++" "++qh)]
+ , selfClosingTag "PathPointType" [("Anchor", qw++" "++qh),
+ ("LeftDirection", qw++" "++qh), ("RightDirection", qw++" "++qh)]
+ , selfClosingTag "PathPointType" [("Anchor", qw++" -"++qh),
+ ("LeftDirection", qw++" -"++qh), ("RightDirection", qw++" -"++qh)]
+ ]
+ image = inTags True "Image"
+ [("Self","ue6"), ("ItemTransform", scaleFact++" 0 0 "++scaleFact++" -"++qw++" -"++qh)]
+ $ vcat [
+ inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded"
+ $$ selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0"), ("Right", hw), ("Bottom", hh)]
+ , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", linkURI)]
+ ]
+ doc = inTags True "CharacterStyleRange" attrs
+ $ inTags True "Rectangle" [("Self","uec"), ("ItemTransform", "1 0 0 1 "++qw++" -"++qh)]
+ $ (props $$ image)
+ in do
+ state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } )
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index f3cbcf19f..b2eff4490 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -40,6 +40,7 @@ import Network.URI ( isURI, unEscapeString )
import Data.List ( (\\), isSuffixOf, isInfixOf,
isPrefixOf, intercalate, intersperse )
import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
+import Data.Maybe ( fromMaybe )
import Control.Applicative ((<|>))
import Control.Monad.State
import Text.Pandoc.Pretty
@@ -50,6 +51,8 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
data WriterState =
WriterState { stInNote :: Bool -- true if we're in a note
+ , stInMinipage :: Bool -- true if in minipage
+ , stNotes :: [Doc] -- notes in a minipage
, stOLLevel :: Int -- level of ordered list nesting
, stOptions :: WriterOptions -- writer options, so they don't have to be parameter
, stVerbInNote :: Bool -- true if document has verbatim text in note
@@ -70,7 +73,7 @@ data WriterState =
writeLaTeX :: WriterOptions -> Pandoc -> String
writeLaTeX options document =
evalState (pandocToLaTeX options document) $
- WriterState { stInNote = False,
+ WriterState { stInNote = False, stInMinipage = False, stNotes = [],
stOLLevel = 1, stOptions = options,
stVerbInNote = False,
stTable = False, stStrikeout = False,
@@ -188,7 +191,7 @@ stringToLaTeX _ [] = return ""
stringToLaTeX ctx (x:xs) = do
opts <- gets stOptions
rest <- stringToLaTeX ctx xs
- let ligatures = writerTeXLigatures opts && not (ctx == CodeString)
+ let ligatures = writerTeXLigatures opts && (ctx /= CodeString)
let isUrl = ctx == URLString
when (x == '€') $
modify $ \st -> st{ stUsesEuro = True }
@@ -203,7 +206,7 @@ stringToLaTeX ctx (x:xs) = do
'_' | not isUrl -> "\\_" ++ rest
'#' -> "\\#" ++ rest
'-' -> case xs of -- prevent adjacent hyphens from forming ligatures
- ('-':_) -> "-{}" ++ rest
+ ('-':_) -> "-\\/" ++ rest
_ -> '-' : rest
'~' | not isUrl -> "\\textasciitilde{}" ++ rest
'^' -> "\\^{}" ++ rest
@@ -238,7 +241,7 @@ inCmd cmd contents = char '\\' <> text cmd <> braces contents
toSlides :: [Block] -> State WriterState [Block]
toSlides bs = do
opts <- gets stOptions
- let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts
+ let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts
let bs' = prepSlides slideLevel bs
concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs')
@@ -441,7 +444,7 @@ blockToLaTeX (DefinitionList lst) = do
incremental <- gets stIncremental
let inc = if incremental then "[<+->]" else ""
items <- mapM defListItemToLaTeX lst
- let spacing = if and $ map isTightList (map snd lst)
+ let spacing = if all isTightList (map snd lst)
then text "\\itemsep1pt\\parskip0pt\\parsep0pt"
else empty
return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$
@@ -453,12 +456,12 @@ blockToLaTeX (Header level (id',classes,_) lst) =
blockToLaTeX (Table caption aligns widths heads rows) = do
headers <- if all null heads
then return empty
- else ($$ "\\hline\\noalign{\\medskip}") `fmap`
+ else ($$ "\\midrule\\endhead") `fmap`
(tableRowToLaTeX True aligns widths) heads
captionText <- inlineListToLaTeX caption
let capt = if isEmpty captionText
then empty
- else text "\\noalign{\\medskip}"
+ else text "\\addlinespace"
$$ text "\\caption" <> braces captionText
rows' <- mapM (tableRowToLaTeX False aligns widths) rows
let colDescriptors = text $ concat $ map toColDescriptor aligns
@@ -466,10 +469,10 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
return $ "\\begin{longtable}[c]" <>
braces ("@{}" <> colDescriptors <> "@{}")
-- the @{} removes extra space at beginning and end
- $$ "\\hline\\noalign{\\medskip}"
+ $$ "\\toprule\\addlinespace"
$$ headers
$$ vcat rows'
- $$ "\\hline"
+ $$ "\\bottomrule"
$$ capt
$$ "\\end{longtable}"
@@ -490,23 +493,42 @@ tableRowToLaTeX :: Bool
-> [[Block]]
-> State WriterState Doc
tableRowToLaTeX header aligns widths cols = do
- renderedCells <- mapM blockListToLaTeX cols
- let valign = text $ if header then "[b]" else "[t]"
- let halign x = case x of
- AlignLeft -> "\\raggedright"
- AlignRight -> "\\raggedleft"
- AlignCenter -> "\\centering"
- AlignDefault -> "\\raggedright"
-- scale factor compensates for extra space between columns
-- so the whole table isn't larger than columnwidth
let scaleFactor = 0.97 ** fromIntegral (length aligns)
- let toCell 0 _ c = c
- toCell w a c = "\\begin{minipage}" <> valign <>
- braces (text (printf "%.2f\\columnwidth"
- (w * scaleFactor))) <>
- (halign a <> cr <> c <> cr) <> "\\end{minipage}"
- let cells = zipWith3 toCell widths aligns renderedCells
- return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}"
+ let widths' = map (scaleFactor *) widths
+ cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols
+ return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace"
+
+tableCellToLaTeX :: Bool -> (Double, Alignment, [Block])
+ -> State WriterState Doc
+tableCellToLaTeX _ (0, _, blocks) = blockListToLaTeX blocks
+tableCellToLaTeX header (width, align, blocks) = do
+ modify $ \st -> st{ stInMinipage = True, stNotes = [] }
+ cellContents <- blockListToLaTeX blocks
+ notes <- gets stNotes
+ modify $ \st -> st{ stInMinipage = False, stNotes = [] }
+ let valign = text $ if header then "[b]" else "[t]"
+ let halign = case align of
+ AlignLeft -> "\\raggedright"
+ AlignRight -> "\\raggedleft"
+ AlignCenter -> "\\centering"
+ AlignDefault -> "\\raggedright"
+ return $ ("\\begin{minipage}" <> valign <>
+ braces (text (printf "%.2f\\columnwidth" width)) <>
+ (halign <> cr <> cellContents <> cr) <> "\\end{minipage}")
+ $$ case notes of
+ [] -> empty
+ ns -> (case length ns of
+ n | n > 1 -> "\\addtocounter" <>
+ braces "footnote" <>
+ braces (text $ show $ 1 - n)
+ | otherwise -> empty)
+ $$
+ vcat (intersperse
+ ("\\addtocounter" <> braces "footnote" <> braces "1")
+ $ map (\x -> "\\footnotetext" <> braces x)
+ $ reverse ns)
listItemToLaTeX :: [Block] -> State WriterState Doc
listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) .
@@ -709,14 +731,20 @@ inlineToLaTeX (Image _ (source, _)) = do
source'' <- stringToLaTeX URLString source'
return $ "\\includegraphics" <> braces (text source'')
inlineToLaTeX (Note contents) = do
+ inMinipage <- gets stInMinipage
modify (\s -> s{stInNote = True})
contents' <- blockListToLaTeX contents
modify (\s -> s {stInNote = False})
let optnl = case reverse contents of
(CodeBlock _ _ : _) -> cr
_ -> empty
- return $ "\\footnote" <> braces (nest 2 contents' <> optnl)
- -- note: a \n before } needed when note ends with a Verbatim environment
+ let noteContents = nest 2 contents' <> optnl
+ modify $ \st -> st{ stNotes = noteContents : stNotes st }
+ return $
+ if inMinipage
+ then "\\footnotemark{}"
+ -- note: a \n before } needed when note ends with a Verbatim environment
+ else "\\footnote" <> braces noteContents
citationsToNatbib :: [Citation] -> State WriterState Doc
citationsToNatbib (one:[])
@@ -737,9 +765,9 @@ citationsToNatbib cits
| noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits
= citeCommand "citep" p s ks
where
- noPrefix = and . map (null . citationPrefix)
- noSuffix = and . map (null . citationSuffix)
- ismode m = and . map (((==) m) . citationMode)
+ noPrefix = all (null . citationPrefix)
+ noSuffix = all (null . citationSuffix)
+ ismode m = all (((==) m) . citationMode)
p = citationPrefix $ head $ cits
s = citationSuffix $ last $ cits
ks = intercalate ", " $ map citationId cits
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index b31cc2b70..680bfef44 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -283,7 +283,7 @@ definitionListItemToMan opts (label, defs) = do
mapM (\item -> blockToMan opts item) rest
first' <- blockToMan opts first
return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
- return $ text ".TP" $$ text ".B " <> labelText $$ contents
+ return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents
-- | Convert list of Pandoc block elements to man.
blockListToMan :: WriterOptions -- ^ Options
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index eefcd547a..278e5cc9d 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -338,7 +338,7 @@ blockToMarkdown opts (RawBlock f str)
else return $ if isEnabled Ext_markdown_attribute opts
then text (addMarkdownAttribute str) <> text "\n"
else text str <> text "\n"
- | f == "latex" || f == "tex" || f == "markdown" = do
+ | f `elem` ["latex", "tex", "markdown"] = do
st <- get
if stPlain st
then return empty
@@ -381,13 +381,11 @@ blockToMarkdown opts (CodeBlock (_,classes,_) str)
isEnabled Ext_literate_haskell opts =
return $ prefixed "> " (text str) <> blankline
blockToMarkdown opts (CodeBlock attribs str) = return $
- case attribs of
- x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts ->
- tildes <> " " <> attrs <> cr <> text str <>
- cr <> tildes <> blankline
- (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts ->
- backticks <> " " <> text cls <> cr <> text str <>
- cr <> backticks <> blankline
+ case attribs == nullAttr of
+ False | isEnabled Ext_backtick_code_blocks opts ->
+ backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline
+ | isEnabled Ext_fenced_code_blocks opts ->
+ tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline
_ -> nest (writerTabStop opts) (text str) <> blankline
where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of
[] -> "~~~~"
@@ -396,8 +394,10 @@ blockToMarkdown opts (CodeBlock attribs str) = return $
| otherwise -> replicate (n+1) '~'
backticks = text "```"
attrs = if isEnabled Ext_fenced_code_attributes opts
- then nowrap $ attrsToMarkdown attribs
- else empty
+ then nowrap $ " " <> attrsToMarkdown attribs
+ else case attribs of
+ (_,[cls],_) -> " " <> text cls
+ _ -> empty
blockToMarkdown opts (BlockQuote blocks) = do
st <- get
-- if we're writing literate haskell, put a space before the bird tracks
@@ -555,7 +555,14 @@ bulletListItemToMarkdown opts items = do
contents <- blockListToMarkdown opts items
let sps = replicate (writerTabStop opts - 2) ' '
let start = text ('-' : ' ' : sps)
- return $ hang (writerTabStop opts) start $ contents <> cr
+ -- remove trailing blank line if it is a tight list
+ let contents' = case reverse items of
+ (BulletList xs:_) | isTightList xs ->
+ chomp contents <> cr
+ (OrderedList _ xs:_) | isTightList xs ->
+ chomp contents <> cr
+ _ -> contents
+ return $ hang (writerTabStop opts) start $ contents' <> cr
-- | Convert ordered list item (a list of blocks) to markdown.
orderedListItemToMarkdown :: WriterOptions -- ^ options
@@ -621,10 +628,11 @@ getReference label (src, tit) = do
Nothing -> do
let label' = case find ((== label) . fst) (stRefs st) of
Just _ -> -- label is used; generate numerical label
- case find (\n -> not (any (== [Str (show n)])
- (map fst (stRefs st)))) [1..(10000 :: Integer)] of
- Just x -> [Str (show x)]
- Nothing -> error "no unique label"
+ case find (\n -> notElem [Str (show n)]
+ (map fst (stRefs st)))
+ [1..(10000 :: Integer)] of
+ Just x -> [Str (show x)]
+ Nothing -> error "no unique label"
Nothing -> label
modify (\s -> s{ stRefs = (label', (src,tit)) : stRefs st })
return label'
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index 61741a61e..83fefaa29 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -51,7 +51,7 @@ data WriterState = WriterState {
writeMediaWiki :: WriterOptions -> Pandoc -> String
writeMediaWiki opts document =
evalState (pandocToMediaWiki opts document)
- (WriterState { stNotes = False, stListLevel = [], stUseTags = False })
+ WriterState { stNotes = False, stListLevel = [], stUseTags = False }
-- | Return MediaWiki representation of document.
pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index cc0a06243..c3652d65d 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -30,7 +30,10 @@ Conversion of 'Pandoc' documents to ODT.
-}
module Text.Pandoc.Writers.ODT ( writeODT ) where
import Data.IORef
-import Data.List ( isPrefixOf )
+import Data.List ( isPrefixOf, isSuffixOf )
+import Data.Maybe ( fromMaybe )
+import Text.XML.Light.Output
+import Text.TeXMath
import qualified Data.ByteString.Lazy as B
import Text.Pandoc.UTF8 ( fromStringLazy )
import Codec.Archive.Zip
@@ -40,13 +43,14 @@ import Text.Pandoc.ImageSize ( imageSize, sizeInPoints )
import Text.Pandoc.MIME ( getMimeType )
import Text.Pandoc.Definition
import Text.Pandoc.Walk
+import Text.Pandoc.Writers.Shared ( fixDisplayMath )
import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
import Control.Monad (liftM)
import Text.Pandoc.XML
import Text.Pandoc.Pretty
import qualified Control.Exception as E
import Data.Time.Clock.POSIX ( getPOSIXTime )
-import System.FilePath ( takeExtension )
+import System.FilePath ( takeExtension, takeDirectory )
-- | Produce an ODT file from a Pandoc document.
writeODT :: WriterOptions -- ^ Writer options
@@ -60,9 +64,9 @@ writeODT opts doc@(Pandoc meta _) = do
Just f -> B.readFile f
Nothing -> (B.fromChunks . (:[])) `fmap`
readDataFile datadir "reference.odt"
- -- handle pictures
+ -- handle formulas and pictures
picEntriesRef <- newIORef ([] :: [Entry])
- doc' <- walkM (transformPic opts picEntriesRef) doc
+ doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc
let newContents = writeOpenDocument opts{writerWrapText = False} doc'
epochtime <- floor `fmap` getPOSIXTime
let contentEntry = toEntry "content.xml" epochtime
@@ -72,7 +76,11 @@ writeODT opts doc@(Pandoc meta _) = do
$ contentEntry : picEntries
-- construct META-INF/manifest.xml based on archive
let toFileEntry fp = case getMimeType fp of
- Nothing -> empty
+ Nothing -> if "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp
+ then selfClosingTag "manifest:file-entry"
+ [("manifest:media-type","application/vnd.oasis.opendocument.formula")
+ ,("manifest:full-path",fp)]
+ else empty
Just m -> selfClosingTag "manifest:file-entry"
[("manifest:media-type", m)
,("manifest:full-path", fp)
@@ -80,6 +88,8 @@ writeODT opts doc@(Pandoc meta _) = do
]
let files = [ ent | ent <- filesInArchive archive,
not ("META-INF" `isPrefixOf` ent) ]
+ let formulas = [ takeDirectory ent ++ "/" | ent <- filesInArchive archive,
+ "Formula-" `isPrefixOf` ent, takeExtension ent == ".xml" ]
let manifestEntry = toEntry "META-INF/manifest.xml" epochtime
$ fromStringLazy $ render Nothing
$ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
@@ -91,6 +101,7 @@ writeODT opts doc@(Pandoc meta _) = do
[("manifest:media-type","application/vnd.oasis.opendocument.text")
,("manifest:full-path","/")]
$$ vcat ( map toFileEntry $ files )
+ $$ vcat ( map toFileEntry $ formulas )
)
)
let archive' = addEntryToArchive manifestEntry archive
@@ -118,8 +129,8 @@ writeODT opts doc@(Pandoc meta _) = do
$ addEntryToArchive metaEntry archive'
return $ fromArchive archive''
-transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
-transformPic opts entriesRef (Image lab (src,_)) = do
+transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
+transformPicMath opts entriesRef (Image lab (src,_)) = do
res <- fetchItem (writerSourceURL opts) src
case res of
Left (_ :: E.SomeException) -> do
@@ -127,7 +138,7 @@ transformPic opts entriesRef (Image lab (src,_)) = do
return $ Emph lab
Right (img, _) -> do
let size = imageSize img
- let (w,h) = maybe (0,0) id $ sizeInPoints `fmap` size
+ let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size
let tit' = show w ++ "x" ++ show h
entries <- readIORef entriesRef
let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src
@@ -136,5 +147,29 @@ transformPic opts entriesRef (Image lab (src,_)) = do
let entry = toEntry newsrc epochtime $ toLazy img
modifyIORef entriesRef (entry:)
return $ Image lab (newsrc, tit')
-transformPic _ _ x = return x
+transformPicMath _ entriesRef (Math t math) = do
+ entries <- readIORef entriesRef
+ let dt = if t == InlineMath then DisplayInline else DisplayBlock
+ case texMathToMathML dt math of
+ Left _ -> return $ Math t math
+ Right r -> do
+ let conf = useShortEmptyTags (const False) defaultConfigPP
+ let mathml = ppcTopElement conf r
+ epochtime <- floor `fmap` getPOSIXTime
+ let dirname = "Formula-" ++ show (length entries) ++ "/"
+ let fname = dirname ++ "content.xml"
+ let entry = toEntry fname epochtime (fromStringLazy mathml)
+ modifyIORef entriesRef (entry:)
+ return $ RawInline (Format "opendocument") $ render Nothing $
+ inTags False "draw:frame" [("text:anchor-type",
+ if t == DisplayMath
+ then "paragraph"
+ else "as-char")
+ ,("style:vertical-pos", "middle")
+ ,("style:vertical-rel", "text")] $
+ selfClosingTag "draw:object" [("xlink:href", dirname)
+ , ("xlink:type", "simple")
+ , ("xlink:show", "embed")
+ , ("xlink:actuate", "onLoad")]
+transformPicMath _ _ x = return x
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 565f5f869..0029c3296 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -64,6 +64,7 @@ data WriterState =
, stInDefinition :: Bool
, stTight :: Bool
, stFirstPara :: Bool
+ , stImageId :: Int
}
defaultWriterState :: WriterState
@@ -78,6 +79,7 @@ defaultWriterState =
, stInDefinition = False
, stTight = False
, stFirstPara = False
+ , stImageId = 1
}
when :: Bool -> Doc -> Doc
@@ -283,8 +285,12 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
-- | Convert a Pandoc block element to OpenDocument.
blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc
blockToOpenDocument o bs
- | Plain b <- bs = inParagraphTags =<< inlinesToOpenDocument o b
- | Para b <- bs = inParagraphTags =<< inlinesToOpenDocument o b
+ | Plain b <- bs = if null b
+ then return empty
+ else inParagraphTags =<< inlinesToOpenDocument o b
+ | Para b <- bs = if null b
+ then return empty
+ else inParagraphTags =<< inlinesToOpenDocument o b
| Div _ xs <- bs = blocksToOpenDocument o xs
| Header i _ b <- bs = setFirstPara >>
(inHeaderTags i =<< inlinesToOpenDocument o b)
@@ -296,8 +302,8 @@ blockToOpenDocument o bs
| Table c a w h r <- bs = setFirstPara >> table c a w h r
| HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p"
[ ("text:style-name", "Horizontal_20_Line") ])
- | RawBlock f s <- bs = if f == "opendocument"
- then preformatted s
+ | RawBlock f s <- bs = if f == Format "opendocument"
+ then return $ text s
else return empty
| Null <- bs = return empty
| otherwise = return empty
@@ -376,11 +382,11 @@ inlineToOpenDocument o ils
| Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s
| Math t s <- ils = inlinesToOpenDocument o (readTeXMath' t s)
| Cite _ l <- ils = inlinesToOpenDocument o l
- | RawInline f s <- ils = if f == "opendocument"
- then return $ preformatted s
+ | RawInline f s <- ils = if f == Format "opendocument"
+ then return $ text s
else return empty
| Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
- | Image _ (s,t) <- ils = return $ mkImg s t
+ | Image _ (s,t) <- ils = mkImg s t
| Note l <- ils = mkNote l
| otherwise = return empty
where
@@ -389,7 +395,11 @@ inlineToOpenDocument o ils
, ("xlink:href" , s )
, ("office:name", t )
] . inSpanTags "Definition"
- mkImg s t = inTags False "draw:frame" (attrsFromTitle t) $
+ mkImg s t = do
+ id' <- gets stImageId
+ modify (\st -> st{ stImageId = id' + 1 })
+ return $ inTags False "draw:frame"
+ (("draw:name", "img" ++ show id'):attrsFromTitle t) $
selfClosingTag "draw:image" [ ("xlink:href" , s )
, ("xlink:type" , "simple")
, ("xlink:show" , "embed" )
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 51083f52b..d318c5f6a 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -129,7 +129,7 @@ blockToOrg (Para inlines) = do
blockToOrg (RawBlock "html" str) =
return $ blankline $$ "#+BEGIN_HTML" $$
nest 2 (text str) $$ "#+END_HTML" $$ blankline
-blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" =
+blockToOrg (RawBlock f str) | f `elem` ["org", "latex", "tex"] =
return $ text str
blockToOrg (RawBlock _ _) = return empty
blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 1a62f7250..1e7596b21 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -219,11 +219,15 @@ blockToRST (Table caption _ widths headers rows) = do
else blankline <> text "Table: " <> caption'
headers' <- mapM blockListToRST headers
rawRows <- mapM (mapM blockListToRST) rows
- let isSimple = all (==0) widths && all (all (\bs -> length bs <= 1)) rows
+ -- let isSimpleCell [Plain _] = True
+ -- isSimpleCell [Para _] = True
+ -- isSimpleCell [] = True
+ -- isSimpleCell _ = False
+ -- let isSimple = all (==0) widths && all (all isSimpleCell) rows
let numChars = maximum . map offset
opts <- get >>= return . stOptions
let widthsInChars =
- if isSimple
+ if all (== 0) widths
then map ((+2) . numChars) $ transpose (headers' : rawRows)
else map (floor . (fromIntegral (writerColumns opts) *)) widths
let hpipeBlocks blocks = hcat [beg, middle, end]
@@ -287,7 +291,7 @@ definitionListItemToRST (label, defs) = do
label' <- inlineListToRST label
contents <- liftM vcat $ mapM blockListToRST defs
tabstop <- get >>= (return . writerTabStop . stOptions)
- return $ label' $$ nest tabstop (contents <> cr)
+ return $ label' $$ nest tabstop (nestle contents <> cr)
-- | Convert list of Pandoc block elements to RST.
blockListToRST :: [Block] -- ^ List of block elements
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 9cb08803c..604aac1c9 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -34,6 +34,7 @@ module Text.Pandoc.Writers.Shared (
, setField
, defField
, tagWithAttrs
+ , fixDisplayMath
)
where
import Text.Pandoc.Definition
@@ -46,6 +47,7 @@ import qualified Data.Map as M
import qualified Data.Text as T
import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..))
import qualified Data.Traversable as Traversable
+import Data.List ( groupBy )
-- | Create JSON value for template from a 'Meta' and an association list
-- of variables, specified at the command line or in the writer.
@@ -65,8 +67,7 @@ metaToJSON opts blockWriter inlineWriter (Meta metamap)
renderedMap <- Traversable.mapM
(metaValueToJSON blockWriter inlineWriter)
metamap
- return $ M.foldWithKey (\key val obj -> defField key val obj)
- baseContext renderedMap
+ return $ M.foldWithKey defField baseContext renderedMap
| otherwise = return (Object H.empty)
metaValueToJSON :: Monad m
@@ -137,3 +138,28 @@ tagWithAttrs tag (ident,classes,kvs) = hsep
,hsep (map (\(k,v) -> text k <> "=" <>
doubleQuotes (text (escapeStringForXML v))) kvs)
] <> ">"
+
+isDisplayMath :: Inline -> Bool
+isDisplayMath (Math DisplayMath _) = True
+isDisplayMath _ = False
+
+stripLeadingTrailingSpace :: [Inline] -> [Inline]
+stripLeadingTrailingSpace = go . reverse . go . reverse
+ where go (Space:xs) = xs
+ go xs = xs
+
+-- Put display math in its own block (for ODT/DOCX).
+fixDisplayMath :: Block -> Block
+fixDisplayMath (Plain lst)
+ | any isDisplayMath lst && not (all isDisplayMath lst) =
+ -- chop into several paragraphs so each displaymath is its own
+ Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $
+ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
+ not (isDisplayMath x || isDisplayMath y)) lst
+fixDisplayMath (Para lst)
+ | any isDisplayMath lst && not (all isDisplayMath lst) =
+ -- chop into several paragraphs so each displaymath is its own
+ Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $
+ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) ||
+ not (isDisplayMath x || isDisplayMath y)) lst
+fixDisplayMath x = x
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index d62e50880..bf3df8035 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -293,7 +293,7 @@ blockListToTexinfo (x:xs) = do
case x of
Header level _ _ -> do
-- We need need to insert a menu for this node.
- let (before, after) = break isHeader xs
+ let (before, after) = break isHeaderBlock xs
before' <- blockListToTexinfo before
let menu = if level < 4
then collectNodes (level + 1) after
@@ -315,10 +315,6 @@ blockListToTexinfo (x:xs) = do
xs' <- blockListToTexinfo xs
return $ x' $$ xs'
-isHeader :: Block -> Bool
-isHeader (Header _ _ _) = True
-isHeader _ = False
-
collectNodes :: Int -> [Block] -> [Block]
collectNodes _ [] = []
collectNodes level (x:xs) =
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 7c102cc86..95aedf780 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -51,7 +51,7 @@ data WriterState = WriterState {
writeTextile :: WriterOptions -> Pandoc -> String
writeTextile opts document =
evalState (pandocToTextile opts document)
- (WriterState { stNotes = [], stListLevel = [], stUseTags = False })
+ WriterState { stNotes = [], stListLevel = [], stUseTags = False }
-- | Return Textile representation of document.
pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String