Diffstat (limited to 'src'): 36 files changed, 1675 insertions(+), 497 deletions(-)
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 703bb876a..3ae81db00 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -267,7 +267,10 @@ getDefaultExtensions "markdown_strict" = strictExtensions
 getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions
 getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
 getDefaultExtensions "markdown_github" = githubMarkdownExtensions
-getDefaultExtensions _ = pandocExtensions
+getDefaultExtensions "markdown" = pandocExtensions
+getDefaultExtensions "plain" = pandocExtensions
+getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
+getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers]
 
 -- | Retrieve reader based on formatSpec (format+extensions).
 getReader :: String -> Either String (ReaderOptions -> String -> IO Pandoc)
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 9b0850efb..14575244d 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -32,9 +32,14 @@ module Text.Pandoc.ImageSize ( ImageType(..), imageType, imageSize, sizeInPixels, sizeInPoints ) where
 import Data.ByteString (ByteString, unpack)
 import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Lazy as BL
+import Control.Applicative
 import Control.Monad
 import Data.Bits
+import Data.Binary
+import Data.Binary.Get
 import Text.Pandoc.Shared (safeRead)
+import qualified Data.Map as M
 
 -- quick and dirty functions to get image sizes
 -- algorithms borrowed from wwwis.pl
@@ -53,7 +58,8 @@ imageType :: ByteString -> Maybe ImageType
 imageType img = case B.take 4 img of
                      "\x89\x50\x4e\x47" -> return Png
                      "\x47\x49\x46\x38" -> return Gif
-                     "\xff\xd8\xff\xe0" -> return Jpeg
+                     "\xff\xd8\xff\xe0" -> return Jpeg  -- JFIF
+                     "\xff\xd8\xff\xe1" -> return Jpeg  -- Exif
                      "%PDF" -> return Pdf
                      "%!PS"
                        | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
@@ -139,8 +145,14 @@ gifSize img = do
 jpegSize :: ByteString -> Maybe ImageSize
 jpegSize img = do
   let (hdr, rest) = B.splitAt 4 img
-  guard $ hdr == "\xff\xd8\xff\xe0"
   guard $ B.length rest >= 14
+  case hdr of
+       "\xff\xd8\xff\xe0" -> jfifSize rest
+       "\xff\xd8\xff\xe1" -> exifSize $ B.takeWhile (/= '\xff') rest
+       _ -> mzero
+
+jfifSize :: ByteString -> Maybe ImageSize
+jfifSize rest = do
   let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral
                                            $ unpack $ B.take 5 $ B.drop 9 $ rest
   let factor = case dpiDensity of
@@ -149,11 +161,11 @@ jpegSize img = do
                     _ -> const 72
   let dpix = factor (shift dpix1 8 + dpix2)
   let dpiy = factor (shift dpiy1 8 + dpiy2)
-  (w,h) <- findJpegSize rest
+  (w,h) <- findJfifSize rest
   return $ ImageSize { pxX = w, pxY = h, dpiX = dpix, dpiY = dpiy }
 
-findJpegSize :: ByteString -> Maybe (Integer,Integer)
-findJpegSize bs = do
+findJfifSize :: ByteString -> Maybe (Integer,Integer)
+findJfifSize bs = do
   let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs
   case B.uncons bs' of
        Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do
@@ -165,8 +177,216 @@ findJpegSize bs = do
              [c1,c2] -> do
                let len = shift c1 8 + c2
                -- skip variables
-               findJpegSize $ B.drop len bs''
+               findJfifSize $ B.drop len bs''
             _ -> fail "JPEG parse error"
       Nothing -> fail "Did not find length record"
 
+exifSize :: ByteString -> Maybe ImageSize
+exifSize = runGet (Just <$> exifHeader) . BL.fromChunks . (:[])
+-- NOTE: It would be nicer to do
+-- runGet ((Just <$> exifHeader) <|> return Nothing)
+-- which would prevent pandoc from raising an error when an exif header can't
+-- be parsed.
But we only get an Alternative instance for Get in binary 0.6, +-- and binary 0.5 ships with ghc 7.6. + +exifHeader :: Get ImageSize +exifHeader = do + _app1DataSize <- getWord16be + exifHdr <- getWord32be + unless (exifHdr == 0x45786966) $ fail "Did not find exif header" + zeros <- getWord16be + unless (zeros == 0) $ fail "Expected zeros after exif header" + -- beginning of tiff header -- we read whole thing to use + -- in getting data from offsets: + tiffHeader <- lookAhead getRemainingLazyByteString + byteAlign <- getWord16be + let bigEndian = byteAlign == 0x4d4d + let (getWord16, getWord32, getWord64) = + if bigEndian + then (getWord16be, getWord32be, getWord64be) + else (getWord16le, getWord32le, getWord64le) + let getRational = do + num <- getWord32 + den <- getWord32 + return $ fromIntegral num / fromIntegral den + tagmark <- getWord16 + unless (tagmark == 0x002a) $ fail "Failed alignment sanity check" + ifdOffset <- getWord32 + skip (fromIntegral ifdOffset - 8) -- skip to IDF + numentries <- getWord16 + let ifdEntry = do + tag <- getWord16 >>= \t -> + maybe (fail $ "Unknown tag type " ++ show t) return + (M.lookup t tagTypeTable) + dataFormat <- getWord16 + numComponents <- getWord32 + (fmt, bytesPerComponent) <- + case dataFormat of + 1 -> return (UnsignedByte . runGet getWord8, 1) + 2 -> return (AsciiString, 1) + 3 -> return (UnsignedShort . runGet getWord16, 2) + 4 -> return (UnsignedLong . runGet getWord32, 4) + 5 -> return (UnsignedRational . runGet getRational, 8) + 6 -> return (SignedByte . runGet getWord8, 1) + 7 -> return (Undefined . runGet getWord8, 1) + 8 -> return (SignedShort . runGet getWord16, 2) + 9 -> return (SignedLong . runGet getWord32, 4) + 10 -> return (SignedRational . runGet getRational, 8) + 11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4) + 12 -> return (DoubleFloat . 
runGet getWord64 {- TODO -}, 8) + _ -> fail $ "Unknown data format " ++ show dataFormat + let totalBytes = fromIntegral $ numComponents * bytesPerComponent + payload <- if totalBytes <= 4 -- data is right here + then fmt <$> + (getLazyByteString (fromIntegral totalBytes) <* + skip (4 - totalBytes)) + else do -- get data from offset + offs <- getWord32 + return $ fmt $ BL.take (fromIntegral totalBytes) $ + BL.drop (fromIntegral offs) tiffHeader + return (tag, payload) + entries <- sequence $ replicate (fromIntegral numentries) ifdEntry + subentries <- case lookup ExifOffset entries of + Just (UnsignedLong offset) -> do + pos <- bytesRead + skip (fromIntegral offset - (fromIntegral pos - 8)) + numsubentries <- getWord16 + sequence $ + replicate (fromIntegral numsubentries) ifdEntry + _ -> return [] + let allentries = entries ++ subentries + (width, height) <- case (lookup ExifImageWidth allentries, + lookup ExifImageHeight allentries) of + (Just (UnsignedLong w), Just (UnsignedLong h)) -> + return (fromIntegral w, fromIntegral h) + _ -> fail "Could not determine image width, height" + let resfactor = case lookup ResolutionUnit allentries of + Just (UnsignedShort 1) -> (100 / 254) + _ -> 1 + let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) + $ lookup XResolution allentries + let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) + $ lookup YResolution allentries + return $ ImageSize{ + pxX = width + , pxY = height + , dpiX = xres + , dpiY = yres } + +data DataFormat = UnsignedByte Word8 + | AsciiString BL.ByteString + | UnsignedShort Word16 + | UnsignedLong Word32 + | UnsignedRational Rational + | SignedByte Word8 + | Undefined Word8 + | SignedShort Word16 + | SignedLong Word32 + | SignedRational Rational + | SingleFloat Word32 + | DoubleFloat Word64 + deriving (Show) + +data TagType = ImageDescription + | Make + | Model + | Orientation + | XResolution + | YResolution + | ResolutionUnit + | Software + | DateTime + | WhitePoint + | PrimaryChromaticities + | YCbCrCoefficients + | YCbCrPositioning + | ReferenceBlackWhite + | Copyright + | ExifOffset + | ExposureTime + | FNumber + | ExposureProgram + | ISOSpeedRatings + | ExifVersion + | DateTimeOriginal + | DateTimeDigitized + | ComponentConfiguration + | CompressedBitsPerPixel + | ShutterSpeedValue + | ApertureValue + | BrightnessValue + | ExposureBiasValue + | MaxApertureValue + | SubjectDistance + | MeteringMode + | LightSource + | Flash + | FocalLength + | MakerNote + | UserComment + | FlashPixVersion + | ColorSpace + | ExifImageWidth + | ExifImageHeight + | RelatedSoundFile + | ExifInteroperabilityOffset + | FocalPlaneXResolution + | FocalPlaneYResolution + | FocalPlaneResolutionUnit + | SensingMethod + | FileSource + | SceneType + deriving (Show, Eq, Ord) +tagTypeTable :: M.Map Word16 TagType +tagTypeTable = M.fromList + [ (0x010e, ImageDescription) + , (0x010f, Make) + , (0x0110, Model) + , (0x0112, Orientation) + , (0x011a, XResolution) + , (0x011b, YResolution) + , (0x0128, ResolutionUnit) + , (0x0131, Software) + , (0x0132, DateTime) + , (0x013e, WhitePoint) + , (0x013f, PrimaryChromaticities) + , (0x0211, YCbCrCoefficients) + , (0x0213, YCbCrPositioning) + , (0x0214, ReferenceBlackWhite) + , (0x8298, Copyright) + , (0x8769, ExifOffset) + , (0x829a, ExposureTime) + , (0x829d, FNumber) + , (0x8822, ExposureProgram) + , (0x8827, ISOSpeedRatings) + , (0x9000, ExifVersion) + , (0x9003, DateTimeOriginal) + , (0x9004, DateTimeDigitized) + , (0x9101, ComponentConfiguration) + , (0x9102, 
CompressedBitsPerPixel) + , (0x9201, ShutterSpeedValue) + , (0x9202, ApertureValue) + , (0x9203, BrightnessValue) + , (0x9204, ExposureBiasValue) + , (0x9205, MaxApertureValue) + , (0x9206, SubjectDistance) + , (0x9207, MeteringMode) + , (0x9208, LightSource) + , (0x9209, Flash) + , (0x920a, FocalLength) + , (0x927c, MakerNote) + , (0x9286, UserComment) + , (0xa000, FlashPixVersion) + , (0xa001, ColorSpace) + , (0xa002, ExifImageWidth) + , (0xa003, ExifImageHeight) + , (0xa004, RelatedSoundFile) + , (0xa005, ExifInteroperabilityOffset) + , (0xa20e, FocalPlaneXResolution) + , (0xa20f, FocalPlaneYResolution) + , (0xa210, FocalPlaneResolutionUnit) + , (0xa217, SensingMethod) + , (0xa300, FileSource) + , (0xa301, SceneType) + ] diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index d9cb94a33..44989ee94 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -40,7 +40,8 @@ getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes where mimeTypes = M.fromList mimeTypesList extensionFromMimeType :: String -> Maybe String -extensionFromMimeType mimetype = M.lookup mimetype reverseMimeTypes +extensionFromMimeType mimetype = M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes + -- note: we just look up the basic mime type, dropping the content-encoding etc. where reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList mimeTypesList :: [(String, String)] @@ -146,6 +147,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("dxr","application/x-director") ,("emb","chemical/x-embl-dl-nucleotide") ,("embl","chemical/x-embl-dl-nucleotide") + ,("emf","image/x-emf") ,("eml","message/rfc822") ,("ent","chemical/x-ncbi-asn1-ascii") ,("eot","application/vnd.ms-fontobject") @@ -219,6 +221,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("jnlp","application/x-java-jnlp-file") ,("jpe","image/jpeg") ,("jpeg","image/jpeg") + ,("jfif","image/jpeg") ,("jpg","image/jpeg") ,("js","application/x-javascript") ,("kar","audio/midi") @@ -463,6 +466,7 @@ mimeTypesList = -- List borrowed from happstack-server. 
,("wm","video/x-ms-wm") ,("wma","audio/x-ms-wma") ,("wmd","application/x-ms-wmd") + ,("wmf","image/x-wmf") ,("wml","text/vnd.wap.wml") ,("wmlc","application/vnd.wap.wmlc") ,("wmls","text/vnd.wap.wmlscript") diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 48e418ab2..5f65abdde 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -80,6 +80,7 @@ data Extension = | Ext_link_attributes -- ^ MMD style reference link attributes | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters + | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank | Ext_startnum -- ^ Make start number of ordered list significant | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php | Ext_example_lists -- ^ Markdown-style numbered examples @@ -169,6 +170,7 @@ githubMarkdownExtensions = Set.fromList , Ext_intraword_underscores , Ext_strikeout , Ext_hard_line_breaks + , Ext_lists_without_preceding_blankline ] multimarkdownExtensions :: Set Extension diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index a445e2991..360338f8f 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -41,6 +41,7 @@ import System.Directory import System.Environment import Control.Monad (unless) import Data.List (isInfixOf) +import Data.Maybe (fromMaybe) import qualified Data.ByteString.Base64 as B64 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition @@ -87,7 +88,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do res <- fetchItem baseURL src case res of Right (contents, Just mime) -> do - let ext = maybe (takeExtension src) id $ + let ext = fromMaybe (takeExtension src) $ extensionFromMimeType mime let basename = UTF8.toString $ B64.encode $ UTF8.fromString src let fname = tmpdir </> basename <.> ext @@ -107,7 +108,7 @@ tex2pdf' tmpDir program source = do then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source - let msg = "Error producing PDF from TeX source." + let msg = "Error producing PDF from TeX source.\n" case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -116,7 +117,7 @@ tex2pdf' tmpDir program source = do x | "! Package inputenc Error" `BC.isPrefixOf` x -> "\nTry running pandoc with --latex-engine=xelatex." _ -> "" - return $ Left $ msg <> "\n" <> extractMsg log' <> extramsg + return $ Left $ msg <> logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left msg (ExitSuccess, Just pdf) -> return $ Right pdf diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 701b2ef84..2f21e1253 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances, + FlexibleInstances#-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -47,6 +48,8 @@ module Text.Pandoc.Parsing ( (>>~), romanNumeral, emailAddress, uri, + mathInline, + mathDisplay, withHorizDisplacement, withRaw, escaped, @@ -65,6 +68,9 @@ module Text.Pandoc.Parsing ( (>>~), guardEnabled, guardDisabled, ParserState (..), + HasReaderOptions (..), + HasHeaderMap (..), + HasIdentifierList (..), defaultParserState, HeaderType (..), ParserContext (..), @@ -265,7 +271,7 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. 
nonspaceChar :: Parser [Char] st Char -nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r' +nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r'] -- | Skips zero or more spaces or tabs. skipSpaces :: Parser [Char] st () @@ -451,6 +457,39 @@ uri = try $ do let uri' = scheme ++ ":" ++ fromEntities str' return (uri', escapeURI uri') +mathInlineWith :: String -> String -> Parser [Char] st String +mathInlineWith op cl = try $ do + string op + notFollowedBy space + words' <- many1Till (count 1 (noneOf "\n\\") + <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> count 1 newline <* notFollowedBy' blankline + *> return " ") + (try $ string cl) + notFollowedBy digit -- to prevent capture of $5 + return $ concat words' + +mathDisplayWith :: String -> String -> Parser [Char] st String +mathDisplayWith op cl = try $ do + string op + many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) + +mathDisplay :: Parser [Char] ParserState String +mathDisplay = + (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathDisplayWith "\\[" "\\]") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathDisplayWith "\\\\[" "\\\\]") + +mathInline :: Parser [Char] ParserState String +mathInline = + (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathInlineWith "\\(" "\\)") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathInlineWith "\\\\(" "\\\\)") + -- | Applies a parser, returns tuple of its results and its horizontal -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement @@ -826,6 +865,34 @@ instance HasMeta ParserState where deleteMeta field st = st{ stateMeta = deleteMeta field $ stateMeta st } +class Monad m => HasReaderOptions m where + askReaderOption :: (ReaderOptions -> b) -> m b + +class Monad m => HasHeaderMap m where + getHeaderMap :: m (M.Map Inlines String) + putHeaderMap :: M.Map Inlines String -> m () + modifyHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> m () + -- default + modifyHeaderMap f = getHeaderMap >>= putHeaderMap . f + +class Monad m => HasIdentifierList m where + getIdentifierList :: m [String] + putIdentifierList :: [String] -> m () + modifyIdentifierList :: ([String] -> [String]) -> m () + -- default + modifyIdentifierList f = getIdentifierList >>= putIdentifierList . f + +instance HasReaderOptions (Parser s ParserState) where + askReaderOption = getOption + +instance HasHeaderMap (Parser s ParserState) where + getHeaderMap = fmap stateHeaders getState + putHeaderMap hm = updateState $ \st -> st{ stateHeaders = hm } + +instance HasIdentifierList (Parser s ParserState) where + getIdentifierList = fmap stateIdentifiers getState + putIdentifierList l = updateState $ \st -> st{ stateIdentifiers = l } + defaultParserState :: ParserState defaultParserState = ParserState { stateOptions = def, @@ -895,10 +962,11 @@ type SubstTable = M.Map Key Inlines -- and the auto_identifers extension is set, generate a new -- unique identifier, and update the list of identifiers -- in state. 
-registerHeader :: Attr -> Inlines -> Parser s ParserState Attr +registerHeader :: (HasReaderOptions m, HasHeaderMap m, HasIdentifierList m) + => Attr -> Inlines -> m Attr registerHeader (ident,classes,kvs) header' = do - ids <- stateIdentifiers `fmap` getState - exts <- getOption readerExtensions + ids <- getIdentifierList + exts <- askReaderOption readerExtensions let insert' = M.insertWith (\_new old -> old) if null ident && Ext_auto_identifiers `Set.member` exts then do @@ -906,16 +974,13 @@ registerHeader (ident,classes,kvs) header' = do let id'' = if Ext_ascii_identifiers `Set.member` exts then catMaybes $ map toAsciiChar id' else id' - updateState $ \st -> st{ - stateIdentifiers = if id' == id'' - then id' : ids - else id' : id'' : ids, - stateHeaders = insert' header' id' $ stateHeaders st } + putIdentifierList $ if id' == id'' + then id' : ids + else id' : id'' : ids + modifyHeaderMap $ insert' header' id' return (id'',classes,kvs) else do - unless (null ident) $ - updateState $ \st -> st{ - stateHeaders = insert' header' ident $ stateHeaders st } + unless (null ident) $ modifyHeaderMap $ insert' header' ident return (ident,classes,kvs) -- | Fail unless we're in "smart typography" mode. @@ -997,7 +1062,7 @@ doubleQuoteStart :: Parser [Char] ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" - notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) + notFollowedBy . satisfy $ flip elem [' ', '\t', '\n'] doubleQuoteEnd :: Parser [Char] st () doubleQuoteEnd = do diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index faf2a6797..033511832 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -60,6 +60,7 @@ module Text.Pandoc.Pretty ( , hsep , vcat , vsep + , nestle , chomp , inside , braces @@ -72,7 +73,7 @@ module Text.Pandoc.Pretty ( ) where -import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex) +import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..)) import Data.Foldable (toList) import Data.List (intercalate) import Data.Monoid @@ -186,6 +187,14 @@ vcat = foldr ($$) empty vsep :: [Doc] -> Doc vsep = foldr ($+$) empty +-- | Removes leading blank lines from a 'Doc'. +nestle :: Doc -> Doc +nestle (Doc d) = Doc $ go d + where go x = case viewl x of + (BlankLine :< rest) -> go rest + (NewLine :< rest) -> go rest + _ -> x + -- | Chomps trailing blank space off of a 'Doc'. 
chomp :: Doc -> Doc chomp d = Doc (fromList dl') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 6a799e270..56cb16b20 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,5 +1,6 @@ module Text.Pandoc.Readers.DocBook ( readDocBook ) where -import Data.Char (toUpper, isDigit) +import Data.Char (toUpper) +import Text.Pandoc.Shared (safeRead) import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder @@ -11,6 +12,7 @@ import Data.Char (isSpace) import Control.Monad.State import Control.Applicative ((<$>)) import Data.List (intersperse) +import Data.Maybe (fromMaybe) {- @@ -682,10 +684,9 @@ parseBlock (Elem e) = "lowerroman" -> LowerRoman "upperroman" -> UpperRoman _ -> Decimal - let start = case attrValue "override" <$> - filterElement (named "listitem") e of - Just x@(_:_) | all isDigit x -> read x - _ -> 1 + let start = fromMaybe 1 $ + (attrValue "override" <$> filterElement (named "listitem") e) + >>= safeRead orderedListWith (start,listStyle,DefaultDelim) <$> listitems "variablelist" -> definitionList <$> deflistitems @@ -779,7 +780,7 @@ parseBlock (Elem e) = caption <- case filterChild isCaption e of Just t -> getInlines t Nothing -> return mempty - let e' = maybe e id $ filterChild (named "tgroup") e + let e' = fromMaybe e $ filterChild (named "tgroup") e let isColspec x = named "colspec" x || named "col" x let colspecs = case filterChild (named "colgroup") e' of Just c -> filterChildren isColspec c @@ -801,11 +802,14 @@ parseBlock (Elem e) = Just "center" -> AlignCenter _ -> AlignDefault let toWidth c = case findAttr (unqual "colwidth") c of - Just w -> read $ filter (\x -> + Just w -> fromMaybe 0 + $ safeRead $ '0': filter (\x -> (x >= '0' && x <= '9') || x == '.') w Nothing -> 0 :: Double - let numrows = maximum $ map length bodyrows + let numrows = case bodyrows of + [] -> 0 + xs -> maximum $ map length xs let aligns = case colspecs of [] -> replicate numrows AlignDefault cs -> map toAlignment cs diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 7ca554fa3..506fe7770 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -76,9 +76,18 @@ pBody :: TagParser [Block] pBody = pInTags "body" block pHead :: TagParser [Block] -pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . 
normalizeSpaces setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t)) + pMetaTag = do + mt <- pSatisfy (~== TagOpen "meta" []) + let name = fromAttrib "name" mt + if null name + then return [] + else do + let content = fromAttrib "content" mt + updateState $ B.setMeta name (B.text content) + return [] block :: TagParser [Block] block = choice @@ -92,6 +101,7 @@ block = choice , pHead , pBody , pPlain + , pDiv , pRawHtmlBlock ] @@ -177,6 +187,13 @@ pRawTag = do then return [] else return $ renderTags' [tag] +pDiv :: TagParser [Block] +pDiv = try $ do + getOption readerParseRaw >>= guard + TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True) + contents <- pInTags "div" block + return [Div (mkAttr attr) contents] + pRawHtmlBlock :: TagParser [Block] pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag @@ -199,7 +216,7 @@ pHeader = try $ do let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] let level = read (drop 1 tagtype) contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof) - let ident = maybe "" id $ lookup "id" attr + let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] return $ if bodyTitle @@ -249,7 +266,7 @@ pCol = try $ do skipMany pBlank return $ case lookup "width" attribs of Just x | not (null x) && last x == '%' -> - maybe 0.0 id $ safeRead ('0':'.':init x) + fromMaybe 0.0 $ safeRead ('0':'.':init x) _ -> 0.0 pColgroup :: TagParser [Double] @@ -295,11 +312,7 @@ pCodeBlock = try $ do let result = case reverse result' of '\n':_ -> init result' _ -> result' - let attribsId = fromMaybe "" $ lookup "id" attr - let attribsClasses = words $ fromMaybe "" $ lookup "class" attr - let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - let attribs = (attribsId, attribsClasses, attribsKV) - return [CodeBlock attribs result] + return [CodeBlock (mkAttr attr) result] inline :: TagParser [Inline] inline = choice @@ -314,6 +327,7 @@ inline = choice , pLink , pImage , pCode + , pSpan , pRawHtmlInline ] @@ -397,11 +411,14 @@ pCode :: TagParser [Inline] pCode = try $ do (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) result <- manyTill pAnyTag (pCloses open) - let ident = fromMaybe "" $ lookup "id" attr - let classes = words $ fromMaybe [] $ lookup "class" attr - let rest = filter (\(x,_) -> x /= "id" && x /= "class") attr - return [Code (ident,classes,rest) - $ intercalate " " $ lines $ innerText result] + return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result] + +pSpan :: TagParser [Inline] +pSpan = try $ do + getOption readerParseRaw >>= guard + TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) + contents <- pInTags "span" inline + return [Span (mkAttr attr) contents] pRawHtmlInline :: TagParser [Inline] pRawHtmlInline = do @@ -459,7 +476,13 @@ pBlank = try $ do pTagContents :: Parser [Char] ParserState Inline pTagContents = - pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad + Math InlineMath `fmap` mathInline + <|> Math DisplayMath `fmap` mathDisplay + <|> pStr + <|> pSpace + <|> smartPunctuation pTagContents + <|> pSymbol + <|> pBad pStr :: Parser [Char] ParserState Inline pStr = do @@ -474,6 +497,7 @@ isSpecial '"' = True isSpecial '\'' = True isSpecial '.' 
= True isSpecial '-' = True +isSpecial '$' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True @@ -549,7 +573,7 @@ blockHtmlTags = ["address", "article", "aside", "blockquote", "body", "button", "noframes", "noscript", "object", "ol", "output", "p", "pre", "progress", "section", "table", "tbody", "textarea", "thead", "tfoot", "ul", "dd", "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr", "script", "style", "video"] + "th", "thead", "tr", "script", "style", "svg", "video"] -- We want to allow raw docbook in markdown documents, so we -- include docbook block tags here too. @@ -648,3 +672,10 @@ htmlTag f = try $ do _ -> do rendered <- manyTill anyChar (char '>') return (next, rendered ++ ">") + +mkAttr :: [(String, String)] -> Attr +mkAttr attr = (attribsId, attribsClasses, attribsKV) + where attribsId = fromMaybe "" $ lookup "id" attr + attribsClasses = words $ fromMaybe "" $ lookup "class" attr + attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr + diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index ff5b73348..51271edc5 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,12 +38,13 @@ import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) +import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, + mathDisplay, mathInline) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad import Text.Pandoc.Builder -import Data.Char (isLetter) +import Data.Char (isLetter, isAlphaNum) import Control.Applicative import Data.Monoid import Data.Maybe (fromMaybe) @@ -163,28 +164,40 @@ mathChars = concat <$> <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar) ) +quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines +quoted' f starter ender = do + startchs <- starter + try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs + double_quote :: LP Inlines -double_quote = (doubleQuoted . mconcat) <$> - (try $ string "``" *> manyTill inline (try $ string "''")) +double_quote = + ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") + <|> quoted' doubleQuoted (string "“") (void $ char '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") + <|> quoted' doubleQuoted (string "\"") (void $ char '"') + ) single_quote :: LP Inlines -single_quote = (singleQuoted . mconcat) <$> - (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter)) +single_quote = + ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) + <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) + ) inline :: LP Inlines inline = (mempty <$ comment) <|> (space <$ sp) <|> inlineText <|> inlineCommand - <|> grouped inline + <|> inlineGroup <|> (char '-' *> option (str "-") ((char '-') *> option (str "–") (str "—" <$ char '-'))) <|> double_quote <|> single_quote - <|> (str "“" <$ try (string "``")) -- nb. {``} won't be caught by double_quote <|> (str "”" <$ try (string "''")) - <|> (str "‘" <$ char '`') -- nb. 
{`} won't be caught by single_quote + <|> (str "”" <$ char '”') <|> (str "’" <$ char '\'') + <|> (str "’" <$ char '’') <|> (str "\160" <$ char '~') <|> (mathDisplay $ string "$$" *> mathChars <* string "$$") <|> (mathInline $ char '$' *> mathChars <* char '$') @@ -199,6 +212,15 @@ inline = (mempty <$ comment) inlines :: LP Inlines inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) +inlineGroup :: LP Inlines +inlineGroup = do + ils <- grouped inline + if isNull ils + then return mempty + else return $ spanWith nullAttr ils + -- we need the span so we can detitlecase bibtex entries; + -- we need to know when something is {C}apitalized + block :: LP Blocks block = (mempty <$ comment) <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) @@ -364,6 +386,7 @@ inlineCommands = M.fromList $ , ("backslash", lit "\\") , ("slash", lit "/") , ("textbf", strong <$> tok) + , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) , ("ldots", lit "…") , ("dots", lit "…") , ("mdots", lit "…") @@ -434,6 +457,7 @@ inlineCommands = M.fromList $ , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("verb", doverb) , ("lstinline", doverb) + , ("Verb", doverb) , ("texttt", (code . stringify . toList) <$> tok) , ("url", (unescapeURL <$> braced) >>= \url -> pure (link url "" (str url))) @@ -518,9 +542,7 @@ inNote ils = unescapeURL :: String -> String unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable '%' = True - isEscapable '#' = True - isEscapable _ = False + where isEscapable c = c `elem` "#$%&~_^\\{}" unescapeURL (x:xs) = x:unescapeURL xs unescapeURL [] = "" @@ -747,7 +769,7 @@ inlineText :: LP Inlines inlineText = str <$> many1 inlineChar inlineChar :: LP Char -inlineChar = noneOf "\\$%^_&~#{}^'`-[] \t\n" +inlineChar = noneOf "\\$%^_&~#{}^'`\"‘’“”-[] \t\n" environment :: LP Blocks environment = do @@ -852,9 +874,8 @@ verbatimEnv = do (_,r) <- withRaw $ do controlSeq "begin" name <- braced - guard $ name == "verbatim" || name == "Verbatim" || - name == "lstlisting" || name == "minted" || - name == "alltt" + guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", + "minted", "alltt"] verbEnv name rest <- getInput return (r,rest) @@ -1030,14 +1051,14 @@ paragraph = do preamble :: LP Blocks preamble = mempty <$> manyTill preambleBlock beginDoc where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}" - preambleBlock = (mempty <$ comment) - <|> (mempty <$ sp) - <|> (mempty <$ blanklines) - <|> (mempty <$ macro) - <|> blockCommand - <|> (mempty <$ anyControlSeq) - <|> (mempty <$ braced) - <|> (mempty <$ anyChar) + preambleBlock = (void comment) + <|> (void sp) + <|> (void blanklines) + <|> (void macro) + <|> (void blockCommand) + <|> (void anyControlSeq) + <|> (void braced) + <|> (void anyChar) ------- @@ -1058,6 +1079,7 @@ simpleCiteArgs = try $ do first <- optionMaybe $ toList <$> opt second <- optionMaybe $ toList <$> opt char '{' + optional sp keys <- manyTill citationLabel (char '}') let (pre, suf) = case (first , second ) of (Just s , Nothing) -> (mempty, s ) @@ -1073,18 +1095,24 @@ simpleCiteArgs = try $ do return $ addPrefix pre $ addSuffix suf $ map conv keys citationLabel :: LP String -citationLabel = trim <$> - (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp) +citationLabel = optional sp *> + (many1 (satisfy isBibtexKeyChar) + <* optional sp + <* optional (char ',') + <* optional sp) + where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*" cites :: CitationMode -> Bool -> LP 
[Citation] cites mode multi = try $ do cits <- if multi then many1 simpleCiteArgs else count 1 simpleCiteArgs - let (c:cs) = concat cits + let cs = concat cits return $ case mode of - AuthorInText -> c {citationMode = mode} : cs - _ -> map (\a -> a {citationMode = mode}) (c:cs) + AuthorInText -> case cs of + (c:rest) -> c {citationMode = mode} : rest + [] -> [] + _ -> map (\a -> a {citationMode = mode}) cs citation :: String -> CitationMode -> Bool -> LP Inlines citation name mode multi = do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 9b98cbc3e..3feafd362 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -215,10 +215,10 @@ pandocTitleBlock = try $ do author' <- author date' <- date return $ - ( if B.isNull title' then id else B.setMeta "title" title' - . if null author' then id else B.setMeta "author" author' - . if B.isNull date' then id else B.setMeta "date" date' ) - nullMeta + (if B.isNull title' then id else B.setMeta "title" title') + . (if null author' then id else B.setMeta "author" author') + . (if B.isNull date' then id else B.setMeta "date" date') + $ nullMeta updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } yamlMetaBlock :: MarkdownParser (F Blocks) @@ -227,6 +227,7 @@ yamlMetaBlock = try $ do pos <- getPosition string "---" blankline + notFollowedBy blankline -- if --- is followed by a blank it's an HRULE rawYamlLines <- manyTill anyLine stopLine -- by including --- and ..., we allow yaml blocks with just comments: let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) @@ -443,6 +444,9 @@ block = choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock , guardEnabled Ext_latex_macros *> (macro >>= return . return) + -- note: bulletList needs to be before header because of + -- the possibility of empty list items: - + , bulletList , header , lhsCodeBlock , rawTeXBlock @@ -453,7 +457,6 @@ block = choice [ mempty <$ blanklines , codeBlockIndented , blockQuote , hrule - , bulletList , orderedList , definitionList , noteBlock @@ -698,7 +701,7 @@ bulletListStart = try $ do skipNonindentSpaces notFollowedBy' (() <$ hrule) -- because hrules start out just like lists satisfy isBulletListMarker - spaceChar + spaceChar <|> lookAhead newline skipSpaces anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim) @@ -726,11 +729,15 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ()) -- parse a line of a list item (start = parser for beginning of list item) listLine :: MarkdownParser String listLine = try $ do - notFollowedBy blankline notFollowedBy' (do indentSpaces - many (spaceChar) + many spaceChar listStart) - chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline + notFollowedBy' $ htmlTag (~== TagClose "div") + chunks <- manyTill + ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') + <|> liftM snd (htmlTag isCommentTag) + <|> count 1 anyChar + ) newline return $ concat chunks -- parse raw text for one list item, excluding start marker and continuations @@ -739,7 +746,7 @@ rawListItem :: MarkdownParser a rawListItem start = try $ do start first <- listLine - rest <- many (notFollowedBy listStart >> listLine) + rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine) blanks <- many blankline return $ unlines (first:rest) ++ blanks @@ -757,6 +764,7 @@ listContinuationLine :: MarkdownParser String listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart + notFollowedBy' 
$ htmlTag (~== TagClose "div") optional indentSpaces result <- anyLine return $ result ++ "\n" @@ -781,8 +789,8 @@ listItem start = try $ do orderedList :: MarkdownParser (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart - unless ((style == DefaultStyle || style == Decimal || style == Example) && - (delim == DefaultDelim || delim == Period)) $ + unless (style `elem` [DefaultStyle, Decimal, Example] && + delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists items <- fmap sequence $ many1 $ listItem @@ -871,8 +879,11 @@ para = try $ do $ try $ do newline (blanklines >> return mempty) - <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote) - <|> (guardDisabled Ext_blank_before_header >> lookAhead header) + <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote) + <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced) + <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) + <|> (guardEnabled Ext_lists_without_preceding_blankline >> + () <$ lookAhead listStart) return $ do result' <- result case B.toList result' of @@ -891,7 +902,9 @@ plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline -- htmlElement :: MarkdownParser String -htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) +htmlElement = rawVerbatimBlock + <|> strictHtmlBlock + <|> liftM snd (htmlTag isBlockTag) htmlBlock :: MarkdownParser (F Blocks) htmlBlock = do @@ -912,8 +925,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag) rawVerbatimBlock :: MarkdownParser String rawVerbatimBlock = try $ do - (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> - t == "pre" || t == "style" || t == "script") + (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem + ["pre", "style", "script"]) (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags [TagClose tag] @@ -1113,12 +1126,12 @@ multilineTableHeader headless = try $ do then liftM (map (:[]) . tail . splitStringByIndices (init indices)) $ lookAhead anyLine else return $ transpose $ map - (\ln -> tail $ splitStringByIndices (init indices) ln) + (tail . splitStringByIndices (init indices)) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" - else map unwords rawHeadsList + else map (unlines . map trim) rawHeadsList heads <- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) $ map trim rawHeads @@ -1175,7 +1188,7 @@ gridTableHeader headless = try $ do -- RST does not have a notion of alignments let rawHeads = if headless then replicate (length dashes) "" - else map unwords $ transpose + else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads return (heads, aligns, indices) @@ -1401,39 +1414,6 @@ math :: MarkdownParser (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) <|> (return . 
B.math <$> (mathInline >>= applyMacros')) -mathDisplay :: MarkdownParser String -mathDisplay = - (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathDisplayWith "\\[" "\\]") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathDisplayWith "\\\\[" "\\\\]") - -mathDisplayWith :: String -> String -> MarkdownParser String -mathDisplayWith op cl = try $ do - string op - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) - -mathInline :: MarkdownParser String -mathInline = - (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathInlineWith "\\(" "\\)") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathInlineWith "\\\\(" "\\\\)") - -mathInlineWith :: String -> String -> MarkdownParser String -mathInlineWith op cl = try $ do - string op - notFollowedBy space - words' <- many1Till (count 1 (noneOf "\n\\") - <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) - <|> count 1 newline <* notFollowedBy' blankline - *> return " ") - (try $ string cl) - notFollowedBy digit -- to prevent capture of $5 - return $ concat words' - -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. enclosure :: Char @@ -1450,6 +1430,7 @@ enclosure c = do -- Parse inlines til you hit one c or a sequence of two cs. -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. +-- Otherwise, emit ccc then the results. three :: Char -> MarkdownParser (F Inlines) three c = do contents <- mconcat <$> many (notFollowedBy (char c) >> inline) @@ -1474,7 +1455,7 @@ one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (char c) >> inline) <|> try (string [c,c] >> notFollowedBy (char c) >> - two c prefix') ) + two c mempty) ) (char c >> return (B.emph <$> (prefix' <> contents))) <|> return (return (B.str [c]) <> (prefix' <> contents)) @@ -1559,8 +1540,11 @@ endline :: MarkdownParser (F Inlines) endline = try $ do newline notFollowedBy blankline + guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header + guardEnabled Ext_backtick_code_blocks >> + notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) -- parse potential list-starts differently if in a list: st <- getState when (stateParserContext st == ListItemState) $ do @@ -1738,7 +1722,7 @@ spanHtml = try $ do guardEnabled Ext_markdown_in_html_blocks (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) - let ident = maybe "" id $ lookup "id" attrs + let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.spanWith (ident, classes, keyvals) <$> contents @@ -1748,7 +1732,7 @@ divHtml = try $ do guardEnabled Ext_markdown_in_html_blocks (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" []) contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div")) - let ident = maybe "" id $ lookup "id" attrs + let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) <$> contents @@ -1768,12 +1752,11 @@ 
rawHtmlInline = do cite :: MarkdownParser (F Inlines) cite = do guardEnabled Ext_citations - citations <- textualCite <|> (fmap (flip B.cite unknownC) <$> normalCite) + citations <- textualCite + <|> do (cs, raw) <- withRaw normalCite + return $ (flip B.cite (B.text raw)) <$> cs return citations -unknownC :: Inlines -unknownC = B.str "???" - textualCite :: MarkdownParser (F Inlines) textualCite = try $ do (_, key) <- citeKey @@ -1784,14 +1767,18 @@ textualCite = try $ do , citationNoteNum = 0 , citationHash = 0 } - mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite + mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite case mbrest of - Just rest -> return $ (flip B.cite unknownC . (first:)) <$> rest - Nothing -> (fmap (flip B.cite unknownC) <$> bareloc first) <|> - return (do st <- askF - return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] unknownC) + Just (rest, raw) -> + return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) + <$> rest + Nothing -> + (do (cs, raw) <- withRaw $ bareloc first + return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs) + <|> return (do st <- askF + return $ case M.lookup key (stateExamples st) of + Just n -> B.str (show n) + _ -> B.cite [first] $ B.str $ '@':key) bareloc :: Citation -> MarkdownParser (F [Citation]) bareloc c = try $ do @@ -1817,11 +1804,17 @@ normalCite = try $ do citeKey :: MarkdownParser (Bool, String) citeKey = try $ do + -- make sure we're not right after an alphanumeric, + -- since foo@bar.baz is probably an email address + lastStrPos <- stateLastStrPos <$> getState + pos <- getPosition + guard $ lastStrPos /= Just pos suppress_author <- option False (char '-' >> return True) char '@' - first <- letter - let internal p = try $ p >>~ lookAhead (letter <|> digit) - rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/") + first <- letter <|> char '_' + let regchar = satisfy (\c -> isAlphaNum c || c == '_') + let internal p = try $ p >>~ lookAhead regchar + rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") let key = first:rest return (suppress_author, key) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 2b938cd82..8d8ea0199 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} +-- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> @@ -43,7 +44,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) import Text.Pandoc.Parsing hiding ( nested ) import Text.Pandoc.Walk ( walk ) -import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead ) +import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim ) import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad @@ -51,7 +52,9 @@ import Data.List (intersperse, intercalate, isPrefixOf ) import Text.HTML.TagSoup import Data.Sequence (viewl, ViewL(..), (<|)) import qualified Data.Foldable as F +import qualified Data.Map as M import Data.Char (isDigit, isSpace) +import Data.Maybe (fromMaybe) -- | Read mediawiki from an input string and return a Pandoc document. 
readMediaWiki :: ReaderOptions -- ^ Reader options @@ -62,6 +65,8 @@ readMediaWiki opts s = , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 , mwCategoryLinks = [] + , mwHeaderMap = M.empty + , mwIdentifierList = [] } "source" (s ++ "\n") of Left err' -> error $ "\nError:\n" ++ show err' @@ -71,10 +76,23 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int , mwNextLinkNumber :: Int , mwCategoryLinks :: [Inlines] + , mwHeaderMap :: M.Map Inlines String + , mwIdentifierList :: [String] } type MWParser = Parser [Char] MWState +instance HasReaderOptions MWParser where + askReaderOption f = (f . mwOptions) `fmap` getState + +instance HasHeaderMap MWParser where + getHeaderMap = fmap mwHeaderMap getState + putHeaderMap hm = updateState $ \st -> st{ mwHeaderMap = hm } + +instance HasIdentifierList MWParser where + getIdentifierList = fmap mwIdentifierList getState + putIdentifierList l = updateState $ \st -> st{ mwIdentifierList = l } + -- -- auxiliary functions -- @@ -91,7 +109,7 @@ nested p = do return res specialChars :: [Char] -specialChars = "'[]<=&*{}|\"" +specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" @@ -187,7 +205,7 @@ table = do tableStart styles <- option [] parseAttrs <* blankline let tableWidth = case lookup "width" styles of - Just w -> maybe 1.0 id $ parseWidth w + Just w -> fromMaybe 1.0 $ parseWidth w Nothing -> 1.0 caption <- option mempty tableCaption optional rowsep @@ -268,7 +286,7 @@ tableCell = try $ do Just "center" -> AlignCenter _ -> AlignDefault let width = case lookup "width" attrs of - Just xs -> maybe 0.0 id $ parseWidth xs + Just xs -> fromMaybe 0.0 $ parseWidth xs Nothing -> 0.0 return ((align, width), bs) @@ -351,7 +369,8 @@ header = try $ do let lev = length eqs guard $ lev <= 6 contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=') - return $ B.header lev contents + attr <- registerHeader nullAttr contents + return $ B.headerWith attr lev contents bulletList :: MWParser Blocks bulletList = B.bulletList <$> @@ -369,7 +388,7 @@ orderedList = spaces items <- many (listItem '#' <|> li) optional (htmlTag (~== TagClose "ol")) - let start = maybe 1 id $ safeRead $ fromAttrib "start" tag + let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items definitionList :: MWParser Blocks @@ -380,8 +399,9 @@ defListItem = try $ do terms <- mconcat . intersperse B.linebreak <$> many defListTerm -- we allow dd with no dt, or dt with no dd defs <- if B.isNull terms - then many1 $ listItem ':' - else many $ listItem ':' + then notFollowedBy (try $ string ":<math>") *> + many1 (listItem ':') + else many (listItem ':') return (terms, defs) defListTerm :: MWParser Inlines @@ -462,6 +482,7 @@ inline = whitespace <|> image <|> internalLink <|> externalLink + <|> math <|> inlineTag <|> B.singleton <$> charRef <|> inlineHtml @@ -472,6 +493,16 @@ inline = whitespace str :: MWParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) +math :: MWParser Inlines +math = (B.displayMath . trim <$> try (char ':' >> charsInTags "math")) + <|> (B.math . trim <$> charsInTags "math") + <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) + <|> (B.math . 
trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) + where dmStart = string "\\[" + dmEnd = try (string "\\]") + mStart = string "\\(" + mEnd = try (string "\\)") + variable :: MWParser String variable = try $ do string "{{{" @@ -495,7 +526,6 @@ inlineTag = do TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" - TagOpen "math" _ -> B.math <$> charsInTags "math" TagOpen "code" _ -> B.code <$> charsInTags "code" TagOpen "tt" _ -> B.code <$> charsInTags "tt" TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" @@ -528,7 +558,7 @@ image = try $ do _ <- many (try $ char '|' *> imageOption) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.image fname "image" caption + return $ B.image fname ("fig:" ++ stringify caption) caption imageOption :: MWParser String imageOption = diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 1f7088f72..6bd617f7e 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -27,16 +27,30 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of TeX math to a list of 'Pandoc' inline elements. -} -module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where +module Text.Pandoc.Readers.TeXMath ( readTeXMath, readTeXMath' ) where import Text.Pandoc.Definition import Text.TeXMath -- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. --- Defaults to raw formula between @$@ characters if entire formula +-- Defaults to raw formula between @$@ or @$$@ characters if entire formula -- can't be converted. +readTeXMath' :: MathType + -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> [Inline] +readTeXMath' mt inp = case texMathToPandoc dt inp of + Left _ -> [Str (delim ++ inp ++ delim)] + Right res -> res + where (dt, delim) = case mt of + DisplayMath -> (DisplayBlock, "$$") + InlineMath -> (DisplayInline, "$") + +{-# DEPRECATED readTeXMath "Use readTeXMath' from Text.Pandoc.JSON instead" #-} +-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. +-- Defaults to raw formula between @$@ characters if entire formula +-- can't be converted. (This is provided for backwards compatibility; +-- it is better to use @readTeXMath'@, which properly distinguishes +-- between display and inline math.) readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings) -> [Inline] -readTeXMath inp = case texMathToPandoc DisplayInline inp of - Left _ -> [Str ("$" ++ inp ++ "$")] - Right res -> res +readTeXMath = readTeXMath' InlineMath diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 23e07f621..93658cdea 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -594,7 +594,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> ([Inline] -> Inline) -- ^ Inline constructor -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly) -simpleInline border construct = surrounded border (inlineWithAttribute) >>= +simpleInline border construct = surrounded border inlineWithAttribute >>= return . construct . 
normalizeSpaces where inlineWithAttribute = (try $ optional attributes) >> inline diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 0547bc065..6112e764f 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -32,7 +32,7 @@ the HTML using data URIs. -} module Text.Pandoc.SelfContained ( makeSelfContained ) where import Text.HTML.TagSoup -import Network.URI (isAbsoluteURI, escapeURIString) +import Network.URI (isURI, escapeURIString) import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B import Data.ByteString (ByteString) @@ -86,7 +86,7 @@ cssURLs userdata d orig = "\"" -> B.takeWhile (/='"') $ B.drop 1 u "'" -> B.takeWhile (/='\'') $ B.drop 1 u _ -> u - let url' = if isAbsoluteURI url + let url' = if isURI url then url else d </> url (raw, mime) <- getRaw userdata "" url' @@ -97,7 +97,7 @@ cssURLs userdata d orig = getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) getItem userdata f = - if isAbsoluteURI f + if isURI f then openURL f >>= either handleErr return else do -- strip off trailing query or fragment part, if relative URL. diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 9a9a092fc..714402e42 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -91,7 +91,8 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha, isLetter, isDigit, isSpace ) import Data.List ( find, isPrefixOf, intercalate ) import qualified Data.Map as M -import Network.URI ( escapeURIString, isAbsoluteURI, unEscapeString ) +import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, + unEscapeString, parseURIReference ) import System.Directory import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (</>), takeExtension, dropExtension ) @@ -108,6 +109,7 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 import Text.Pandoc.Compat.Monoid +import Data.ByteString.Base64 (decodeLenient) #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -120,6 +122,7 @@ import Data.ByteString.Lazy (toChunks) import Network.HTTP.Conduit (httpLbs, parseUrl, withManager, responseBody, responseHeaders) import Network.HTTP.Types.Header ( hContentType) +import Network (withSocketsDo) #else import Network.URI (parseURI) import Network.HTTP (findHeader, rspBody, @@ -269,7 +272,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day) where parsetimeWith = parseTime defaultTimeLocale formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", - "%d %B %Y", "%b. %d, %Y", "%B %d, %Y"] + "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"] -- -- Pandoc block and inline list processing @@ -530,7 +533,7 @@ headerShift n = walk shift -- | Detect if a list is tight. isTightList :: [[Block]] -> Bool -isTightList = and . map firstIsPlain +isTightList = all firstIsPlain where firstIsPlain (Plain _ : _) = True firstIsPlain _ = False @@ -562,14 +565,10 @@ makeMeta title authors date = -- | Render HTML tags. 
renderTags' :: [Tag String] -> String renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } + renderOptions{ optMinimize = matchTags ["hr", "br", "img", + "meta", "link"] + , optRawTag = matchTags ["script", "style"] } + where matchTags = \tags -> flip elem tags . map toLower -- -- File handling @@ -624,10 +623,14 @@ readDataFileUTF8 userDir fname = fetchItem :: Maybe String -> String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) fetchItem sourceURL s - | isAbsoluteURI s = openURL s - | otherwise = case sourceURL of - Just u -> openURL (u ++ "/" ++ s) - Nothing -> E.try readLocalFile + | isURI s = openURL s + | otherwise = + case sourceURL >>= parseURIReference of + Just u -> case parseURIReference s of + Just s' -> openURL $ show $ + s' `nonStrictRelativeTo` u + Nothing -> openURL $ show u ++ "/" ++ s + Nothing -> E.try readLocalFile where readLocalFile = do let mime = case takeExtension s of ".gz" -> getMimeType $ dropExtension s @@ -641,9 +644,9 @@ openURL u | "data:" `isPrefixOf` u = let mime = takeWhile (/=',') $ drop 5 u contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u - in return $ Right (contents, Just mime) + in return $ Right (decodeLenient contents, Just mime) #ifdef HTTP_CONDUIT - | otherwise = E.try $ do + | otherwise = withSocketsDo $ E.try $ do req <- parseUrl u resp <- withManager $ httpLbs req return (BS.concat $ toChunks $ responseBody resp, diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index 2bbdb120f..50c46d17f 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -46,13 +46,18 @@ getSlideLevel = go 6 -- | Prepare a block list to be passed to hierarchicalize. prepSlides :: Int -> [Block] -> [Block] -prepSlides slideLevel = ensureStartWithH . splitHrule +prepSlides slideLevel = ensureStartWithH . splitHrule . extractRefsHeader where splitHrule (HorizontalRule : Header n attr xs : ys) | n == slideLevel = Header slideLevel attr xs : splitHrule ys splitHrule (HorizontalRule : xs) = Header slideLevel nullAttr [Str "\0"] : splitHrule xs splitHrule (x : xs) = x : splitHrule xs splitHrule [] = [] + extractRefsHeader bs = + case reverse bs of + (Div ("",["references"],[]) (Header n attrs xs : ys) : zs) + -> reverse zs ++ (Header n attrs xs : [Div ("",["references"],[]) ys]) + _ -> bs ensureStartWithH bs@(Header n _ _:_) | n <= slideLevel = bs ensureStartWithH bs = Header slideLevel nullAttr [Str "\0"] : bs diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 22a44e735..ad8838f72 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -117,6 +117,7 @@ import Text.Blaze (preEscapedText, Html) #endif import Data.ByteString.Lazy (ByteString, fromChunks) import Text.Pandoc.Shared (readDataFileUTF8) +import Data.Vector ((!?)) -- | Get default template for the specified writer. getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first @@ -185,7 +186,7 @@ var = Template . resolveVar resolveVar :: Variable -> Value -> Text resolveVar var' val = case multiLookup var' val of - Just (Array vec) -> mconcat $ map (resolveVar []) $ toList vec + Just (Array vec) -> maybe mempty (resolveVar []) $ vec !? 
0 Just (String t) -> T.stripEnd t Just (Number n) -> T.pack $ show n Just (Bool True) -> "true" @@ -212,7 +213,7 @@ iter var' template sep = Template $ \val -> unTemplate Just (Array vec) -> mconcat $ intersperse sep $ map (setVar template var') $ toList vec - Just x -> setVar template var' x + Just x -> cond var' (setVar template var' x) mempty Nothing -> mempty) val setVar :: Template -> Variable -> Value -> Template diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 0379f8b0a..3095cf508 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -130,7 +130,7 @@ blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure blockToConTeXt (Para [Image txt (src,'f':'i':'g':':':_)]) = do capt <- inlineListToConTeXt txt - return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <> + return $ blankline $$ "\\placefigure" <> braces capt <> braces ("\\externalfigure" <> brackets (text src)) <> blankline blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst @@ -205,9 +205,9 @@ blockToConTeXt (Table caption aligns widths heads rows) = do else liftM ($$ "\\HL") $ tableRowToConTeXt heads captionText <- inlineListToConTeXt caption rows' <- mapM tableRowToConTeXt rows - return $ "\\placetable" <> brackets ("here" <> if null caption - then ",none" - else "") + return $ "\\placetable" <> (if null caption + then brackets "none" + else empty) <> braces captionText $$ "\\starttable" <> brackets (text colDescriptors) $$ "\\HL" $$ headers $$ diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 7c03c07dc..02d875be3 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -85,8 +85,9 @@ writeDocbook opts (Pandoc meta blocks) = auths' = map (authorToDocbook opts) $ docAuthors meta meta' = B.setMeta "author" auths' meta Just metadata = metaToJSON opts - (Just . render colwidth . blocksToDocbook opts) - (Just . render colwidth . inlinesToDocbook opts) + (Just . render colwidth . (vcat . + (map (elementToDocbook opts' startLvl)) . hierarchicalize)) + (Just . render colwidth . inlinesToDocbook opts') meta' main = render' $ vcat (map (elementToDocbook opts' startLvl) elements) context = defField "body" main @@ -281,8 +282,8 @@ inlineToDocbook opts (Math t str) $ fixNS $ removeAttr r Left _ -> inlinesToDocbook opts - $ readTeXMath str - | otherwise = inlinesToDocbook opts $ readTeXMath str + $ readTeXMath' t str + | otherwise = inlinesToDocbook opts $ readTeXMath' t str where (dt, tagtype) = case t of InlineMath -> (DisplayInline,"inlineequation") DisplayMath -> (DisplayBlock,"informalequation") diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index c8673ae48..2a834c2da 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,7 +29,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. 
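In the openURL change above, the payload of data: URIs is now run through Data.ByteString.Base64.decodeLenient, which skips invalid characters and tolerates missing padding rather than failing outright. A simplified sketch of just that branch (the helper name dataURIContents is hypothetical, and error handling is omitted):

import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as B8
import Data.ByteString.Base64 (decodeLenient)
import Data.List (isPrefixOf)
import Network.URI (unEscapeString)

-- Sketch of the data: branch of openURL: split off the media-type part
-- before the comma and leniently base64-decode whatever follows it.
dataURIContents :: String -> Maybe (ByteString, String)
dataURIContents u
  | "data:" `isPrefixOf` u =
      let mime    = takeWhile (/= ',') (drop 5 u)
          payload = B8.pack (unEscapeString (drop 1 (dropWhile (/= ',') u)))
      in Just (decodeLenient payload, mime)
  | otherwise = Nothing

-- e.g. dataURIContents "data:text/plain;base64,aGVsbG8="
--        == Just ("hello", "text/plain;base64")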
-} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, groupBy ) +import Data.Maybe (fromMaybe) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -42,6 +43,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.ImageSize import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Writers.Shared (fixDisplayMath) import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) @@ -55,8 +57,8 @@ import Data.Unique (hashUnique, newUnique) import System.Random (randomRIO) import Text.Printf (printf) import qualified Control.Exception as E -import System.FilePath (takeExtension) -import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) +import Control.Applicative ((<|>)) data WriterState = WriterState{ stTextProperties :: [Element] @@ -130,7 +132,8 @@ writeDocx opts doc@(Pandoc meta _) = do let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _, _) = - mkOverrideNode ("/word/" ++ imgpath, maybe "application/octet-stream" id mbMimeType) + mkOverrideNode ("/word/" ++ imgpath, + fromMaybe "application/octet-stream" mbMimeType) let overrides = map mkOverrideNode [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -231,10 +234,11 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ mknode "dc:title" [] (stringify $ docTitle meta) - : mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] - (maybe "" id $ normalizeDate $ stringify $ docDate meta) - : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here - : map (mknode "dc:creator" [] . 
stringify) (docAuthors meta) + : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) + : maybe [] + (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] $ x + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] $ x + ]) (normalizeDate $ stringify $ docDate meta) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps let relsPath = "_rels/.rels" @@ -247,7 +251,7 @@ writeDocx opts doc@(Pandoc meta _) = do ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties") ,("Target","docProps/app.xml")] , [("Id","rId3") - ,("Type","http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties") + ,("Type","http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties") ,("Target","docProps/core.xml")] ] let relsEntry = toEntry relsPath epochtime $ renderXml rels @@ -257,14 +261,22 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsAppEntry <- entryFromArchive "docProps/app.xml" themeEntry <- entryFromArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive "word/fontTable.xml" + settingsEntry <- entryFromArchive "word/settings.xml" webSettingsEntry <- entryFromArchive "word/webSettings.xml" + let miscRels = [ f | f <- filesInArchive refArchive + , "word/_rels/" `isPrefixOf` f + , ".xml.rels" `isSuffixOf` f + , f /= "word/_rels/document.xml.rels" + , f /= "word/_rels/footnotes.xml.rels" ] + miscRelEntries <- mapM entryFromArchive miscRels -- Create archive let archive = foldr addEntryToArchive emptyArchive $ contentTypesEntry : relsEntry : contentEntry : relEntry : footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : - fontTableEntry : webSettingsEntry : imageEntries + fontTableEntry : settingsEntry : webSettingsEntry : + imageEntries ++ miscRelEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] @@ -319,7 +331,7 @@ mkNum markers marker numid = NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] - where absnumid = maybe 0 id $ M.lookup marker markers + where absnumid = fromMaybe 0 $ M.lookup marker markers mkAbstractNum :: (ListMarker,Int) -> IO Element mkAbstractNum (marker,numid) = do @@ -635,7 +647,12 @@ formattedString str = do inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ") -inlineToOpenXML opts (Span _ ils) = inlinesToOpenXML opts ils +inlineToOpenXML opts (Span (_,classes,_) ils) = do + let off x = withTextProp (mknode x [("w:val","0")] ()) + ((if "csl-no-emph" `elem` classes then off "w:i" else id) . + (if "csl-no-strong" `elem` classes then off "w:b" else id) . 
+ (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id)) + $ inlinesToOpenXML opts ils inlineToOpenXML opts (Strong lst) = withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML opts (Emph lst) = @@ -667,7 +684,7 @@ inlineToOpenXML opts (Math mathType str) = do else DisplayInline case texMathToOMML displayType str of Right r -> return [r] - Left _ -> inlinesToOpenXML opts (readTeXMath str) + Left _ -> inlinesToOpenXML opts (readTeXMath' mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML opts (Code attrs str) = withTextProp (rStyle "VerbatimChar") @@ -734,7 +751,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." -- emit alt text inlinesToOpenXML opts alt - Right (img, _) -> do + Right (img, mt) -> do ident <- ("rId"++) `fmap` getUniqueId let size = imageSize img let (xpt,ypt) = maybe (120,120) sizeInPoints size @@ -773,19 +790,21 @@ inlineToOpenXML opts (Image alt (src, tit)) = do , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] () , graphic ] - let imgext = case imageType img of - Just Png -> ".png" - Just Jpeg -> ".jpeg" - Just Gif -> ".gif" - Just Pdf -> ".pdf" - Just Eps -> ".eps" - Nothing -> takeExtension src + let imgext = case mt >>= extensionFromMimeType of + Just x -> '.':x + Nothing -> case imageType img of + Just Png -> ".png" + Just Jpeg -> ".jpeg" + Just Gif -> ".gif" + Just Pdf -> ".pdf" + Just Eps -> ".eps" + Nothing -> "" if null imgext then -- without an extension there is no rule for content type inlinesToOpenXML opts alt -- return alt to avoid corrupted docx else do let imgpath = "media/" ++ ident ++ imgext - let mbMimeType = getMimeType imgpath + let mbMimeType = mt <|> getMimeType imgpath -- insert mime type to use in constructing [Content_Types].xml modify $ \st -> st{ stImages = M.insert src (ident, imgpath, mbMimeType, imgElt, img) @@ -797,30 +816,8 @@ br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] parseXml :: Archive -> String -> IO Element parseXml refArchive relpath = - case (findEntryByPath relpath refArchive >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of - Just d -> return d + case findEntryByPath relpath refArchive of + Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of + Just d -> return d + Nothing -> fail $ relpath ++ " corrupt in reference docx" Nothing -> fail $ relpath ++ " missing in reference docx" - -isDisplayMath :: Inline -> Bool -isDisplayMath (Math DisplayMath _) = True -isDisplayMath _ = False - -stripLeadingTrailingSpace :: [Inline] -> [Inline] -stripLeadingTrailingSpace = go . reverse . go . reverse - where go (Space:xs) = xs - go xs = xs - -fixDisplayMath :: Block -> Block -fixDisplayMath (Plain lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath (Para lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Para . 
stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath x = x diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ac0e7610c..a48300939 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, CPP #-} +{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-} {- Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> @@ -30,16 +30,18 @@ Conversion of 'Pandoc' documents to EPUB. -} module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef -import Data.Maybe ( fromMaybe, isNothing ) +import qualified Data.Map as M +import Data.Maybe ( fromMaybe ) import Data.List ( isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import Text.Pandoc.UTF8 ( fromStringLazy, toString ) +import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.SelfContained ( makeSelfContained ) import Codec.Archive.Zip +import Control.Applicative ((<$>)) import Data.Time.Clock.POSIX import Data.Time import System.Locale @@ -54,7 +56,7 @@ import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) -import Data.Char ( toLower ) +import Data.Char ( toLower, isDigit ) import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) #if MIN_VERSION_base(4,6,0) @@ -70,12 +72,237 @@ import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -- in filenames, chapter0003.xhtml. data Chapter = Chapter (Maybe [Int]) [Block] +data EPUBMetadata = EPUBMetadata{ + epubIdentifier :: [Identifier] + , epubTitle :: [Title] + , epubDate :: String + , epubLanguage :: String + , epubCreator :: [Creator] + , epubContributor :: [Creator] + , epubSubject :: [String] + , epubDescription :: Maybe String + , epubType :: Maybe String + , epubFormat :: Maybe String + , epubPublisher :: Maybe String + , epubSource :: Maybe String + , epubRelation :: Maybe String + , epubCoverage :: Maybe String + , epubRights :: Maybe String + , epubCoverImage :: Maybe String + , epubStylesheet :: Maybe Stylesheet + } deriving Show + +data Stylesheet = StylesheetPath FilePath + | StylesheetContents String + deriving Show + +data Creator = Creator{ + creatorText :: String + , creatorRole :: Maybe String + , creatorFileAs :: Maybe String + } deriving Show + +data Identifier = Identifier{ + identifierText :: String + , identifierScheme :: Maybe String + } deriving Show + +data Title = Title{ + titleText :: String + , titleFileAs :: Maybe String + , titleType :: Maybe String + } deriving Show + +dcName :: String -> QName +dcName n = QName n Nothing (Just "dc") + +dcNode :: Node t => String -> t -> Element +dcNode = node . 
dcName + +opfName :: String -> QName +opfName n = QName n Nothing (Just "opf") + +plainify :: [Inline] -> String +plainify t = + trimr $ writePlain def{ writerStandalone = False } + $ Pandoc nullMeta [Plain $ walk removeNote t] + +removeNote :: Inline -> Inline +removeNote (Note _) = Str "" +removeNote x = x + +getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata +getEPUBMetadata opts meta = do + let md = metadataFromMeta opts meta + let elts = onlyElems $ parseXML $ writerEpubMetadata opts + let md' = foldr addMetadataFromXML md elts + let addIdentifier m = + if null (epubIdentifier m) + then do + randomId <- fmap show getRandomUUID + return $ m{ epubIdentifier = [Identifier randomId Nothing] } + else return m + let addLanguage m = + if null (epubLanguage m) + then case lookup "lang" (writerVariables opts) of + Just x -> return m{ epubLanguage = x } + Nothing -> do + localeLang <- catch (liftM + (map (\c -> if c == '_' then '-' else c) . + takeWhile (/='.')) $ getEnv "LANG") + (\e -> let _ = (e :: SomeException) in return "en-US") + return m{ epubLanguage = localeLang } + else return m + let fixDate m = + if null (epubDate m) + then do + currentTime <- getCurrentTime + return $ m{ epubDate = showDateTimeISO8601 currentTime } + else return m + let addAuthor m = + if any (\c -> creatorRole c == Just "aut") $ epubCreator m + then return m + else do + let authors' = map plainify $ docAuthors meta + let toAuthor name = Creator{ creatorText = name + , creatorRole = Just "aut" + , creatorFileAs = Nothing } + return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } + addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage + +addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata +addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md + | name == "identifier" = md{ epubIdentifier = + Identifier{ identifierText = strContent e + , identifierScheme = lookupAttr (opfName "scheme") attrs + } : epubIdentifier md } + | name == "title" = md{ epubTitle = + Title{ titleText = strContent e + , titleFileAs = getAttr "file-as" + , titleType = getAttr "type" + } : epubTitle md } + | name == "date" = md{ epubDate = fromMaybe "" $ normalizeDate' + $ strContent e } + | name == "language" = md{ epubLanguage = strContent e } + | name == "creator" = md{ epubCreator = + Creator{ creatorText = strContent e + , creatorRole = getAttr "role" + , creatorFileAs = getAttr "file-as" + } : epubCreator md } + | name == "contributor" = md{ epubContributor = + Creator { creatorText = strContent e + , creatorRole = getAttr "role" + , creatorFileAs = getAttr "file-as" + } : epubContributor md } + | name == "subject" = md{ epubSubject = strContent e : epubSubject md } + | name == "description" = md { epubDescription = Just $ strContent e } + | name == "type" = md { epubType = Just $ strContent e } + | name == "format" = md { epubFormat = Just $ strContent e } + | name == "type" = md { epubType = Just $ strContent e } + | name == "publisher" = md { epubPublisher = Just $ strContent e } + | name == "source" = md { epubSource = Just $ strContent e } + | name == "relation" = md { epubRelation = Just $ strContent e } + | name == "coverage" = md { epubCoverage = Just $ strContent e } + | name == "rights" = md { epubRights = Just $ strContent e } + | otherwise = md + where getAttr n = lookupAttr (opfName n) attrs +addMetadataFromXML _ md = md + +metaValueToString :: MetaValue -> String +metaValueToString (MetaString s) = s +metaValueToString (MetaInlines ils) = plainify ils +metaValueToString 
(MetaBlocks bs) = plainify $ query (:[]) bs +metaValueToString (MetaBool b) = show b +metaValueToString _ = "" + +getList :: String -> Meta -> (MetaValue -> a) -> [a] +getList s meta handleMetaValue = + case lookupMeta s meta of + Just (MetaList xs) -> map handleMetaValue xs + Just mv -> [handleMetaValue mv] + Nothing -> [] + +getIdentifier :: Meta -> [Identifier] +getIdentifier meta = getList "identifier" meta handleMetaValue + where handleMetaValue (MetaMap m) = + Identifier{ identifierText = maybe "" metaValueToString + $ M.lookup "text" m + , identifierScheme = metaValueToString <$> + M.lookup "scheme" m } + handleMetaValue mv = Identifier (metaValueToString mv) Nothing + +getTitle :: Meta -> [Title] +getTitle meta = getList "title" meta handleMetaValue + where handleMetaValue (MetaMap m) = + Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m + , titleFileAs = metaValueToString <$> M.lookup "file-as" m + , titleType = metaValueToString <$> M.lookup "type" m } + handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing + +getCreator :: String -> Meta -> [Creator] +getCreator s meta = getList s meta handleMetaValue + where handleMetaValue (MetaMap m) = + Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m + , creatorFileAs = metaValueToString <$> M.lookup "file-as" m + , creatorRole = metaValueToString <$> M.lookup "role" m } + handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing + +simpleList :: String -> Meta -> [String] +simpleList s meta = + case lookupMeta s meta of + Just (MetaList xs) -> map metaValueToString xs + Just x -> [metaValueToString x] + Nothing -> [] + +metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata +metadataFromMeta opts meta = EPUBMetadata{ + epubIdentifier = identifiers + , epubTitle = titles + , epubDate = date + , epubLanguage = language + , epubCreator = creators + , epubContributor = contributors + , epubSubject = subjects + , epubDescription = description + , epubType = epubtype + , epubFormat = format + , epubPublisher = publisher + , epubSource = source + , epubRelation = relation + , epubCoverage = coverage + , epubRights = rights + , epubCoverImage = coverImage + , epubStylesheet = stylesheet + } + where identifiers = getIdentifier meta + titles = getTitle meta + date = fromMaybe "" $ + (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate' + language = maybe "" metaValueToString $ + lookupMeta "language" meta `mplus` lookupMeta "lang" meta + creators = getCreator "creator" meta + contributors = getCreator "contributor" meta + subjects = simpleList "subject" meta + description = metaValueToString <$> lookupMeta "description" meta + epubtype = metaValueToString <$> lookupMeta "type" meta + format = metaValueToString <$> lookupMeta "format" meta + publisher = metaValueToString <$> lookupMeta "publisher" meta + source = metaValueToString <$> lookupMeta "source" meta + relation = metaValueToString <$> lookupMeta "relation" meta + coverage = metaValueToString <$> lookupMeta "coverage" meta + rights = metaValueToString <$> lookupMeta "rights" meta + coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` + (metaValueToString <$> lookupMeta "cover-image" meta) + stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` + ((StylesheetPath . metaValueToString) <$> + lookupMeta "stylesheet" meta) + -- | Produce an EPUB file from a Pandoc document. 
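The metadata helpers above accept either a list or a single scalar for each field and flatten inline values through plainify, so the same lookup code serves both shapes of user metadata. Worked by hand against the definitions above (the field values are illustrative):

-- Given  lookupMeta "subject" meta == Just (MetaList [ MetaString "haskell"
--                                                    , MetaInlines [Str "e-books"] ])
-- then   simpleList "subject" meta == ["haskell","e-books"]
--
-- Given  lookupMeta "creator" meta ==
--          Just (MetaMap (M.fromList [ ("text",    MetaString "Jane Roe")
--                                    , ("role",    MetaString "aut")
--                                    , ("file-as", MetaString "Roe, Jane") ]))
-- then   getCreator "creator" meta ==
--          [ Creator { creatorText   = "Jane Roe"
--                    , creatorRole   = Just "aut"
--                    , creatorFileAs = Just "Roe, Jane" } ]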
writeEPUB :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString writeEPUB opts doc@(Pandoc meta _) = do - let version = maybe EPUB2 id (writerEpubVersion opts) + let version = fromMaybe EPUB2 (writerEpubVersion opts) let epub3 = version == EPUB3 epochtime <- floor `fmap` getPOSIXTime let mkEntry path content = toEntry path epochtime content @@ -93,11 +320,11 @@ writeEPUB opts doc@(Pandoc meta _) = do then MathML Nothing else writerHTMLMathMethod opts , writerWrapText = False } - let mbCoverImage = lookup "epub-cover-image" vars + metadata <- getEPUBMetadata opts' meta -- cover page (cpgEntry, cpicEntry) <- - case mbCoverImage of + case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do let coverImage = "cover-image" ++ takeExtension img @@ -179,32 +406,33 @@ writeEPUB opts doc@(Pandoc meta _) = do chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) $ renderHtml $ writeHtml opts'{ writerNumberOffset = - maybe [] id mbnum } + fromMaybe [] mbnum } $ case bs of (Header _ _ xs : _) -> - Pandoc (setMeta "title" (fromList xs) nullMeta) bs + -- remove notes or we get doubled footnotes + Pandoc (setMeta "title" (walk removeNote $ fromList xs) + nullMeta) bs _ -> Pandoc nullMeta bs let chapterEntries = zipWith chapToEntry [1..] chapters -- incredibly inefficient (TODO): - let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + let containsMathML ent = epub3 && + "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + let containsSVG ent = epub3 && + "<svg" `isInfixOf` (B8.unpack $ fromEntry ent) + let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent] -- contents.opf - localeLang <- catch (liftM (map (\c -> if c == '_' then '-' else c) . - takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: SomeException) in return "en-US") - let lang = case lookup "lang" (writerVariables opts') of - Just x -> x - Nothing -> localeLang - uuid <- getRandomUUID let chapterNode ent = unode "item" ! ([("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", "application/xhtml+xml")] - ++ [("properties","mathml") | epub3 && - containsMathML ent]) $ () + ++ case props ent of + [] -> [] + xs -> [("properties", unwords xs)]) + $ () let chapterRefNode ent = unode "itemref" ! [("idref", takeBaseName $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! @@ -215,23 +443,23 @@ writeEPUB opts doc@(Pandoc meta _) = do let fontNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), - ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ () - let plainify t = trimr $ - writePlain opts'{ writerStandalone = False } $ - Pandoc meta [Plain t] - let plainTitle = plainify $ docTitle meta - let plainAuthors = map plainify $ docAuthors meta + ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () + let plainTitle = case docTitle meta of + [] -> case epubTitle metadata of + [] -> "UNTITLED" + (x:_) -> titleText x + x -> plainify x + let uuid = case epubIdentifier metadata of + (x:_) -> identifierText x -- use first identifier as UUID + [] -> error "epubIdentifier is null" -- shouldn't happen currentTime <- getCurrentTime - let plainDate = maybe (showDateTimeISO8601 currentTime) id - $ normalizeDate $ stringify $ docDate meta - let contentsData = fromStringLazy $ ppTopElement $ + let contentsData = UTF8.fromStringLazy $ ppTopElement $ unode "package" ! 
[("version", case version of EPUB2 -> "2.0" EPUB3 -> "3.0") ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","BookId")] $ - [ metadataElement version (writerEpubMetadata opts') - uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage + ,("unique-identifier","epub-id-1")] $ + [ metadataElement version metadata currentTime , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () @@ -243,10 +471,15 @@ writeEPUB opts doc@(Pandoc meta _) = do [("properties","nav") | epub3 ]) $ () ] ++ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ - map pictureNode (cpicEntry ++ picEntries) ++ + (case cpicEntry of + [] -> [] + (x:_) -> [add_attrs + [Attr (unqual "properties") "cover-image" | epub3] + (pictureNode x)]) ++ + map pictureNode picEntries ++ map fontNode fontEntries , unode "spine" ! [("toc","ncx")] $ - case mbCoverImage of + case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! [("idref", "cover"),("linear","no")] $ () ] @@ -260,8 +493,13 @@ writeEPUB opts doc@(Pandoc meta _) = do else "no")] $ ()) : map chapterRefNode chapterEntries) , unode "guide" $ - unode "reference" ! - [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () + [ unode "reference" ! + [("type","toc"),("title",plainTitle), + ("href","nav.xhtml")] $ () + ] ++ + [ unode "reference" ! + [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing + ] ] let contentsEntry = mkEntry "content.opf" contentsData @@ -303,19 +541,19 @@ writeEPUB opts doc@(Pandoc meta _) = do [ unode "navLabel" $ unode "text" (plainify $ docTitle meta) , unode "content" ! [("src","title_page.xhtml")] $ () ] - let tocData = fromStringLazy $ ppTopElement $ + let tocData = UTF8.fromStringLazy $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ [ unode "meta" ! [("name","dtb:uid") - ,("content", show uuid)] $ () + ,("content", uuid)] $ () , unode "meta" ! [("name","dtb:depth") ,("content", "1")] $ () , unode "meta" ! [("name","dtb:totalPageCount") ,("content", "0")] $ () , unode "meta" ! [("name","dtb:maxPageNumber") ,("content", "0")] $ () - ] ++ case mbCoverImage of + ] ++ case epubCoverImage metadata of Nothing -> [] Just _ -> [unode "meta" ! [("name","cover"), ("content","cover-image")] $ ()] @@ -335,7 +573,7 @@ writeEPUB opts doc@(Pandoc meta _) = do (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if epub3 then "nav" else "div" - let navData = fromStringLazy $ ppTopElement $ + let navData = UTF8.fromStringLazy $ ppTopElement $ unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml") ,("xmlns:epub","http://www.idpf.org/2007/ops")] $ [ unode "head" $ @@ -349,10 +587,10 @@ writeEPUB opts doc@(Pandoc meta _) = do let navEntry = mkEntry "nav.xhtml" navData -- mimetype - let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip" + let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip" -- container.xml - let containerData = fromStringLazy $ ppTopElement $ + let containerData = UTF8.fromStringLazy $ ppTopElement $ unode "container" ! 
[("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ @@ -361,18 +599,19 @@ writeEPUB opts doc@(Pandoc meta _) = do let containerEntry = mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml - let apple = fromStringLazy $ ppTopElement $ + let apple = UTF8.fromStringLazy $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ unode "option" ! [("name","specified-fonts")] $ "true" let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- stylesheet - stylesheet <- case writerEpubStylesheet opts of - Just s -> return s - Nothing -> toString `fmap` + stylesheet <- case epubStylesheet metadata of + Just (StylesheetPath fp) -> UTF8.readFile fp + Just (StylesheetContents s) -> return s + Nothing -> UTF8.toString `fmap` readDataFile (writerUserDataDir opts) "epub.css" - let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet + let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet -- construct archive let archive = foldr addEntryToArchive emptyArchive @@ -381,33 +620,97 @@ writeEPUB opts doc@(Pandoc meta _) = do (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) return $ fromArchive archive -metadataElement :: EPUBVersion -> String -> UUID -> String -> String -> [String] - -> String -> UTCTime -> Maybe a -> Element -metadataElement version metadataXML uuid lang title authors date currentTime mbCoverImage = - let userNodes = parseXML metadataXML - elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") - ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ - filter isMetadataElement $ onlyElems userNodes - dublinElements = ["contributor","coverage","creator","date", - "description","format","identifier","language","publisher", - "relation","rights","source","subject","title","type"] - isMetadataElement e = (qPrefix (elName e) == Just "dc" && - qName (elName e) `elem` dublinElements) || - (qPrefix (elName e) == Nothing && - qName (elName e) `elem` ["link","meta"]) - contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) - newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ - [ unode "dc:language" lang | not (elt `contains` "language") ] ++ - [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | - not (elt `contains` "identifier") ] ++ - [ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2] - $ a | a <- authors, not (elt `contains` "creator") ] ++ - [ unode "dc:date" date | not (elt `contains` "date") ] ++ - [ unode "meta" ! [("property", "dcterms:modified")] $ - (showDateTimeISO8601 currentTime) | version == EPUB3] ++ - [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () | - not (isNothing mbCoverImage) ] - in elt{ elContent = elContent elt ++ map Elem newNodes } +metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element +metadataElement version md currentTime = + unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes + where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes + ++ creatorNodes ++ contributorNodes ++ subjectNodes + ++ descriptionNodes ++ typeNodes ++ formatNodes + ++ publisherNodes ++ sourceNodes ++ relationNodes + ++ coverageNodes ++ rightsNodes ++ coverImageNodes + ++ modifiedNodes + withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) + ([1..] 
:: [Int])) + identifierNodes = withIds "epub-id" toIdentifierNode $ + epubIdentifier md + titleNodes = withIds "epub-title" toTitleNode $ epubTitle md + dateNodes = dcTag' "date" $ epubDate md + languageNodes = [dcTag "language" $ epubLanguage md] + creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ + epubCreator md + contributorNodes = withIds "epub-contributor" + (toCreatorNode "contributor") $ epubContributor md + subjectNodes = map (dcTag "subject") $ epubSubject md + descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md + typeNodes = maybe [] (dcTag' "type") $ epubType md + formatNodes = maybe [] (dcTag' "format") $ epubFormat md + publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md + sourceNodes = maybe [] (dcTag' "source") $ epubSource md + relationNodes = maybe [] (dcTag' "relation") $ epubRelation md + coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md + rightsNodes = maybe [] (dcTag' "rights") $ epubRights md + coverImageNodes = maybe [] + (const $ [unode "meta" ! [("name","cover"), + ("content","cover-image")] $ ()]) + $ epubCoverImage md + modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ + (showDateTimeISO8601 currentTime) | version == EPUB3 ] + dcTag n s = unode ("dc:" ++ n) s + dcTag' n s = [dcTag n s] + toIdentifierNode id' (Identifier txt scheme) + | version == EPUB2 = [dcNode "identifier" ! + ([("id",id')] ++ maybe [] (\x -> [("opf:scheme", x)]) scheme) $ + txt] + | otherwise = [dcNode "identifier" ! [("id",id')] $ txt] ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","identifier-type"), + ("scheme","onix:codelist5")] $ x]) + (schemeToOnix `fmap` scheme) + toCreatorNode s id' creator + | version == EPUB2 = [dcNode s ! + ([("id",id')] ++ + maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++ + maybe [] (\x -> [("opf:role",x)]) + (creatorRole creator >>= toRelator)) $ creatorText creator] + | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","file-as")] $ x]) + (creatorFileAs creator) ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","role"), + ("scheme","marc:relators")] $ x]) + (creatorRole creator >>= toRelator) + toTitleNode id' title + | version == EPUB2 = [dcNode "title" ! + ([("id",id')] ++ + maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++ + maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $ + titleText title] + | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] + ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","file-as")] $ x]) + (titleFileAs title) ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","title-type")] $ x]) + (titleType title) + schemeToOnix "ISBN-10" = "02" + schemeToOnix "GTIN-13" = "03" + schemeToOnix "UPC" = "04" + schemeToOnix "ISMN-10" = "05" + schemeToOnix "DOI" = "06" + schemeToOnix "LCCN" = "13" + schemeToOnix "GTIN-14" = "14" + schemeToOnix "ISBN-13" = "15" + schemeToOnix "Legal deposit number" = "17" + schemeToOnix "URN" = "22" + schemeToOnix "OCLC" = "23" + schemeToOnix "ISMN-13" = "25" + schemeToOnix "ISBN-A" = "26" + schemeToOnix "JP" = "27" + schemeToOnix "OLCC" = "28" + schemeToOnix _ = "01" showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" @@ -457,15 +760,9 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . 
unEntity (x:xs) = x : unEntity xs imageTypeOf :: FilePath -> Maybe String -imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of - "jpg" -> Just "image/jpeg" - "jpeg" -> Just "image/jpeg" - "jfif" -> Just "image/jpeg" - "png" -> Just "image/png" - "gif" -> Just "image/gif" - "svg" -> Just "image/svg+xml" - _ -> Nothing - +imageTypeOf x = case getMimeType x of + Just y@('i':'m':'a':'g':'e':_) -> Just y + _ -> Nothing data IdentState = IdentState{ chapterNumber :: Int, @@ -519,3 +816,287 @@ replaceRefs refTable = walk replaceOneRef Just url -> Link lab (url,tit) Nothing -> x replaceOneRef x = x + +-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM +normalizeDate' :: String -> Maybe String +normalizeDate' xs = + let xs' = trim xs in + case xs' of + [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY + [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM + -> Just xs' + _ -> normalizeDate xs' + +toRelator :: String -> Maybe String +toRelator x + | x `elem` relators = Just x + | otherwise = lookup (map toLower x) relatorMap + +relators :: [String] +relators = map snd relatorMap + +relatorMap :: [(String, String)] +relatorMap = + [("abridger", "abr") + ,("actor", "act") + ,("adapter", "adp") + ,("addressee", "rcp") + ,("analyst", "anl") + ,("animator", "anm") + ,("annotator", "ann") + ,("appellant", "apl") + ,("appellee", "ape") + ,("applicant", "app") + ,("architect", "arc") + ,("arranger", "arr") + ,("art copyist", "acp") + ,("art director", "adi") + ,("artist", "art") + ,("artistic director", "ard") + ,("assignee", "asg") + ,("associated name", "asn") + ,("attributed name", "att") + ,("auctioneer", "auc") + ,("author", "aut") + ,("author in quotations or text abstracts", "aqt") + ,("author of afterword, colophon, etc.", "aft") + ,("author of dialog", "aud") + ,("author of introduction, etc.", "aui") + ,("autographer", "ato") + ,("bibliographic antecedent", "ant") + ,("binder", "bnd") + ,("binding designer", "bdd") + ,("blurb writer", "blw") + ,("book designer", "bkd") + ,("book producer", "bkp") + ,("bookjacket designer", "bjd") + ,("bookplate designer", "bpd") + ,("bookseller", "bsl") + ,("braille embosser", "brl") + ,("broadcaster", "brd") + ,("calligrapher", "cll") + ,("cartographer", "ctg") + ,("caster", "cas") + ,("censor", "cns") + ,("choreographer", "chr") + ,("cinematographer", "cng") + ,("client", "cli") + ,("collection registrar", "cor") + ,("collector", "col") + ,("collotyper", "clt") + ,("colorist", "clr") + ,("commentator", "cmm") + ,("commentator for written text", "cwt") + ,("compiler", "com") + ,("complainant", "cpl") + ,("complainant-appellant", "cpt") + ,("complainant-appellee", "cpe") + ,("composer", "cmp") + ,("compositor", "cmt") + ,("conceptor", "ccp") + ,("conductor", "cnd") + ,("conservator", "con") + ,("consultant", "csl") + ,("consultant to a project", "csp") + ,("contestant", "cos") + ,("contestant-appellant", "cot") + ,("contestant-appellee", "coe") + ,("contestee", "cts") + ,("contestee-appellant", "ctt") + ,("contestee-appellee", "cte") + ,("contractor", "ctr") + ,("contributor", "ctb") + ,("copyright claimant", "cpc") + ,("copyright holder", "cph") + ,("corrector", "crr") + ,("correspondent", "crp") + ,("costume designer", "cst") + ,("court governed", "cou") + ,("court reporter", "crt") + ,("cover designer", "cov") + ,("creator", "cre") + ,("curator", "cur") + ,("dancer", "dnc") + ,("data contributor", "dtc") + ,("data manager", "dtm") + ,("dedicatee", "dte") + ,("dedicator", "dto") + ,("defendant", "dfd") + 
,("defendant-appellant", "dft") + ,("defendant-appellee", "dfe") + ,("degree granting institution", "dgg") + ,("delineator", "dln") + ,("depicted", "dpc") + ,("depositor", "dpt") + ,("designer", "dsr") + ,("director", "drt") + ,("dissertant", "dis") + ,("distribution place", "dbp") + ,("distributor", "dst") + ,("donor", "dnr") + ,("draftsman", "drm") + ,("dubious author", "dub") + ,("editor", "edt") + ,("editor of compilation", "edc") + ,("editor of moving image work", "edm") + ,("electrician", "elg") + ,("electrotyper", "elt") + ,("enacting jurisdiction", "enj") + ,("engineer", "eng") + ,("engraver", "egr") + ,("etcher", "etr") + ,("event place", "evp") + ,("expert", "exp") + ,("facsimilist", "fac") + ,("field director", "fld") + ,("film director", "fmd") + ,("film distributor", "fds") + ,("film editor", "flm") + ,("film producer", "fmp") + ,("filmmaker", "fmk") + ,("first party", "fpy") + ,("forger", "frg") + ,("former owner", "fmo") + ,("funder", "fnd") + ,("geographic information specialist", "gis") + ,("honoree", "hnr") + ,("host", "hst") + ,("host institution", "his") + ,("illuminator", "ilu") + ,("illustrator", "ill") + ,("inscriber", "ins") + ,("instrumentalist", "itr") + ,("interviewee", "ive") + ,("interviewer", "ivr") + ,("inventor", "inv") + ,("issuing body", "isb") + ,("judge", "jud") + ,("jurisdiction governed", "jug") + ,("laboratory", "lbr") + ,("laboratory director", "ldr") + ,("landscape architect", "lsa") + ,("lead", "led") + ,("lender", "len") + ,("libelant", "lil") + ,("libelant-appellant", "lit") + ,("libelant-appellee", "lie") + ,("libelee", "lel") + ,("libelee-appellant", "let") + ,("libelee-appellee", "lee") + ,("librettist", "lbt") + ,("licensee", "lse") + ,("licensor", "lso") + ,("lighting designer", "lgd") + ,("lithographer", "ltg") + ,("lyricist", "lyr") + ,("manufacture place", "mfp") + ,("manufacturer", "mfr") + ,("marbler", "mrb") + ,("markup editor", "mrk") + ,("metadata contact", "mdc") + ,("metal-engraver", "mte") + ,("moderator", "mod") + ,("monitor", "mon") + ,("music copyist", "mcp") + ,("musical director", "msd") + ,("musician", "mus") + ,("narrator", "nrt") + ,("onscreen presenter", "osp") + ,("opponent", "opn") + ,("organizer of meeting", "orm") + ,("originator", "org") + ,("other", "oth") + ,("owner", "own") + ,("panelist", "pan") + ,("papermaker", "ppm") + ,("patent applicant", "pta") + ,("patent holder", "pth") + ,("patron", "pat") + ,("performer", "prf") + ,("permitting agency", "pma") + ,("photographer", "pht") + ,("plaintiff", "ptf") + ,("plaintiff-appellant", "ptt") + ,("plaintiff-appellee", "pte") + ,("platemaker", "plt") + ,("praeses", "pra") + ,("presenter", "pre") + ,("printer", "prt") + ,("printer of plates", "pop") + ,("printmaker", "prm") + ,("process contact", "prc") + ,("producer", "pro") + ,("production company", "prn") + ,("production designer", "prs") + ,("production manager", "pmn") + ,("production personnel", "prd") + ,("production place", "prp") + ,("programmer", "prg") + ,("project director", "pdr") + ,("proofreader", "pfr") + ,("provider", "prv") + ,("publication place", "pup") + ,("publisher", "pbl") + ,("publishing director", "pbd") + ,("puppeteer", "ppt") + ,("radio director", "rdd") + ,("radio producer", "rpc") + ,("recording engineer", "rce") + ,("recordist", "rcd") + ,("redaktor", "red") + ,("renderer", "ren") + ,("reporter", "rpt") + ,("repository", "rps") + ,("research team head", "rth") + ,("research team member", "rtm") + ,("researcher", "res") + ,("respondent", "rsp") + ,("respondent-appellant", "rst") + 
,("respondent-appellee", "rse") + ,("responsible party", "rpy") + ,("restager", "rsg") + ,("restorationist", "rsr") + ,("reviewer", "rev") + ,("rubricator", "rbr") + ,("scenarist", "sce") + ,("scientific advisor", "sad") + ,("screenwriter", "aus") + ,("scribe", "scr") + ,("sculptor", "scl") + ,("second party", "spy") + ,("secretary", "sec") + ,("seller", "sll") + ,("set designer", "std") + ,("setting", "stg") + ,("signer", "sgn") + ,("singer", "sng") + ,("sound designer", "sds") + ,("speaker", "spk") + ,("sponsor", "spn") + ,("stage director", "sgd") + ,("stage manager", "stm") + ,("standards body", "stn") + ,("stereotyper", "str") + ,("storyteller", "stl") + ,("supporting host", "sht") + ,("surveyor", "srv") + ,("teacher", "tch") + ,("technical director", "tcd") + ,("television director", "tld") + ,("television producer", "tlp") + ,("thesis advisor", "ths") + ,("transcriber", "trc") + ,("translator", "trl") + ,("type designer", "tyd") + ,("typographer", "tyg") + ,("university place", "uvp") + ,("videographer", "vdg") + ,("witness", "wit") + ,("wood engraver", "wde") + ,("woodcutter", "wdc") + ,("writer of accompanying material", "wam") + ,("writer of added commentary", "wac") + ,("writer of added lyrics", "wal") + ,("writer of added text", "wat") + ] + diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index adbe948be..803617f95 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -44,7 +44,7 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers) +import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock) import Text.Pandoc.Walk -- | Data to be written at the end of the document: @@ -157,9 +157,7 @@ renderSection level (ttl, body) = do else cMapM blockToXml body return $ el "section" (title ++ content) where - hasSubsections = any isHeader - isHeader (Header _ _ _) = True - isHeader _ = False + hasSubsections = any isHeaderBlock -- | Only <p> and <empty-line> are allowed within <title> in FB2. formatTitle :: [Inline] -> [Content] diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 63b466af3..805bb57f1 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -39,13 +39,13 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Slides import Text.Pandoc.Highlighting ( highlight, styleToCss, formatHtmlInline, formatHtmlBlock ) -import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.XML (fromEntities, escapeStringForXML) import Network.HTTP ( urlEncode ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.String ( fromString ) -import Data.Maybe ( catMaybes ) +import Data.Maybe ( catMaybes, fromMaybe ) import Control.Monad.State import Text.Blaze.Html hiding(contents) import Text.Blaze.Internal(preEscapedString) @@ -115,9 +115,10 @@ pandocToHtml opts (Pandoc meta blocks) = do (fmap renderHtml . blockListToHtml opts) (fmap renderHtml . inlineListToHtml opts) meta - let authsMeta = map stringify $ docAuthors meta - let dateMeta = stringify $ docDate meta - let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts + let stringifyHTML = escapeStringForXML . 
stringify + let authsMeta = map stringifyHTML $ docAuthors meta + let dateMeta = stringifyHTML $ docDate meta + let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts let sects = hierarchicalize $ if writerSlideVariant opts == NoSlides then blocks @@ -143,8 +144,11 @@ pandocToHtml opts (Pandoc meta blocks) = do MathJax url -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" - $ preEscapedString - "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" + $ case writerSlideVariant opts of + SlideousSlides -> + preEscapedString + "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" + _ -> mempty JsMath (Just url) -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" @@ -168,7 +172,7 @@ pandocToHtml opts (Pandoc meta blocks) = do maybe id (defField "toc" . renderHtml) toc $ defField "author-meta" authsMeta $ maybe id (defField "date-meta") (normalizeDate dateMeta) $ - defField "pagetitle" (stringify $ docTitle meta) $ + defField "pagetitle" (stringifyHTML $ docTitle meta) $ defField "idprefix" (writerIdentifierPrefix opts) $ -- these should maybe be set in pandoc.hs defField "slidy-url" @@ -268,11 +272,23 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen else blockToHtml opts (Header level' (id',classes,keyvals) title') let isSec (Sec _ _ _ _ _) = True isSec (Blk _) = False + let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] + isPause _ = False + let fragmentClass = case writerSlideVariant opts of + RevealJsSlides -> "fragment" + _ -> "incremental" + let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\"" + ++ fragmentClass ++ "\">")) : + (xs ++ [Blk (RawBlock (Format "html") "</div>")]) innerContents <- mapM (elementToHtml slideLevel opts) $ if titleSlide -- title slides have no content of their own then filter isSec elements - else elements + else if slide + then case splitBy isPause elements of + [] -> [] + (x:xs) -> x ++ concatMap inDiv xs + else elements let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && @@ -401,16 +417,19 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat [nl opts, img, capt, nl opts] --- . . . 
indicates a pause in a slideshow -blockToHtml opts (Para [Str ".",Space,Str ".",Space,Str "."]) - | writerSlideVariant opts == RevealJsSlides = - blockToHtml opts (RawBlock "html" "<div class=\"fragment\" />") blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents -blockToHtml opts (Div attr bs) = do +blockToHtml opts (Div attr@(_,classes,_) bs) = do contents <- blockListToHtml opts bs - return $ addAttrs opts attr $ H.div $ nl opts >> contents >> nl opts + let contents' = nl opts >> contents >> nl opts + return $ + if "notes" `elem` classes + then case writerSlideVariant opts of + RevealJsSlides -> addAttrs opts attr $ H5.aside $ contents' + NoSlides -> addAttrs opts attr $ H.div $ contents' + _ -> mempty + else addAttrs opts attr $ H.div $ contents' blockToHtml _ (RawBlock f str) | f == Format "html" = return $ preEscapedString str | otherwise = return mempty @@ -456,28 +475,21 @@ blockToHtml opts (BlockQuote blocks) = else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (ident,_,_) lst) = do +blockToHtml opts (Header level (_,_,_) lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts && not (null secnum) then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >> strToHtml " " >> contents else contents - let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides] - let contents'' = if writerTableOfContents opts && not (null ident) - then H.a ! A.href (toValue $ - '#' : revealSlash ++ - writerIdentifierPrefix opts ++ - ident) $ contents' - else contents' return $ case level of - 1 -> H.h1 contents'' - 2 -> H.h2 contents'' - 3 -> H.h3 contents'' - 4 -> H.h4 contents'' - 5 -> H.h5 contents'' - 6 -> H.h6 contents'' - _ -> H.p contents'' + 1 -> H.h1 contents' + 2 -> H.h2 contents' + 3 -> H.h3 contents' + 4 -> H.h4 contents' + 5 -> H.h5 contents' + 6 -> H.h6 contents' + _ -> H.p contents' blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst return $ unordList opts contents @@ -505,7 +517,7 @@ blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- if null term then return mempty - else liftM (H.dt) $ inlineListToHtml opts term + else liftM H.dt $ inlineListToHtml opts term defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . blockListToHtml opts) defs return $ mconcat $ nl opts : term' : nl opts : @@ -580,8 +592,7 @@ toListItem opts item = nl opts >> H.li item blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = - mapM (blockToHtml opts) lst >>= - return . mconcat . intersperse (nl opts) + fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html @@ -595,8 +606,22 @@ inlineToHtml opts inline = (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br - (Span attr ils) -> inlineListToHtml opts ils >>= - return . addAttrs opts attr . H.span + (Span (id',classes,kvs) ils) + -> inlineListToHtml opts ils >>= + return . addAttrs opts attr' . 
H.span + where attr' = (id',classes',kvs') + classes' = filter (`notElem` ["csl-no-emph", + "csl-no-strong", + "csl-no-smallcaps"]) classes + kvs' = if null styles + then kvs + else (("style", concat styles) : kvs) + styles = ["font-style:normal;" + | "csl-no-emph" `elem` classes] + ++ ["font-weight:normal;" + | "csl-no-strong" `elem` classes] + ++ ["font-variant:normal;" + | "csl-no-smallcaps" `elem` classes] (Emph lst) -> inlineListToHtml opts lst >>= return . H.em (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong (Code attr str) -> case hlCode of @@ -667,14 +692,14 @@ inlineToHtml opts inline = Right r -> return $ preEscapedString $ ppcElement conf r Left _ -> inlineListToHtml opts - (readTeXMath str) >>= return . + (readTeXMath' t str) >>= return . (H.span ! A.class_ "math") MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" PlainMath -> do - x <- inlineListToHtml opts (readTeXMath str) + x <- inlineListToHtml opts (readTeXMath' t str) let m = H.span ! A.class_ "math" $ x let brtag = if writerHtml5 opts then H5.br else H.br return $ case t of @@ -724,7 +749,9 @@ inlineToHtml opts inline = else [A.title $ toValue tit]) return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl - (Note contents) -> do + (Note contents) + | writerIgnoreNotes opts -> return mempty + | otherwise -> do st <- get let notes = stNotes st let number = (length notes) + 1 @@ -739,11 +766,11 @@ inlineToHtml opts inline = writerIdentifierPrefix opts ++ "fn" ++ ref) ! A.class_ "footnoteRef" ! prefixedId opts ("fnref" ++ ref) + $ H.sup $ toHtml ref - let link' = case writerEpubVersion opts of - Just EPUB3 -> link ! customAttribute "epub:type" "noteref" - _ -> link - return $ H.sup $ link' + return $ case writerEpubVersion opts of + Just EPUB3 -> link ! customAttribute "epub:type" "noteref" + _ -> link (Cite cits il)-> do contents <- inlineListToHtml opts il let citationIds = unwords $ map citationId cits let result = H.span ! 
A.class_ "citation" $ contents diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 37ca60ce3..a76d6d82b 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -36,10 +36,11 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Printf ( printf ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) import Data.List ( (\\), isSuffixOf, isInfixOf, isPrefixOf, intercalate, intersperse ) -import Data.Char ( toLower, isPunctuation ) +import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) +import Data.Maybe ( fromMaybe ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty @@ -50,6 +51,8 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX, data WriterState = WriterState { stInNote :: Bool -- true if we're in a note + , stInMinipage :: Bool -- true if in minipage + , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter , stVerbInNote :: Bool -- true if document has verbatim text in note @@ -70,7 +73,7 @@ data WriterState = writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = evalState (pandocToLaTeX options document) $ - WriterState { stInNote = False, + WriterState { stInNote = False, stInMinipage = False, stNotes = [], stOLLevel = 1, stOptions = options, stVerbInNote = False, stTable = False, stStrikeout = False, @@ -82,10 +85,17 @@ writeLaTeX options document = pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String pandocToLaTeX options (Pandoc meta blocks) = do + -- Strip off final 'references' header if --natbib or --biblatex + let method = writerCiteMethod options + let blocks' = if method == Biblatex || method == Natbib + then case reverse blocks of + (Div (_,["references"],_) _):xs -> reverse xs + _ -> blocks + else blocks -- see if there are internal links let isInternalLink (Link _ ('#':xs,_)) = [xs] isInternalLink _ = [] - modify $ \s -> s{ stInternalLinks = query isInternalLink blocks } + modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let template = writerTemplate options -- set stBook depending on documentclass let bookClasses = ["memoir","book","report","scrreprt","scrbook"] @@ -107,26 +117,28 @@ pandocToLaTeX options (Pandoc meta blocks) = do (fmap (render colwidth) . blockListToLaTeX) (fmap (render colwidth) . inlineListToLaTeX) meta - let (blocks', lastHeader) = if writerCiteMethod options == Citeproc then - (blocks, []) - else case last blocks of - Header 1 _ il -> (init blocks, il) - _ -> (blocks, []) - blocks'' <- if writerBeamer options - then toSlides blocks' - else return blocks' - body <- mapM (elementToLaTeX options) $ hierarchicalize blocks'' + let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then + (blocks', []) + else case last blocks' of + Header 1 _ il -> (init blocks', il) + _ -> (blocks', []) + blocks''' <- if writerBeamer options + then toSlides blocks'' + else return blocks'' + body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''' (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader let main = render colwidth $ vsep body st <- get + titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta + authorsMeta <- mapM (stringToLaTeX TextString . 
stringify) $ docAuthors meta let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if writerChapters options then 1 else 0)) $ defField "body" main $ - defField "title-meta" (stringify $ docTitle meta) $ - defField "author-meta" (intercalate "; " $ map stringify $ docAuthors meta) $ + defField "title-meta" titleMeta $ + defField "author-meta" (intercalate "; " authorsMeta) $ defField "documentclass" (if writerBeamer options then ("beamer" :: String) else if writerChapters options @@ -179,7 +191,7 @@ stringToLaTeX _ [] = return "" stringToLaTeX ctx (x:xs) = do opts <- gets stOptions rest <- stringToLaTeX ctx xs - let ligatures = writerTeXLigatures opts && not (ctx == CodeString) + let ligatures = writerTeXLigatures opts && (ctx /= CodeString) let isUrl = ctx == URLString when (x == '€') $ modify $ \st -> st{ stUsesEuro = True } @@ -215,6 +227,13 @@ stringToLaTeX ctx (x:xs) = do '\x2013' | ligatures -> "--" ++ rest _ -> x : rest +toLabel :: String -> String +toLabel [] = "" +toLabel (x:xs) + | (isLetter x || isDigit x) && isAscii x = x:toLabel xs + | elem x "-+=:;." = x:toLabel xs + | otherwise = "ux" ++ printf "%x" (ord x) ++ toLabel xs + -- | Puts contents into LaTeX command. inCmd :: String -> Doc -> Doc inCmd cmd contents = char '\\' <> text cmd <> braces contents @@ -222,7 +241,7 @@ inCmd cmd contents = char '\\' <> text cmd <> braces contents toSlides :: [Block] -> State WriterState [Block] toSlides bs = do opts <- gets stOptions - let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts + let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts let bs' = prepSlides slideLevel bs concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs') @@ -278,7 +297,12 @@ isLineBreakOrSpace _ = False blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc blockToLaTeX Null = return empty -blockToLaTeX (Div _ bs) = blockListToLaTeX bs +blockToLaTeX (Div (_,classes,_) bs) = do + beamer <- writerBeamer `fmap` gets stOptions + contents <- blockListToLaTeX bs + if beamer && "notes" `elem` classes -- speaker notes + then return $ "\\note" <> braces contents + else return contents blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure @@ -317,17 +341,23 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do | writerListings opts -> listingsCodeBlock | writerHighlight opts && not (null classes) -> highlightedCodeBlock | otherwise -> rawCodeBlock - where lhsCodeBlock = do + where ref = text $ toLabel identifier + linkAnchor = if null identifier + then empty + else "\\hyperdef{}" <> braces ref <> + braces ("\\label" <> braces ref) + lhsCodeBlock = do modify $ \s -> s{ stLHS = True } - return $ flush ("\\begin{code}" $$ text str $$ "\\end{code}") $$ cr + return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ + "\\end{code}") $$ cr rawCodeBlock = do st <- get env <- if stInNote st then modify (\s -> s{ stVerbInNote = True }) >> return "Verbatim" else return "verbatim" - return $ flush (text ("\\begin{" ++ env ++ "}") $$ text str $$ - text ("\\end{" ++ env ++ "}")) <> cr + return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ + text str $$ text ("\\end{" ++ env ++ "}")) <> cr listingsCodeBlock = do st <- get let params = if writerListings (stOptions st) @@ -343,7 +373,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do (key,attr) <- keyvalAttr ] ++ (if identifier 
== "" then [] - else [ "label=" ++ identifier ]) + else [ "label=" ++ toLabel identifier ]) else [] printParams @@ -355,7 +385,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of Nothing -> rawCodeBlock Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (flush $ text h) + return (flush $ linkAnchor $$ text h) blockToLaTeX (RawBlock f x) | f == Format "latex" || f == Format "tex" = return $ text x @@ -414,7 +444,7 @@ blockToLaTeX (DefinitionList lst) = do incremental <- gets stIncremental let inc = if incremental then "[<+->]" else "" items <- mapM defListItemToLaTeX lst - let spacing = if and $ map isTightList (map snd lst) + let spacing = if all isTightList (map snd lst) then text "\\itemsep1pt\\parskip0pt\\parsep0pt" else empty return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ @@ -426,12 +456,12 @@ blockToLaTeX (Header level (id',classes,_) lst) = blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty - else ($$ "\\hline\\noalign{\\medskip}") `fmap` + else ($$ "\\midrule\\endhead") `fmap` (tableRowToLaTeX True aligns widths) heads captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\noalign{\\medskip}" + else text "\\addlinespace" $$ text "\\caption" <> braces captionText rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns @@ -439,10 +469,10 @@ blockToLaTeX (Table caption aligns widths heads rows) = do return $ "\\begin{longtable}[c]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end - $$ "\\hline\\noalign{\\medskip}" + $$ "\\toprule\\addlinespace" $$ headers $$ vcat rows' - $$ "\\hline" + $$ "\\bottomrule" $$ capt $$ "\\end{longtable}" @@ -463,23 +493,42 @@ tableRowToLaTeX :: Bool -> [[Block]] -> State WriterState Doc tableRowToLaTeX header aligns widths cols = do - renderedCells <- mapM blockListToLaTeX cols - let valign = text $ if header then "[b]" else "[t]" - let halign x = case x of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" -- scale factor compensates for extra space between columns -- so the whole table isn't larger than columnwidth let scaleFactor = 0.97 ** fromIntegral (length aligns) - let toCell 0 _ c = c - toCell w a c = "\\begin{minipage}" <> valign <> - braces (text (printf "%.2f\\columnwidth" - (w * scaleFactor))) <> - (halign a <> cr <> c <> cr) <> "\\end{minipage}" - let cells = zipWith3 toCell widths aligns renderedCells - return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}" + let widths' = map (scaleFactor *) widths + cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols + return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace" + +tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) + -> State WriterState Doc +tableCellToLaTeX _ (0, _, blocks) = blockListToLaTeX blocks +tableCellToLaTeX header (width, align, blocks) = do + modify $ \st -> st{ stInMinipage = True, stNotes = [] } + cellContents <- blockListToLaTeX blocks + notes <- gets stNotes + modify $ \st -> st{ stInMinipage = False, stNotes = [] } + let valign = text $ if header then "[b]" else "[t]" + let halign = case align of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + return $ 
("\\begin{minipage}" <> valign <> + braces (text (printf "%.2f\\columnwidth" width)) <> + (halign <> cr <> cellContents <> cr) <> "\\end{minipage}") + $$ case notes of + [] -> empty + ns -> (case length ns of + n | n > 1 -> "\\addtocounter" <> + braces "footnote" <> + braces (text $ show $ 1 - n) + | otherwise -> empty) + $$ + vcat (intersperse + ("\\addtocounter" <> braces "footnote" <> braces "1") + $ map (\x -> "\\footnotetext" <> braces x) + $ reverse ns) listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . @@ -488,8 +537,15 @@ listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc defListItemToLaTeX (term, defs) = do term' <- inlineListToLaTeX term + -- put braces around term if it contains an internal link, + -- since otherwise we get bad bracket interactions: \item[\hyperref[..] + let isInternalLink (Link _ ('#':_,_)) = True + isInternalLink _ = False + let term'' = if any isInternalLink term + then braces term' + else term' def' <- liftM vsep $ mapM blockListToLaTeX defs - return $ "\\item" <> brackets term' $$ def' + return $ "\\item" <> brackets term'' $$ def' -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: Bool -- True for unnumbered @@ -519,13 +575,13 @@ sectionHeader unnumbered ref level lst = do let refLabel x = (if ref `elem` internalLinks then text "\\hyperdef" <> braces empty - <> braces (text ref) + <> braces (text $ toLabel ref) <> braces x else x) let headerWith x y r = refLabel $ text x <> y <> if null r then empty - else text "\\label" <> braces (text r) + else text "\\label" <> braces (text $ toLabel r) let sectionType = case level' of 0 | writerBeamer opts -> "part" | otherwise -> "chapter" @@ -568,7 +624,16 @@ isQuoted _ = False -- | Convert inline element to LaTeX inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc -inlineToLaTeX (Span _ ils) = inlineListToLaTeX ils >>= return . braces +inlineToLaTeX (Span (_,classes,_) ils) = do + let noEmph = "csl-no-emph" `elem` classes + let noStrong = "csl-no-strong" `elem` classes + let noSmallCaps = "csl-no-smallcaps" `elem` classes + ((if noEmph then inCmd "textup" else id) . + (if noStrong then inCmd "textnormal" else id) . + (if noSmallCaps then inCmd "textnormal" else id) . + (if not (noEmph || noStrong || noSmallCaps) + then braces + else id)) `fmap` inlineListToLaTeX ils inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . 
inCmd "emph" inlineToLaTeX (Strong lst) = @@ -646,7 +711,8 @@ inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt ident' <- stringToLaTeX URLString ident - return $ text "\\hyperref" <> brackets (text ident') <> braces contents + return $ text "\\hyperref" <> brackets (text $ toLabel ident') <> + braces contents inlineToLaTeX (Link txt (src, _)) = case txt of [Str x] | x == src -> -- autolink @@ -659,20 +725,26 @@ inlineToLaTeX (Link txt (src, _)) = contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do modify $ \s -> s{ stGraphics = True } - let source' = if isAbsoluteURI source + let source' = if isURI source then source else unEscapeString source source'' <- stringToLaTeX URLString source' return $ "\\includegraphics" <> braces (text source'') inlineToLaTeX (Note contents) = do + inMinipage <- gets stInMinipage modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents modify (\s -> s {stInNote = False}) let optnl = case reverse contents of (CodeBlock _ _ : _) -> cr _ -> empty - return $ "\\footnote" <> braces (nest 2 contents' <> optnl) - -- note: a \n before } needed when note ends with a Verbatim environment + let noteContents = nest 2 contents' <> optnl + modify $ \st -> st{ stNotes = noteContents : stNotes st } + return $ + if inMinipage + then "\\footnotemark{}" + -- note: a \n before } needed when note ends with a Verbatim environment + else "\\footnote" <> braces noteContents citationsToNatbib :: [Citation] -> State WriterState Doc citationsToNatbib (one:[]) @@ -693,9 +765,9 @@ citationsToNatbib cits | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits = citeCommand "citep" p s ks where - noPrefix = and . map (null . citationPrefix) - noSuffix = and . map (null . citationSuffix) - ismode m = and . map (((==) m) . citationMode) + noPrefix = all (null . citationPrefix) + noSuffix = all (null . citationSuffix) + ismode m = all (((==) m) . 
citationMode) p = citationPrefix $ head $ cits s = citationSuffix $ last $ cits ks = intercalate ", " $ map citationId cits diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 642a002d6..b31cc2b70 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -330,9 +330,10 @@ inlineToMan opts (Cite _ lst) = inlineToMan _ (Code _ str) = return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" inlineToMan _ (Str str) = return $ text $ escapeString str -inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str +inlineToMan opts (Math InlineMath str) = + inlineListToMan opts $ readTeXMath' InlineMath str inlineToMan opts (Math DisplayMath str) = do - contents <- inlineListToMan opts $ readTeXMath str + contents <- inlineListToMan opts $ readTeXMath' DisplayMath str return $ cr <> text ".RS" $$ contents $$ text ".RE" inlineToMan _ (RawInline f str) | f == Format "man" = return $ text str diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 23e730bf0..278e5cc9d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -45,9 +45,9 @@ import Text.Pandoc.Pretty import Control.Monad.State import qualified Data.Set as Set import Text.Pandoc.Writers.HTML (writeHtmlString) -import Text.Pandoc.Readers.TeXMath (readTeXMath) +import Text.Pandoc.Readers.TeXMath (readTeXMath') import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) -import Network.URI (isAbsoluteURI) +import Network.URI (isURI) import Data.Default import Data.Yaml (Value(Object,String,Array,Bool,Number)) import qualified Data.HashMap.Strict as H @@ -187,10 +187,11 @@ pandocToMarkdown opts (Pandoc meta blocks) = do then tableOfContents opts headerBlocks else empty -- Strip off final 'references' header if markdown citations enabled - let blocks' = case reverse blocks of - (Div (_,["references"],_) _):xs - | isEnabled Ext_citations opts -> reverse xs - _ -> blocks + let blocks' = if not isPlain && isEnabled Ext_citations opts + then case reverse blocks of + (Div (_,["references"],_) _):xs -> reverse xs + _ -> blocks + else blocks body <- blockListToMarkdown opts blocks' st <- get notes' <- notesToMarkdown opts (reverse $ stNotes st) @@ -337,7 +338,7 @@ blockToMarkdown opts (RawBlock f str) else return $ if isEnabled Ext_markdown_attribute opts then text (addMarkdownAttribute str) <> text "\n" else text str <> text "\n" - | f == "latex" || f == "tex" || f == "markdown" = do + | f `elem` ["latex", "tex", "markdown"] = do st <- get if stPlain st then return empty @@ -380,13 +381,11 @@ blockToMarkdown opts (CodeBlock (_,classes,_) str) isEnabled Ext_literate_haskell opts = return $ prefixed "> " (text str) <> blankline blockToMarkdown opts (CodeBlock attribs str) = return $ - case attribs of - x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts -> - tildes <> " " <> attrs <> cr <> text str <> - cr <> tildes <> blankline - (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts -> - backticks <> " " <> text cls <> cr <> text str <> - cr <> backticks <> blankline + case attribs == nullAttr of + False | isEnabled Ext_backtick_code_blocks opts -> + backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline + | isEnabled Ext_fenced_code_blocks opts -> + tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline _ -> nest (writerTabStop opts) (text str) <> blankline where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of [] -> "~~~~" @@ -395,8 +394,10 @@ 
blockToMarkdown opts (CodeBlock attribs str) = return $ | otherwise -> replicate (n+1) '~' backticks = text "```" attrs = if isEnabled Ext_fenced_code_attributes opts - then nowrap $ attrsToMarkdown attribs - else empty + then nowrap $ " " <> attrsToMarkdown attribs + else case attribs of + (_,[cls],_) -> " " <> text cls + _ -> empty blockToMarkdown opts (BlockQuote blocks) = do st <- get -- if we're writing literate haskell, put a space before the bird tracks @@ -554,7 +555,14 @@ bulletListItemToMarkdown opts items = do contents <- blockListToMarkdown opts items let sps = replicate (writerTabStop opts - 2) ' ' let start = text ('-' : ' ' : sps) - return $ hang (writerTabStop opts) start $ contents <> cr + -- remove trailing blank line if it is a tight list + let contents' = case reverse items of + (BulletList xs:_) | isTightList xs -> + chomp contents <> cr + (OrderedList _ xs:_) | isTightList xs -> + chomp contents <> cr + _ -> contents + return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: WriterOptions -- ^ options @@ -620,10 +628,11 @@ getReference label (src, tit) = do Nothing -> do let label' = case find ((== label) . fst) (stRefs st) of Just _ -> -- label is used; generate numerical label - case find (\n -> not (any (== [Str (show n)]) - (map fst (stRefs st)))) [1..(10000 :: Integer)] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" + case find (\n -> notElem [Str (show n)] + (map fst (stRefs st))) + [1..(10000 :: Integer)] of + Just x -> [Str (show x)] + Nothing -> error "no unique label" Nothing -> label modify (\s -> s{ stRefs = (label', (src,tit)) : stRefs st }) return label' @@ -641,8 +650,11 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. 
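A quick illustration, outside the patch itself, of what the reworked CodeBlock case above changes in practice: with Ext_fenced_code_attributes on, the full attribute block is emitted after the fence, otherwise only the first class survives, giving a github-style fence. A minimal sketch, assuming the pure writeMarkdown/Text.Pandoc.Builder API and the `def` options record of the pandoc version being patched; the fences shown in the comments are approximate.

    import qualified Data.Set as Set
    import Text.Pandoc
    import Text.Pandoc.Builder

    codeDoc :: Pandoc
    codeDoc = doc $ codeBlockWith ("", ["haskell"], []) "main = putStrLn \"hi\""

    main :: IO ()
    main = do
      -- With pandoc's default extensions the attribute block is kept,
      -- giving roughly:  ``` {.haskell}
      putStrLn $ writeMarkdown def codeDoc
      -- With Ext_fenced_code_attributes disabled only the first class
      -- survives, giving the github-style fence:  ``` haskell
      let opts = def { writerExtensions =
                         Set.delete Ext_fenced_code_attributes pandocExtensions }
      putStrLn $ writeMarkdown opts codeDoc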
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do + st <- get contents <- inlineListToMarkdown opts ils - return $ tagWithAttrs "span" attrs <> contents <> text "</span>" + return $ if stPlain st + then contents + else tagWithAttrs "span" attrs <> contents <> text "</span>" inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst return $ "*" <> contents <> "*" @@ -696,7 +708,7 @@ inlineToMarkdown opts (Math InlineMath str) return $ "\\(" <> text str <> "\\)" | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\(" <> text str <> "\\\\)" - | otherwise = inlineListToMarkdown opts $ readTeXMath str + | otherwise = inlineListToMarkdown opts $ readTeXMath' InlineMath str inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_dollars opts = return $ "$$" <> text str <> "$$" @@ -705,7 +717,7 @@ inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\[" <> text str <> "\\\\]" | otherwise = (\x -> cr <> x <> cr) `fmap` - inlineListToMarkdown opts (readTeXMath str) + inlineListToMarkdown opts (readTeXMath' DisplayMath str) inlineToMarkdown opts (RawInline f str) | f == "html" || f == "markdown" || (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) = @@ -752,7 +764,7 @@ inlineToMarkdown opts (Link txt (src, tit)) = do then empty else text $ " \"" ++ tit ++ "\"" let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - let useAuto = isAbsoluteURI src && + let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True _ -> False diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 61741a61e..83fefaa29 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -51,7 +51,7 @@ data WriterState = WriterState { writeMediaWiki :: WriterOptions -> Pandoc -> String writeMediaWiki opts document = evalState (pandocToMediaWiki opts document) - (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) + WriterState { stNotes = False, stListLevel = [], stUseTags = False } -- | Return MediaWiki representation of document. pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index cc0a06243..c3652d65d 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -30,7 +30,10 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Data.IORef -import Data.List ( isPrefixOf ) +import Data.List ( isPrefixOf, isSuffixOf ) +import Data.Maybe ( fromMaybe ) +import Text.XML.Light.Output +import Text.TeXMath import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip @@ -40,13 +43,14 @@ import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Walk +import Text.Pandoc.Writers.Shared ( fixDisplayMath ) import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import Control.Monad (liftM) import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E import Data.Time.Clock.POSIX ( getPOSIXTime ) -import System.FilePath ( takeExtension ) +import System.FilePath ( takeExtension, takeDirectory ) -- | Produce an ODT file from a Pandoc document. 
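Outside the patch, a small check of why the LaTeX and Markdown writers above, and RST, RTF and Texinfo below, trade Network.URI.isAbsoluteURI for isURI: isAbsoluteURI rejects a URI that carries a fragment, so links and image sources such as http://example.com/page#sec were being treated as local paths. A minimal sketch using only Network.URI (the sample URLs are invented for illustration):

    import Network.URI (isAbsoluteURI, isURI)

    main :: IO ()
    main = mapM_ check
      [ "http://example.com/img.png"   -- both predicates: True
      , "http://example.com/page#sec"  -- isAbsoluteURI: False, isURI: True
      , "figures/img.png"              -- both False, i.e. treated as a local file
      ]
      where
        check s = putStrLn $ s ++ ": isAbsoluteURI=" ++ show (isAbsoluteURI s)
                               ++ ", isURI=" ++ show (isURI s)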
writeODT :: WriterOptions -- ^ Writer options @@ -60,9 +64,9 @@ writeODT opts doc@(Pandoc meta _) = do Just f -> B.readFile f Nothing -> (B.fromChunks . (:[])) `fmap` readDataFile datadir "reference.odt" - -- handle pictures + -- handle formulas and pictures picEntriesRef <- newIORef ([] :: [Entry]) - doc' <- walkM (transformPic opts picEntriesRef) doc + doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime let contentEntry = toEntry "content.xml" epochtime @@ -72,7 +76,11 @@ writeODT opts doc@(Pandoc meta _) = do $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive let toFileEntry fp = case getMimeType fp of - Nothing -> empty + Nothing -> if "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp + then selfClosingTag "manifest:file-entry" + [("manifest:media-type","application/vnd.oasis.opendocument.formula") + ,("manifest:full-path",fp)] + else empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) ,("manifest:full-path", fp) @@ -80,6 +88,8 @@ writeODT opts doc@(Pandoc meta _) = do ] let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ] + let formulas = [ takeDirectory ent ++ "/" | ent <- filesInArchive archive, + "Formula-" `isPrefixOf` ent, takeExtension ent == ".xml" ] let manifestEntry = toEntry "META-INF/manifest.xml" epochtime $ fromStringLazy $ render Nothing $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" @@ -91,6 +101,7 @@ writeODT opts doc@(Pandoc meta _) = do [("manifest:media-type","application/vnd.oasis.opendocument.text") ,("manifest:full-path","/")] $$ vcat ( map toFileEntry $ files ) + $$ vcat ( map toFileEntry $ formulas ) ) ) let archive' = addEntryToArchive manifestEntry archive @@ -118,8 +129,8 @@ writeODT opts doc@(Pandoc meta _) = do $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' -transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPic opts entriesRef (Image lab (src,_)) = do +transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline +transformPicMath opts entriesRef (Image lab (src,_)) = do res <- fetchItem (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do @@ -127,7 +138,7 @@ transformPic opts entriesRef (Image lab (src,_)) = do return $ Emph lab Right (img, _) -> do let size = imageSize img - let (w,h) = maybe (0,0) id $ sizeInPoints `fmap` size + let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size let tit' = show w ++ "x" ++ show h entries <- readIORef entriesRef let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src @@ -136,5 +147,29 @@ transformPic opts entriesRef (Image lab (src,_)) = do let entry = toEntry newsrc epochtime $ toLazy img modifyIORef entriesRef (entry:) return $ Image lab (newsrc, tit') -transformPic _ _ x = return x +transformPicMath _ entriesRef (Math t math) = do + entries <- readIORef entriesRef + let dt = if t == InlineMath then DisplayInline else DisplayBlock + case texMathToMathML dt math of + Left _ -> return $ Math t math + Right r -> do + let conf = useShortEmptyTags (const False) defaultConfigPP + let mathml = ppcTopElement conf r + epochtime <- floor `fmap` getPOSIXTime + let dirname = "Formula-" ++ show (length entries) ++ "/" + let fname = dirname ++ "content.xml" + let entry = toEntry fname epochtime (fromStringLazy mathml) + modifyIORef entriesRef (entry:) + return $ RawInline (Format "opendocument") $ 
render Nothing $ + inTags False "draw:frame" [("text:anchor-type", + if t == DisplayMath + then "paragraph" + else "as-char") + ,("style:vertical-pos", "middle") + ,("style:vertical-rel", "text")] $ + selfClosingTag "draw:object" [("xlink:href", dirname) + , ("xlink:type", "simple") + , ("xlink:show", "embed") + , ("xlink:actuate", "onLoad")] +transformPicMath _ _ x = return x diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 3ec5c2073..0029c3296 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -64,6 +64,7 @@ data WriterState = , stInDefinition :: Bool , stTight :: Bool , stFirstPara :: Bool + , stImageId :: Int } defaultWriterState :: WriterState @@ -78,6 +79,7 @@ defaultWriterState = , stInDefinition = False , stTight = False , stFirstPara = False + , stImageId = 1 } when :: Bool -> Doc -> Doc @@ -283,8 +285,12 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc blockToOpenDocument o bs - | Plain b <- bs = inParagraphTags =<< inlinesToOpenDocument o b - | Para b <- bs = inParagraphTags =<< inlinesToOpenDocument o b + | Plain b <- bs = if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b + | Para b <- bs = if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b | Div _ xs <- bs = blocksToOpenDocument o xs | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) @@ -296,8 +302,8 @@ blockToOpenDocument o bs | Table c a w h r <- bs = setFirstPara >> table c a w h r | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]) - | RawBlock f s <- bs = if f == "opendocument" - then preformatted s + | RawBlock f s <- bs = if f == Format "opendocument" + then return $ text s else return empty | Null <- bs = return empty | otherwise = return empty @@ -373,23 +379,27 @@ inlineToOpenDocument o ils | Subscript l <- ils = withTextStyle Sub $ inlinesToOpenDocument o l | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l - | Code _ s <- ils = preformatted s - | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s) + | Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s + | Math t s <- ils = inlinesToOpenDocument o (readTeXMath' t s) | Cite _ l <- ils = inlinesToOpenDocument o l - | RawInline f s <- ils = if f == "opendocument" || f == "html" - then preformatted s + | RawInline f s <- ils = if f == Format "opendocument" + then return $ text s else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l - | Image _ (s,t) <- ils = return $ mkImg s t + | Image _ (s,t) <- ils = mkImg s t | Note l <- ils = mkNote l | otherwise = return empty where - preformatted = return . inSpanTags "Teletype" . handleSpaces . escapeStringForXML + preformatted s = handleSpaces $ escapeStringForXML s mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") , ("xlink:href" , s ) , ("office:name", t ) ] . 
inSpanTags "Definition" - mkImg s t = inTags False "draw:frame" (attrsFromTitle t) $ + mkImg s t = do + id' <- gets stImageId + modify (\st -> st{ stImageId = id' + 1 }) + return $ inTags False "draw:frame" + (("draw:name", "img" ++ show id'):attrsFromTitle t) $ selfClosingTag "draw:image" [ ("xlink:href" , s ) , ("xlink:type" , "simple") , ("xlink:show" , "embed" ) @@ -524,7 +534,8 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC deriving ( Eq,Ord ) +data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre + deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] textStyleAttr s @@ -538,5 +549,8 @@ textStyleAttr s | Sub <- s = [("style:text-position" ,"sub 58%" )] | Sup <- s = [("style:text-position" ,"super 58%" )] | SmallC <- s = [("fo:font-variant" ,"small-caps")] + | Pre <- s = [("style:font-name" ,"Courier New") + ,("style:font-name-asian" ,"Courier New") + ,("style:font-name-complex" ,"Courier New")] | otherwise = [] diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 51083f52b..d318c5f6a 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -129,7 +129,7 @@ blockToOrg (Para inlines) = do blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline -blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" = +blockToOrg (RawBlock f str) | f `elem` ["org", "latex", "tex"] = return $ text str blockToOrg (RawBlock _ _) = return empty blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 70c6b4421..37bb66632 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -38,7 +38,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Builder (deleteMeta) import Data.List ( isPrefixOf, intersperse, transpose ) -import Network.URI (isAbsoluteURI) +import Network.URI (isURI) import Text.Pandoc.Pretty import Control.Monad.State import Control.Applicative ( (<$>) ) @@ -287,7 +287,7 @@ definitionListItemToRST (label, defs) = do label' <- inlineListToRST label contents <- liftM vcat $ mapM blockListToRST defs tabstop <- get >>= (return . writerTabStop . stOptions) - return $ label' $$ nest tabstop (contents <> cr) + return $ label' $$ nest tabstop (nestle contents <> cr) -- | Convert list of Pandoc block elements to RST. blockListToRST :: [Block] -- ^ List of block elements @@ -296,8 +296,14 @@ blockListToRST blocks = mapM blockToRST blocks >>= return . vcat -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc -inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat - where insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed +inlineListToRST lst = + mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . 
hcat + where -- remove spaces after displaymath, as they screw up indentation: + removeSpaceAfterDisplayMath (Math DisplayMath x : zs) = + Math DisplayMath x : dropWhile (==Space) zs + removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs + removeSpaceAfterDisplayMath [] = [] + insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed insertBS (x:y:z:zs) | isComplex y && surroundComplex x z = x : y : RawInline "rst" "\\ " : insertBS (z:zs) @@ -387,7 +393,7 @@ inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space -- autolink inlineToRST (Link [Str str] (src, _)) - | isAbsoluteURI src && + | isURI src && if "mailto:" `isPrefixOf` src then src == escapeURI ("mailto:" ++ str) else src == escapeURI str = do diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 0e8ce2ece..fb935fa6a 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -40,7 +40,7 @@ import Data.Char ( ord, chr, isDigit, toLower ) import System.FilePath ( takeExtension ) import qualified Data.ByteString as B import Text.Printf ( printf ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) import qualified Control.Exception as E -- | Convert Image inlines into a raw RTF embedded image, read from a file. @@ -48,7 +48,7 @@ import qualified Control.Exception as E rtfEmbedImage :: Inline -> IO Inline rtfEmbedImage x@(Image _ (src,_)) = do let ext = map toLower (takeExtension src) - if ext `elem` [".jpg",".jpeg",".png"] && not (isAbsoluteURI src) + if ext `elem` [".jpg",".jpeg",".png"] && not (isURI src) then do let src' = unEscapeString src imgdata <- E.catch (B.readFile src') @@ -324,7 +324,7 @@ inlineToRTF (Quoted DoubleQuote lst) = "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str -inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str +inlineToRTF (Math t str) = inlineListToRTF $ readTeXMath' t str inlineToRTF (Cite _ lst) = inlineListToRTF lst inlineToRTF (RawInline f str) | f == Format "rtf" = str diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 89923822c..604aac1c9 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -34,6 +34,7 @@ module Text.Pandoc.Writers.Shared ( , setField , defField , tagWithAttrs + , fixDisplayMath ) where import Text.Pandoc.Definition @@ -46,6 +47,7 @@ import qualified Data.Map as M import qualified Data.Text as T import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..)) import qualified Data.Traversable as Traversable +import Data.List ( groupBy ) -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. 
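Before the Shared.hs hunks that follow: the Data.List (groupBy) import added above feeds the new fixDisplayMath helper defined a little further down, which chops a paragraph into runs of display math and non-math so that ODT/DOCX output can give each formula its own block. A small sketch of how that grouping predicate behaves, restating it from the patch and assuming only pandoc-types for the Inline type:

    import Data.List (groupBy)
    import Text.Pandoc.Definition

    isDisplayMath :: Inline -> Bool
    isDisplayMath (Math DisplayMath _) = True
    isDisplayMath _                    = False

    -- Adjacent inlines stay in one group iff both are display math
    -- or both are not (the predicate fixDisplayMath passes to groupBy).
    sameKind :: Inline -> Inline -> Bool
    sameKind x y = (isDisplayMath x && isDisplayMath y)
                || not (isDisplayMath x || isDisplayMath y)

    main :: IO ()
    main = mapM_ print $ groupBy sameKind
             [Str "Euler:", Space, Math DisplayMath "e^{i\\pi}+1=0", Str "QED"]
    -- prints three groups: [Str "Euler:",Space], then the lone display-math
    -- inline, then [Str "QED"]; fixDisplayMath wraps each run in its own
    -- Plain or Para inside a Div with class "math".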
@@ -65,8 +67,7 @@ metaToJSON opts blockWriter inlineWriter (Meta metamap) renderedMap <- Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap - return $ M.foldWithKey (\key val obj -> defField key val obj) - baseContext renderedMap + return $ M.foldWithKey defField baseContext renderedMap | otherwise = return (Object H.empty) metaValueToJSON :: Monad m @@ -134,7 +135,31 @@ tagWithAttrs tag (ident,classes,kvs) = hsep ,if null classes then empty else "class=" <> doubleQuotes (text (unwords classes)) - ] - <> hsep (map (\(k,v) -> text k <> "=" <> + ,hsep (map (\(k,v) -> text k <> "=" <> doubleQuotes (text (escapeStringForXML v))) kvs) - <> ">" + ] <> ">" + +isDisplayMath :: Inline -> Bool +isDisplayMath (Math DisplayMath _) = True +isDisplayMath _ = False + +stripLeadingTrailingSpace :: [Inline] -> [Inline] +stripLeadingTrailingSpace = go . reverse . go . reverse + where go (Space:xs) = xs + go xs = xs + +-- Put display math in its own block (for ODT/DOCX). +fixDisplayMath :: Block -> Block +fixDisplayMath (Plain lst) + | any isDisplayMath lst && not (all isDisplayMath lst) = + -- chop into several paragraphs so each displaymath is its own + Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ + groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || + not (isDisplayMath x || isDisplayMath y)) lst +fixDisplayMath (Para lst) + | any isDisplayMath lst && not (all isDisplayMath lst) = + -- chop into several paragraphs so each displaymath is its own + Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ + groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || + not (isDisplayMath x || isDisplayMath y)) lst +fixDisplayMath x = x diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index b1fd3d6af..bf3df8035 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -40,7 +40,7 @@ import Data.Ord ( comparing ) import Data.Char ( chr, ord ) import Control.Monad.State import Text.Pandoc.Pretty -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) import System.FilePath data WriterState = @@ -293,7 +293,7 @@ blockListToTexinfo (x:xs) = do case x of Header level _ _ -> do -- We need to insert a menu for this node.
- let (before, after) = break isHeader xs + let (before, after) = break isHeaderBlock xs before' <- blockListToTexinfo before let menu = if level < 4 then collectNodes (level + 1) after @@ -315,10 +315,6 @@ blockListToTexinfo (x:xs) = do xs' <- blockListToTexinfo xs return $ x' $$ xs' -isHeader :: Block -> Bool -isHeader (Header _ _ _) = True -isHeader _ = False - collectNodes :: Int -> [Block] -> [Block] collectNodes _ [] = [] collectNodes level (x:xs) = @@ -448,7 +444,7 @@ inlineToTexinfo (Image alternate (source, _)) = do where ext = drop 1 $ takeExtension source' base = dropExtension source' - source' = if isAbsoluteURI source + source' = if isURI source then source else unEscapeString source diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 7c102cc86..95aedf780 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -51,7 +51,7 @@ data WriterState = WriterState { writeTextile :: WriterOptions -> Pandoc -> String writeTextile opts document = evalState (pandocToTextile opts document) - (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) + WriterState { stNotes = [], stListLevel = [], stUseTags = False } -- | Return Textile representation of document. pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String
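To round off the LaTeX writer changes earlier in this diff: the new toLabel helper is what keeps \hyperdef, \label and listings label= arguments syntactically safe, passing ASCII letters, digits and a small set of punctuation through unchanged and hex-escaping everything else as uxNN. Restated here with a couple of worked examples; the sample identifiers are invented for illustration.

    import Data.Char (isAscii, isDigit, isLetter, ord)
    import Text.Printf (printf)

    -- Restated from the patch: map an arbitrary identifier to a LaTeX-safe label.
    toLabel :: String -> String
    toLabel [] = ""
    toLabel (x:xs)
      | (isLetter x || isDigit x) && isAscii x = x : toLabel xs
      | x `elem` "-+=:;."                      = x : toLabel xs
      | otherwise = "ux" ++ printf "%x" (ord x) ++ toLabel xs

    main :: IO ()
    main = mapM_ (putStrLn . toLabel)
      [ "fig:results-1"  -- kept as is: "fig:results-1"
      , "my_code block"  -- '_' and ' ' escaped: "myux5fcodeux20block"
      , "résumé"         -- non-ASCII letters escaped: "ruxe9sumuxe9"
      ]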