diff options
Diffstat (limited to 'src/Text/Pandoc')
34 files changed, 2569 insertions, 388 deletions
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 9b0850efb..3c9623b3c 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -32,9 +32,14 @@ module Text.Pandoc.ImageSize ( ImageType(..), imageType, imageSize, sizeInPixels, sizeInPoints ) where import Data.ByteString (ByteString, unpack) import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Lazy as BL +import Control.Applicative import Control.Monad import Data.Bits +import Data.Binary +import Data.Binary.Get import Text.Pandoc.Shared (safeRead) +import qualified Data.Map as M -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl @@ -53,7 +58,8 @@ imageType :: ByteString -> Maybe ImageType imageType img = case B.take 4 img of "\x89\x50\x4e\x47" -> return Png "\x47\x49\x46\x38" -> return Gif - "\xff\xd8\xff\xe0" -> return Jpeg + "\xff\xd8\xff\xe0" -> return Jpeg -- JFIF + "\xff\xd8\xff\xe1" -> return Jpeg -- Exif "%PDF" -> return Pdf "%!PS" | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" @@ -139,8 +145,14 @@ gifSize img = do jpegSize :: ByteString -> Maybe ImageSize jpegSize img = do let (hdr, rest) = B.splitAt 4 img - guard $ hdr == "\xff\xd8\xff\xe0" guard $ B.length rest >= 14 + case hdr of + "\xff\xd8\xff\xe0" -> jfifSize rest + "\xff\xd8\xff\xe1" -> exifSize $ B.takeWhile (/= '\xff') rest + _ -> mzero + +jfifSize :: ByteString -> Maybe ImageSize +jfifSize rest = do let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral $ unpack $ B.take 5 $ B.drop 9 $ rest let factor = case dpiDensity of @@ -149,11 +161,11 @@ jpegSize img = do _ -> const 72 let dpix = factor (shift dpix1 8 + dpix2) let dpiy = factor (shift dpiy1 8 + dpiy2) - (w,h) <- findJpegSize rest + (w,h) <- findJfifSize rest return $ ImageSize { pxX = w, pxY = h, dpiX = dpix, dpiY = dpiy } -findJpegSize :: ByteString -> Maybe (Integer,Integer) -findJpegSize bs = do +findJfifSize :: ByteString -> Maybe (Integer,Integer) +findJfifSize bs = do let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs case B.uncons bs' of Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do @@ -165,8 +177,217 @@ findJpegSize bs = do [c1,c2] -> do let len = shift c1 8 + c2 -- skip variables - findJpegSize $ B.drop len bs'' + findJfifSize $ B.drop len bs'' _ -> fail "JPEG parse error" Nothing -> fail "Did not find length record" +exifSize :: ByteString -> Maybe ImageSize +exifSize bs = runGet (Just <$> exifHeader bl) bl + where bl = BL.fromChunks [bs] +-- NOTE: It would be nicer to do +-- runGet ((Just <$> exifHeader) <|> return Nothing) +-- which would prevent pandoc from raising an error when an exif header can't +-- be parsed. But we only get an Alternative instance for Get in binary 0.6, +-- and binary 0.5 ships with ghc 7.6. + +exifHeader :: BL.ByteString -> Get ImageSize +exifHeader hdr = do + _app1DataSize <- getWord16be + exifHdr <- getWord32be + unless (exifHdr == 0x45786966) $ fail "Did not find exif header" + zeros <- getWord16be + unless (zeros == 0) $ fail "Expected zeros after exif header" + -- beginning of tiff header -- we read whole thing to use + -- in getting data from offsets: + let tiffHeader = BL.drop 8 hdr + byteAlign <- getWord16be + let bigEndian = byteAlign == 0x4d4d + let (getWord16, getWord32, getWord64) = + if bigEndian + then (getWord16be, getWord32be, getWord64be) + else (getWord16le, getWord32le, getWord64le) + let getRational = do + num <- getWord32 + den <- getWord32 + return $ fromIntegral num / fromIntegral den + tagmark <- getWord16 + unless (tagmark == 0x002a) $ fail "Failed alignment sanity check" + ifdOffset <- getWord32 + skip (fromIntegral ifdOffset - 8) -- skip to IDF + numentries <- getWord16 + let ifdEntry = do + tag <- getWord16 >>= \t -> + maybe (fail $ "Unknown tag type " ++ show t) return + (M.lookup t tagTypeTable) + dataFormat <- getWord16 + numComponents <- getWord32 + (fmt, bytesPerComponent) <- + case dataFormat of + 1 -> return (UnsignedByte . runGet getWord8, 1) + 2 -> return (AsciiString, 1) + 3 -> return (UnsignedShort . runGet getWord16, 2) + 4 -> return (UnsignedLong . runGet getWord32, 4) + 5 -> return (UnsignedRational . runGet getRational, 8) + 6 -> return (SignedByte . runGet getWord8, 1) + 7 -> return (Undefined . runGet getWord8, 1) + 8 -> return (SignedShort . runGet getWord16, 2) + 9 -> return (SignedLong . runGet getWord32, 4) + 10 -> return (SignedRational . runGet getRational, 8) + 11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4) + 12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8) + _ -> fail $ "Unknown data format " ++ show dataFormat + let totalBytes = fromIntegral $ numComponents * bytesPerComponent + payload <- if totalBytes <= 4 -- data is right here + then fmt <$> + (getLazyByteString (fromIntegral totalBytes) <* + skip (4 - totalBytes)) + else do -- get data from offset + offs <- getWord32 + return $ fmt $ BL.take (fromIntegral totalBytes) $ + BL.drop (fromIntegral offs) tiffHeader + return (tag, payload) + entries <- sequence $ replicate (fromIntegral numentries) ifdEntry + subentries <- case lookup ExifOffset entries of + Just (UnsignedLong offset) -> do + pos <- bytesRead + skip (fromIntegral offset - (fromIntegral pos - 8)) + numsubentries <- getWord16 + sequence $ + replicate (fromIntegral numsubentries) ifdEntry + _ -> return [] + let allentries = entries ++ subentries + (width, height) <- case (lookup ExifImageWidth allentries, + lookup ExifImageHeight allentries) of + (Just (UnsignedLong w), Just (UnsignedLong h)) -> + return (fromIntegral w, fromIntegral h) + _ -> fail "Could not determine image width, height" + let resfactor = case lookup ResolutionUnit allentries of + Just (UnsignedShort 1) -> (100 / 254) + _ -> 1 + let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) + $ lookup XResolution allentries + let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) + $ lookup YResolution allentries + return $ ImageSize{ + pxX = width + , pxY = height + , dpiX = xres + , dpiY = yres } + +data DataFormat = UnsignedByte Word8 + | AsciiString BL.ByteString + | UnsignedShort Word16 + | UnsignedLong Word32 + | UnsignedRational Rational + | SignedByte Word8 + | Undefined Word8 + | SignedShort Word16 + | SignedLong Word32 + | SignedRational Rational + | SingleFloat Word32 + | DoubleFloat Word64 + deriving (Show) + +data TagType = ImageDescription + | Make + | Model + | Orientation + | XResolution + | YResolution + | ResolutionUnit + | Software + | DateTime + | WhitePoint + | PrimaryChromaticities + | YCbCrCoefficients + | YCbCrPositioning + | ReferenceBlackWhite + | Copyright + | ExifOffset + | ExposureTime + | FNumber + | ExposureProgram + | ISOSpeedRatings + | ExifVersion + | DateTimeOriginal + | DateTimeDigitized + | ComponentConfiguration + | CompressedBitsPerPixel + | ShutterSpeedValue + | ApertureValue + | BrightnessValue + | ExposureBiasValue + | MaxApertureValue + | SubjectDistance + | MeteringMode + | LightSource + | Flash + | FocalLength + | MakerNote + | UserComment + | FlashPixVersion + | ColorSpace + | ExifImageWidth + | ExifImageHeight + | RelatedSoundFile + | ExifInteroperabilityOffset + | FocalPlaneXResolution + | FocalPlaneYResolution + | FocalPlaneResolutionUnit + | SensingMethod + | FileSource + | SceneType + deriving (Show, Eq, Ord) +tagTypeTable :: M.Map Word16 TagType +tagTypeTable = M.fromList + [ (0x010e, ImageDescription) + , (0x010f, Make) + , (0x0110, Model) + , (0x0112, Orientation) + , (0x011a, XResolution) + , (0x011b, YResolution) + , (0x0128, ResolutionUnit) + , (0x0131, Software) + , (0x0132, DateTime) + , (0x013e, WhitePoint) + , (0x013f, PrimaryChromaticities) + , (0x0211, YCbCrCoefficients) + , (0x0213, YCbCrPositioning) + , (0x0214, ReferenceBlackWhite) + , (0x8298, Copyright) + , (0x8769, ExifOffset) + , (0x829a, ExposureTime) + , (0x829d, FNumber) + , (0x8822, ExposureProgram) + , (0x8827, ISOSpeedRatings) + , (0x9000, ExifVersion) + , (0x9003, DateTimeOriginal) + , (0x9004, DateTimeDigitized) + , (0x9101, ComponentConfiguration) + , (0x9102, CompressedBitsPerPixel) + , (0x9201, ShutterSpeedValue) + , (0x9202, ApertureValue) + , (0x9203, BrightnessValue) + , (0x9204, ExposureBiasValue) + , (0x9205, MaxApertureValue) + , (0x9206, SubjectDistance) + , (0x9207, MeteringMode) + , (0x9208, LightSource) + , (0x9209, Flash) + , (0x920a, FocalLength) + , (0x927c, MakerNote) + , (0x9286, UserComment) + , (0xa000, FlashPixVersion) + , (0xa001, ColorSpace) + , (0xa002, ExifImageWidth) + , (0xa003, ExifImageHeight) + , (0xa004, RelatedSoundFile) + , (0xa005, ExifInteroperabilityOffset) + , (0xa20e, FocalPlaneXResolution) + , (0xa20f, FocalPlaneYResolution) + , (0xa210, FocalPlaneResolutionUnit) + , (0xa217, SensingMethod) + , (0xa300, FileSource) + , (0xa301, SceneType) + ] diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 1f5f6f862..977cb576b 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -147,6 +147,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("dxr","application/x-director") ,("emb","chemical/x-embl-dl-nucleotide") ,("embl","chemical/x-embl-dl-nucleotide") + ,("emf","image/x-emf") ,("eml","message/rfc822") ,("ent","chemical/x-ncbi-asn1-ascii") ,("eot","application/vnd.ms-fontobject") @@ -220,6 +221,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("jnlp","application/x-java-jnlp-file") ,("jpe","image/jpeg") ,("jpeg","image/jpeg") + ,("jfif","image/jpeg") ,("jpg","image/jpeg") ,("js","application/x-javascript") ,("kar","audio/midi") @@ -244,6 +246,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("lzx","application/x-lzx") ,("m3u","audio/mpegurl") ,("m4a","audio/mpeg") + ,("m4v","video/x-m4v") ,("maker","application/x-maker") ,("man","application/x-troff-man") ,("mcif","chemical/x-mmcif") @@ -464,6 +467,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("wm","video/x-ms-wm") ,("wma","audio/x-ms-wma") ,("wmd","application/x-ms-wmd") + ,("wmf","image/x-wmf") ,("wml","text/vnd.wap.wml") ,("wmlc","application/vnd.wap.wmlc") ,("wmls","text/vnd.wap.wmlscript") diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 5f65abdde..38220f542 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -210,6 +210,7 @@ data ReaderOptions = ReaderOptions{ , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks , readerDefaultImageExtension :: String -- ^ Default extension for images + , readerTrace :: Bool -- ^ Print debugging info } deriving (Show, Read) instance Default ReaderOptions @@ -225,6 +226,7 @@ instance Default ReaderOptions , readerApplyMacros = True , readerIndentedCodeClasses = [] , readerDefaultImageExtension = "" + , readerTrace = False } -- diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index e8683b98f..608cad2e9 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -41,6 +41,7 @@ import System.Directory import System.Environment import Control.Monad (unless) import Data.List (isInfixOf) +import Data.Maybe (fromMaybe) import qualified Data.ByteString.Base64 as B64 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition @@ -50,6 +51,9 @@ import Text.Pandoc.Options (WriterOptions(..)) import Text.Pandoc.MIME (extensionFromMimeType) import Text.Pandoc.Process (pipeProcess) import qualified Data.ByteString.Lazy as BL +#ifdef _WINDOWS +import Data.List (intercalate) +#endif withTempDir :: String -> (FilePath -> IO a) -> IO a withTempDir = @@ -59,6 +63,11 @@ withTempDir = withSystemTempDirectory #endif +#ifdef _WINDOWS +changePathSeparators :: FilePath -> FilePath +changePathSeparators = intercalate "/" . splitDirectories +#endif + makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) -> (WriterOptions -> Pandoc -> String) -- ^ writer -> WriterOptions -- ^ options @@ -87,7 +96,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do res <- fetchItem baseURL src case res of Right (contents, Just mime) -> do - let ext = maybe (takeExtension src) id $ + let ext = fromMaybe (takeExtension src) $ extensionFromMimeType mime let basename = UTF8.toString $ B64.encode $ UTF8.fromString src let fname = tmpdir </> basename <.> ext @@ -107,7 +116,6 @@ tex2pdf' tmpDir program source = do then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source - let msg = "Error producing PDF from TeX source.\n" case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -116,8 +124,8 @@ tex2pdf' tmpDir program source = do x | "! Package inputenc Error" `BC.isPrefixOf` x -> "\nTry running pandoc with --latex-engine=xelatex." _ -> "" - return $ Left $ msg <> logmsg <> extramsg - (ExitSuccess, Nothing) -> return $ Left msg + return $ Left $ logmsg <> extramsg + (ExitSuccess, Nothing) -> return $ Left "" (ExitSuccess, Just pdf) -> return $ Right pdf (<>) :: ByteString -> ByteString -> ByteString @@ -145,10 +153,19 @@ runTeXProgram program runsLeft tmpDir source = do let file = tmpDir </> "input.tex" exists <- doesFileExist file unless exists $ UTF8.writeFile file source +#ifdef _WINDOWS + -- note: we want / even on Windows, for TexLive + let tmpDir' = changePathSeparators tmpDir + let file' = changePathSeparators file +#else + let tmpDir' = tmpDir + let file' = file +#endif let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", tmpDir, file] + "-output-directory", tmpDir', file'] env' <- getEnvironment - let texinputs = maybe (tmpDir ++ ":") ((tmpDir ++ ":") ++) + let sep = searchPathSeparator:[] + let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++) $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 9687d7712..883a560d0 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -48,6 +48,8 @@ module Text.Pandoc.Parsing ( (>>~), romanNumeral, emailAddress, uri, + mathInline, + mathDisplay, withHorizDisplacement, withRaw, escaped, @@ -172,13 +174,13 @@ import Text.Pandoc.Asciify (toAsciiChar) import Data.Default import qualified Data.Set as Set import Control.Monad.Reader -import Control.Applicative ((*>), (<*), (<$), liftA2) +import Control.Applicative ((*>), (<*), (<$), liftA2, Applicative) import Data.Monoid import Data.Maybe (catMaybes) type Parser t s = Parsec t s -newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Functor) +newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor) runF :: F a -> ParserState -> a runF = runReader . unF @@ -269,7 +271,7 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. nonspaceChar :: Parser [Char] st Char -nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r' +nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r'] -- | Skips zero or more spaces or tabs. skipSpaces :: Parser [Char] st () @@ -455,6 +457,39 @@ uri = try $ do let uri' = scheme ++ ":" ++ fromEntities str' return (uri', escapeURI uri') +mathInlineWith :: String -> String -> Parser [Char] st String +mathInlineWith op cl = try $ do + string op + notFollowedBy space + words' <- many1Till (count 1 (noneOf "\n\\") + <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> count 1 newline <* notFollowedBy' blankline + *> return " ") + (try $ string cl) + notFollowedBy digit -- to prevent capture of $5 + return $ concat words' + +mathDisplayWith :: String -> String -> Parser [Char] st String +mathDisplayWith op cl = try $ do + string op + many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) + +mathDisplay :: Parser [Char] ParserState String +mathDisplay = + (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathDisplayWith "\\[" "\\]") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathDisplayWith "\\\\[" "\\\\]") + +mathInline :: Parser [Char] ParserState String +mathInline = + (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathInlineWith "\\(" "\\)") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathInlineWith "\\\\(" "\\\\)") + -- | Applies a parser, returns tuple of its results and its horizontal -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement @@ -818,6 +853,11 @@ data ParserState = ParserState stateHasChapters :: Bool, -- ^ True if \chapter encountered stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role + stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles + -- Triple represents: 1) Base role, 2) Optional format (only for :raw: + -- roles), 3) Source language annotation for code (could be used to + -- annotate role classes too). + stateWarnings :: [String] -- ^ Warnings generated by the parser } @@ -880,6 +920,7 @@ defaultParserState = stateHasChapters = False, stateMacros = [], stateRstDefaultRole = "title-reference", + stateRstCustomRoles = M.empty, stateWarnings = []} getOption :: (ReaderOptions -> a) -> Parser s ParserState a @@ -1027,7 +1068,7 @@ doubleQuoteStart :: Parser [Char] ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" - notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) + notFollowedBy . satisfy $ flip elem [' ', '\t', '\n'] doubleQuoteEnd :: Parser [Char] st () doubleQuoteEnd = do diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index faf2a6797..5331587ce 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -60,6 +60,7 @@ module Text.Pandoc.Pretty ( , hsep , vcat , vsep + , nestle , chomp , inside , braces @@ -72,7 +73,7 @@ module Text.Pandoc.Pretty ( ) where -import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex) +import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..)) import Data.Foldable (toList) import Data.List (intercalate) import Data.Monoid @@ -80,8 +81,7 @@ import Data.String import Control.Monad.State import Data.Char (isSpace) -data Monoid a => - RenderState a = RenderState{ +data RenderState a = RenderState{ output :: [a] -- ^ In reverse order , prefix :: String , usePrefix :: Bool @@ -186,6 +186,14 @@ vcat = foldr ($$) empty vsep :: [Doc] -> Doc vsep = foldr ($+$) empty +-- | Removes leading blank lines from a 'Doc'. +nestle :: Doc -> Doc +nestle (Doc d) = Doc $ go d + where go x = case viewl x of + (BlankLine :< rest) -> go rest + (NewLine :< rest) -> go rest + _ -> x + -- | Chomps trailing blank space off of a 'Doc'. chomp :: Doc -> Doc chomp d = Doc (fromList dl') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index fc29988d5..56cb16b20 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -12,6 +12,7 @@ import Data.Char (isSpace) import Control.Monad.State import Control.Applicative ((<$>)) import Data.List (intersperse) +import Data.Maybe (fromMaybe) {- @@ -683,7 +684,7 @@ parseBlock (Elem e) = "lowerroman" -> LowerRoman "upperroman" -> UpperRoman _ -> Decimal - let start = maybe 1 id $ + let start = fromMaybe 1 $ (attrValue "override" <$> filterElement (named "listitem") e) >>= safeRead orderedListWith (start,listStyle,DefaultDelim) @@ -779,7 +780,7 @@ parseBlock (Elem e) = caption <- case filterChild isCaption e of Just t -> getInlines t Nothing -> return mempty - let e' = maybe e id $ filterChild (named "tgroup") e + let e' = fromMaybe e $ filterChild (named "tgroup") e let isColspec x = named "colspec" x || named "col" x let colspecs = case filterChild (named "colgroup") e' of Just c -> filterChildren isColspec c @@ -801,12 +802,14 @@ parseBlock (Elem e) = Just "center" -> AlignCenter _ -> AlignDefault let toWidth c = case findAttr (unqual "colwidth") c of - Just w -> maybe 0 id + Just w -> fromMaybe 0 $ safeRead $ '0': filter (\x -> (x >= '0' && x <= '9') || x == '.') w Nothing -> 0 :: Double - let numrows = maximum $ map length bodyrows + let numrows = case bodyrows of + [] -> 0 + xs -> maximum $ map length xs let aligns = case colspecs of [] -> replicate numrows AlignDefault cs -> map toAlignment cs diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d691c9878..d1e4d0024 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -76,9 +76,18 @@ pBody :: TagParser [Block] pBody = pInTags "body" block pHead :: TagParser [Block] -pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t)) + pMetaTag = do + mt <- pSatisfy (~== TagOpen "meta" []) + let name = fromAttrib "name" mt + if null name + then return [] + else do + let content = fromAttrib "content" mt + updateState $ B.setMeta name (B.text content) + return [] block :: TagParser [Block] block = choice @@ -207,7 +216,7 @@ pHeader = try $ do let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] let level = read (drop 1 tagtype) contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof) - let ident = maybe "" id $ lookup "id" attr + let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] return $ if bodyTitle @@ -257,7 +266,7 @@ pCol = try $ do skipMany pBlank return $ case lookup "width" attribs of Just x | not (null x) && last x == '%' -> - maybe 0.0 id $ safeRead ('0':'.':init x) + fromMaybe 0.0 $ safeRead ('0':'.':init x) _ -> 0.0 pColgroup :: TagParser [Double] @@ -467,7 +476,13 @@ pBlank = try $ do pTagContents :: Parser [Char] ParserState Inline pTagContents = - pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad + Math DisplayMath `fmap` mathDisplay + <|> Math InlineMath `fmap` mathInline + <|> pStr + <|> pSpace + <|> smartPunctuation pTagContents + <|> pSymbol + <|> pBad pStr :: Parser [Char] ParserState Inline pStr = do @@ -482,6 +497,7 @@ isSpecial '"' = True isSpecial '\'' = True isSpecial '.' = True isSpecial '-' = True +isSpecial '$' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 75e29ebb9..51271edc5 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,7 +38,8 @@ import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) +import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, + mathDisplay, mathInline) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad @@ -172,6 +173,8 @@ double_quote :: LP Inlines double_quote = ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") <|> quoted' doubleQuoted (string "“") (void $ char '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") <|> quoted' doubleQuoted (string "\"") (void $ char '"') ) @@ -454,6 +457,7 @@ inlineCommands = M.fromList $ , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("verb", doverb) , ("lstinline", doverb) + , ("Verb", doverb) , ("texttt", (code . stringify . toList) <$> tok) , ("url", (unescapeURL <$> braced) >>= \url -> pure (link url "" (str url))) @@ -870,9 +874,8 @@ verbatimEnv = do (_,r) <- withRaw $ do controlSeq "begin" name <- braced - guard $ name == "verbatim" || name == "Verbatim" || - name == "lstlisting" || name == "minted" || - name == "alltt" + guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", + "minted", "alltt"] verbEnv name rest <- getInput return (r,rest) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 33d1a9620..aa0252266 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -60,6 +60,8 @@ import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set +import Text.Printf (printf) +import Debug.Trace (trace) type MarkdownParser = Parser [Char] ParserState @@ -215,10 +217,10 @@ pandocTitleBlock = try $ do author' <- author date' <- date return $ - ( if B.isNull title' then id else B.setMeta "title" title' - . if null author' then id else B.setMeta "author" author' - . if B.isNull date' then id else B.setMeta "date" date' ) - nullMeta + (if B.isNull title' then id else B.setMeta "title" title') + . (if null author' then id else B.setMeta "author" author') + . (if B.isNull date' then id else B.setMeta "date" date') + $ nullMeta updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } yamlMetaBlock :: MarkdownParser (F Blocks) @@ -354,7 +356,7 @@ referenceKey = try $ do tit <- option "" referenceTitle -- currently we just ignore MMD-style link/image attributes _kvs <- option [] $ guardEnabled Ext_link_attributes - >> many (spnl >> keyValAttr) + >> many (try $ spnl >> keyValAttr) blanklines let target = (escapeURI $ trimr src, tit) st <- getState @@ -440,7 +442,10 @@ parseBlocks :: MarkdownParser (F Blocks) parseBlocks = mconcat <$> manyTill block eof block :: MarkdownParser (F Blocks) -block = choice [ mempty <$ blanklines +block = do + tr <- getOption readerTrace + pos <- getPosition + res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock , guardEnabled Ext_latex_macros *> (macro >>= return . return) @@ -465,6 +470,11 @@ block = choice [ mempty <$ blanklines , para , plain ] <?> "block" + when tr $ do + st <- getState + trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList $ runF res st)) (return ()) + return res -- -- header blocks @@ -730,9 +740,14 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ()) listLine :: MarkdownParser String listLine = try $ do notFollowedBy' (do indentSpaces - many (spaceChar) + many spaceChar listStart) - chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline + notFollowedBy' $ htmlTag (~== TagClose "div") + chunks <- manyTill + ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') + <|> liftM snd (htmlTag isCommentTag) + <|> count 1 anyChar + ) newline return $ concat chunks -- parse raw text for one list item, excluding start marker and continuations @@ -759,6 +774,7 @@ listContinuationLine :: MarkdownParser String listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart + notFollowedBy' $ htmlTag (~== TagClose "div") optional indentSpaces result <- anyLine return $ result ++ "\n" @@ -783,8 +799,8 @@ listItem start = try $ do orderedList :: MarkdownParser (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart - unless ((style == DefaultStyle || style == Decimal || style == Example) && - (delim == DefaultDelim || delim == Period)) $ + unless (style `elem` [DefaultStyle, Decimal, Example] && + delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists items <- fmap sequence $ many1 $ listItem @@ -896,7 +912,9 @@ plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline -- htmlElement :: MarkdownParser String -htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) +htmlElement = rawVerbatimBlock + <|> strictHtmlBlock + <|> liftM snd (htmlTag isBlockTag) htmlBlock :: MarkdownParser (F Blocks) htmlBlock = do @@ -917,8 +935,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag) rawVerbatimBlock :: MarkdownParser String rawVerbatimBlock = try $ do - (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> - t == "pre" || t == "style" || t == "script") + (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem + ["pre", "style", "script"]) (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags [TagClose tag] @@ -936,6 +954,8 @@ rawHtmlBlocks = do htmlBlocks <- many1 $ try $ do s <- rawVerbatimBlock <|> try ( do (t,raw) <- htmlTag isBlockTag + guard $ t ~/= TagOpen "div" [] && + t ~/= TagClose "div" exts <- getOption readerExtensions -- if open tag, need markdown="1" if -- markdown_attributes extension is set @@ -1118,12 +1138,12 @@ multilineTableHeader headless = try $ do then liftM (map (:[]) . tail . splitStringByIndices (init indices)) $ lookAhead anyLine else return $ transpose $ map - (\ln -> tail $ splitStringByIndices (init indices) ln) + (tail . splitStringByIndices (init indices)) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" - else map unwords rawHeadsList + else map (unlines . map trim) rawHeadsList heads <- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) $ map trim rawHeads @@ -1180,7 +1200,7 @@ gridTableHeader headless = try $ do -- RST does not have a notion of alignments let rawHeads = if headless then replicate (length dashes) "" - else map unwords $ transpose + else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads return (heads, aligns, indices) @@ -1366,7 +1386,7 @@ ltSign :: MarkdownParser (F Inlines) ltSign = do guardDisabled Ext_raw_html <|> guardDisabled Ext_markdown_in_html_blocks - <|> (notFollowedBy' rawHtmlBlocks >> return ()) + <|> (notFollowedBy' (htmlTag isBlockTag) >> return ()) char '<' return $ return $ B.str "<" @@ -1406,39 +1426,6 @@ math :: MarkdownParser (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) <|> (return . B.math <$> (mathInline >>= applyMacros')) -mathDisplay :: MarkdownParser String -mathDisplay = - (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathDisplayWith "\\[" "\\]") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathDisplayWith "\\\\[" "\\\\]") - -mathDisplayWith :: String -> String -> MarkdownParser String -mathDisplayWith op cl = try $ do - string op - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) - -mathInline :: MarkdownParser String -mathInline = - (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathInlineWith "\\(" "\\)") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathInlineWith "\\\\(" "\\\\)") - -mathInlineWith :: String -> String -> MarkdownParser String -mathInlineWith op cl = try $ do - string op - notFollowedBy space - words' <- many1Till (count 1 (noneOf "\n\\") - <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) - <|> count 1 newline <* notFollowedBy' blankline - *> return " ") - (try $ string cl) - notFollowedBy digit -- to prevent capture of $5 - return $ concat words' - -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. enclosure :: Char @@ -1565,16 +1552,14 @@ endline :: MarkdownParser (F Inlines) endline = try $ do newline notFollowedBy blankline + -- parse potential list-starts differently if in a list: + st <- getState + when (stateParserContext st == ListItemState) $ notFollowedBy listStart guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header - guardEnabled Ext_backtick_code_blocks >> + guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) - -- parse potential list-starts differently if in a list: - st <- getState - when (stateParserContext st == ListItemState) $ do - notFollowedBy' bulletListStart - notFollowedBy' anyOrderedListStart (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) <|> (return $ return B.space) @@ -1747,7 +1732,7 @@ spanHtml = try $ do guardEnabled Ext_markdown_in_html_blocks (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) - let ident = maybe "" id $ lookup "id" attrs + let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.spanWith (ident, classes, keyvals) <$> contents @@ -1755,12 +1740,19 @@ spanHtml = try $ do divHtml :: MarkdownParser (F Blocks) divHtml = try $ do guardEnabled Ext_markdown_in_html_blocks - (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" []) - contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div")) - let ident = maybe "" id $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs - let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) <$> contents + (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) + bls <- option "" (blankline >> option "" blanklines) + contents <- mconcat <$> + many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block) + closed <- option False (True <$ htmlTag (~== TagClose "div")) + if closed + then do + let ident = fromMaybe "" $ lookup "id" attrs + let classes = maybe [] words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ B.divWith (ident, classes, keyvals) <$> contents + else -- avoid backtracing + return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents rawHtmlInline :: MarkdownParser (F Inlines) rawHtmlInline = do @@ -1836,9 +1828,10 @@ citeKey = try $ do guard $ lastStrPos /= Just pos suppress_author <- option False (char '-' >> return True) char '@' - first <- letter - let internal p = try $ p >>~ lookAhead (letter <|> digit) - rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/") + first <- letter <|> char '_' + let regchar = satisfy (\c -> isAlphaNum c || c == '_') + let internal p = try $ p >>~ lookAhead regchar + rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") let key = first:rest return (suppress_author, key) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 7f99af528..7ac2f33ba 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -54,6 +54,7 @@ import Data.Sequence (viewl, ViewL(..), (<|)) import qualified Data.Foldable as F import qualified Data.Map as M import Data.Char (isDigit, isSpace) +import Data.Maybe (fromMaybe) -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options @@ -148,9 +149,16 @@ inlinesInTags tag = try $ do blocksInTags :: String -> MWParser Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) + let closer = if tag == "li" + then htmlTag (~== TagClose "li") + <|> lookAhead ( + htmlTag (~== TagOpen "li" []) + <|> htmlTag (~== TagClose "ol") + <|> htmlTag (~== TagClose "ul")) + else htmlTag (~== TagClose tag) if '/' `elem` raw -- self-closing tag then return mempty - else mconcat <$> manyTill block (htmlTag (~== TagClose tag)) + else mconcat <$> manyTill block closer charsInTags :: String -> MWParser [Char] charsInTags tag = try $ do @@ -204,7 +212,7 @@ table = do tableStart styles <- option [] parseAttrs <* blankline let tableWidth = case lookup "width" styles of - Just w -> maybe 1.0 id $ parseWidth w + Just w -> fromMaybe 1.0 $ parseWidth w Nothing -> 1.0 caption <- option mempty tableCaption optional rowsep @@ -285,7 +293,7 @@ tableCell = try $ do Just "center" -> AlignCenter _ -> AlignDefault let width = case lookup "width" attrs of - Just xs -> maybe 0.0 id $ parseWidth xs + Just xs -> fromMaybe 0.0 $ parseWidth xs Nothing -> 0.0 return ((align, width), bs) @@ -380,15 +388,13 @@ bulletList = B.bulletList <$> orderedList :: MWParser Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) - <|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *> - many (listItem '#' <|> li) <* - optional (htmlTag (~== TagClose "ul")))) - <|> do (tag,_) <- htmlTag (~== TagOpen "ol" []) - spaces - items <- many (listItem '#' <|> li) - optional (htmlTag (~== TagClose "ol")) - let start = maybe 1 id $ safeRead $ fromAttrib "start" tag - return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items + <|> try + (do (tag,_) <- htmlTag (~== TagOpen "ol" []) + spaces + items <- many (listItem '#' <|> li) + optional (htmlTag (~== TagClose "ol")) + let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag + return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) definitionList :: MWParser Blocks definitionList = B.definitionList <$> many1 defListItem diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs new file mode 100644 index 000000000..5dc250f04 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org.hs @@ -0,0 +1,552 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org + Copyright : Copyright (C) 2014 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de> + +Conversion of Org-Mode to 'Pandoc' document. +-} +module Text.Pandoc.Readers.Org ( readOrg ) where + +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (orderedListMarker) +import Text.Pandoc.Shared (compactify') + +import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Monad (guard, mzero) +import Data.Char (toLower) +import Data.List (foldl') +import Data.Maybe (listToMaybe, fromMaybe) +import Data.Monoid (mconcat, mempty, mappend) + +-- | Parse org-mode string and return a Pandoc document. +readOrg :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n") + +type OrgParser = Parser [Char] ParserState + +parseOrg:: OrgParser Pandoc +parseOrg = do + blocks' <- B.toList <$> parseBlocks + st <- getState + let meta = stateMeta st + return $ Pandoc meta $ filter (/= Null) blocks' + +-- +-- parsing blocks +-- + +parseBlocks :: OrgParser Blocks +parseBlocks = mconcat <$> manyTill block eof + +block :: OrgParser Blocks +block = choice [ mempty <$ blanklines + , orgBlock + , example + , drawer + , specialLine + , header + , hline + , list + , table + , paraOrPlain + ] <?> "block" + +-- +-- Org Blocks (#+BEGIN_... / #+END_...) +-- + +orgBlock :: OrgParser Blocks +orgBlock = try $ do + (indent, blockType, args) <- blockHeader + blockStr <- rawBlockContent indent blockType + let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] + case blockType of + "comment" -> return mempty + "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr + _ -> B.divWith ("", [blockType], []) + <$> (parseFromString parseBlocks blockStr) + +blockHeader :: OrgParser (Int, String, [String]) +blockHeader = (,,) <$> blockIndent + <*> blockType + <*> (skipSpaces *> blockArgs) + where blockIndent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) + blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline + +rawBlockContent :: Int -> String -> OrgParser String +rawBlockContent indent blockType = + unlines . map commaEscaped <$> manyTill indentedLine blockEnder + where + indentedLine = try $ choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +-- indent by specified number of spaces (or equiv. tabs) +indentWith :: Int -> OrgParser String +indentWith num = do + tabStop <- getOption readerTabStop + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')) + , try (char '\t' >> count (num - tabStop) (char ' ')) ] + +translateLang :: String -> String +translateLang "sh" = "bash" +translateLang cs = cs + +commaEscaped :: String -> String +commaEscaped (',':cs@('*':_)) = cs +commaEscaped (',':cs@('#':'+':_)) = cs +commaEscaped cs = cs + +example :: OrgParser Blocks +example = try $ + B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine + +exampleLine :: OrgParser String +exampleLine = try $ string ": " *> anyLine + +-- Drawers for properties or a logbook +drawer :: OrgParser Blocks +drawer = try $ do + drawerStart + manyTill drawerLine (try drawerEnd) + return mempty + +drawerStart :: OrgParser String +drawerStart = try $ + skipSpaces *> drawerName <* skipSpaces <* newline + where drawerName = try $ char ':' *> validDrawerName <* char ':' + validDrawerName = stringAnyCase "PROPERTIES" + <|> stringAnyCase "LOGBOOK" + +drawerLine :: OrgParser String +drawerLine = try $ anyLine + +drawerEnd :: OrgParser String +drawerEnd = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + + +-- Comments, Options and Metadata +specialLine :: OrgParser Blocks +specialLine = try $ metaLine <|> commentLine + +metaLine :: OrgParser Blocks +metaLine = try $ metaLineStart *> declarationLine + +commentLine :: OrgParser Blocks +commentLine = try $ commentLineStart *> anyLine *> pure mempty + +-- The order, in which blocks are tried, makes sure that we're not looking at +-- the beginning of a block, so we don't need to check for it +metaLineStart :: OrgParser String +metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" + +commentLineStart :: OrgParser String +commentLineStart = try $ mappend <$> many spaceChar <*> string "# " + +declarationLine :: OrgParser Blocks +declarationLine = try $ do + meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta + updateState $ \st -> st { stateMeta = stateMeta st <> meta' } + return mempty + +metaValue :: OrgParser MetaValue +metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine + +metaKey :: OrgParser [Char] +metaKey = map toLower <$> many1 (noneOf ": \n\r") + <* char ':' + <* skipSpaces + +-- | Headers +header :: OrgParser Blocks +header = try $ + B.header <$> headerStart + <*> (trimInlines <$> restOfLine) + +headerStart :: OrgParser Int +headerStart = try $ + (length <$> many1 (char '*')) <* many1 (char ' ') + +-- Horizontal Line (five dashes or more) +hline :: OrgParser Blocks +hline = try $ do + skipSpaces + string "-----" + many (char '-') + skipSpaces + newline + return B.horizontalRule + +-- +-- Tables +-- + +data OrgTableRow = OrgContentRow [Blocks] + | OrgAlignRow [Alignment] + | OrgHlineRow + deriving (Eq, Show) + +type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]]) + +table :: OrgParser Blocks +table = try $ do + lookAhead tableStart + (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows + return $ B.table "" (zip aligns widths) heads lns + +tableStart :: OrgParser Char +tableStart = try $ skipSpaces *> char '|' + +tableRows :: OrgParser [OrgTableRow] +tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) + +tableContentRow :: OrgParser OrgTableRow +tableContentRow = try $ + OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) + +tableContentCell :: OrgParser Blocks +tableContentCell = try $ + B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell) + +endOfCell :: OrgParser Char +-- endOfCell = char '|' <|> newline +endOfCell = try $ char '|' <|> lookAhead newline + +tableAlignRow :: OrgParser OrgTableRow +tableAlignRow = try $ + OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) + +tableAlignCell :: OrgParser Alignment +tableAlignCell = + choice [ try $ emptyCell *> return (AlignDefault) + , try $ skipSpaces + *> char '<' + *> tableAlignFromChar + <* many digit + <* char '>' + <* emptyCell + ] <?> "alignment info" + where emptyCell = try $ skipSpaces *> endOfCell + +tableAlignFromChar :: OrgParser Alignment +tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] + +tableHline :: OrgParser OrgTableRow +tableHline = try $ + OrgHlineRow <$ (tableStart *> char '-' *> anyLine) + +tableContent :: [OrgTableRow] + -> OrgTableContent +tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty) + +normalizeTable :: OrgTableContent + -> OrgTableContent +normalizeTable (cols, aligns, widths, heads, lns) = + let aligns' = fillColumns aligns AlignDefault + widths' = fillColumns widths 0.0 + heads' = if heads == mempty + then heads + else fillColumns heads (B.plain mempty) + lns' = map (flip fillColumns (B.plain mempty)) lns + fillColumns base padding = take cols $ base ++ repeat padding + in (cols, aligns', widths', heads', lns') + + +-- One or more horizontal rules after the first content line mark the previous +-- line as a header. All other horizontal lines are discarded. +rowToContent :: OrgTableRow + -> OrgTableContent + -> OrgTableContent +rowToContent OrgHlineRow = maybeBodyToHeader +rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs +rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as + +setLongestRow :: [a] + -> OrgTableContent + -> OrgTableContent +setLongestRow r (cols, aligns, widths, heads, lns) = + (max cols (length r), aligns, widths, heads, lns) + +maybeBodyToHeader :: OrgTableContent + -> OrgTableContent +maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, []) +maybeBodyToHeader content = content + +appendToBody :: [Blocks] + -> OrgTableContent + -> OrgTableContent +appendToBody r (cols, aligns, widths, heads, lns) = + (cols, aligns, widths, heads, lns ++ [r]) + +setAligns :: [Alignment] + -> OrgTableContent + -> OrgTableContent +setAligns aligns (cols, _, widths, heads, lns) = + (cols, aligns, widths, heads, lns) + +-- Paragraphs or Plain text +paraOrPlain :: OrgParser Blocks +paraOrPlain = try $ + trimInlines . mconcat + <$> many1 inline + <**> option B.plain + (try $ newline *> pure B.para) + +restOfLine :: OrgParser Inlines +restOfLine = mconcat <$> manyTill inline newline + + +-- +-- list blocks +-- + +list :: OrgParser Blocks +list = choice [ bulletList, orderedList ] <?> "list" + +bulletList :: OrgParser Blocks +bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) + +orderedList :: OrgParser Blocks +orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) + +genericListStart :: OrgParser String + -> OrgParser Int +genericListStart listMarker = try $ + (+) <$> (length <$> many spaceChar) + <*> (length <$> listMarker <* many1 spaceChar) + +-- parses bullet list start and returns its length (excl. following whitespace) +bulletListStart :: OrgParser Int +bulletListStart = genericListStart bulletListMarker + where bulletListMarker = pure <$> oneOf "*-+" + +orderedListStart :: OrgParser Int +orderedListStart = genericListStart orderedListMarker + -- Ordered list markers allowed in org-mode + where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + +listItem :: OrgParser Int + -> OrgParser Blocks +listItem start = try $ do + (markerLength, first) <- try (start >>= rawListItem) + rest <- many (listContinuation markerLength) + parseFromString parseBlocks $ concat (first:rest) + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem :: Int + -> OrgParser (Int, String) +rawListItem markerLength = try $ do + firstLine <- anyLine + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + +-- continuation of a list item - indented and separated by blankline or endline. +-- Note: nested lists are parsed as continuations. +listContinuation :: Int + -> OrgParser String +listContinuation markerLength = try $ + mappend <$> many blankline + <*> (concat <$> many1 (listLine markerLength)) + +-- parse a line of a list item +listLine :: Int + -> OrgParser String +listLine markerLength = try $ + indentWith markerLength *> anyLine + <**> pure (++ "\n") + + +-- +-- inline +-- + +inline :: OrgParser Inlines +inline = choice inlineParsers <?> "inline" + where inlineParsers = [ whitespace + , link + , str + , endline + , emph + , strong + , strikeout + , underline + , code + , verbatim + , subscript + , superscript + , symbol + ] + +-- treat these as potentially non-text when parsing inline: +specialChars :: [Char] +specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" + + +whitespace :: OrgParser Inlines +whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" + +str :: OrgParser Inlines +str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") + <* updateLastStrPos + +-- an endline character that can be treated as a space, not a structural break +endline :: OrgParser Inlines +endline = try $ do + newline + notFollowedBy blankline + notFollowedBy' exampleLine + notFollowedBy' hline + notFollowedBy' tableStart + notFollowedBy' drawerStart + notFollowedBy' headerStart + notFollowedBy' metaLineStart + notFollowedBy' commentLineStart + notFollowedBy' bulletListStart + notFollowedBy' orderedListStart + return B.space + +link :: OrgParser Inlines +link = explicitLink <|> selfLink <?> "link" + +explicitLink :: OrgParser Inlines +explicitLink = try $ do + char '[' + src <- enclosedRaw (char '[') (char ']') + title <- enclosedInlines (char '[') (char ']') + char ']' + return $ B.link src "" title + +selfLink :: OrgParser Inlines +selfLink = try $ do + src <- enclosedRaw (string "[[") (string "]]") + return $ B.link src "" (B.str src) + +emph :: OrgParser Inlines +emph = B.emph <$> inlinesEnclosedBy '/' + +strong :: OrgParser Inlines +strong = B.strong <$> inlinesEnclosedBy '*' + +strikeout :: OrgParser Inlines +strikeout = B.strikeout <$> inlinesEnclosedBy '+' + +-- There is no underline, so we use strong instead. +underline :: OrgParser Inlines +underline = B.strong <$> inlinesEnclosedBy '_' + +code :: OrgParser Inlines +code = B.code <$> rawEnclosedBy '=' + +verbatim :: OrgParser Inlines +verbatim = B.rawInline "" <$> rawEnclosedBy '~' + +subscript :: OrgParser Inlines +subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) + +superscript :: OrgParser Inlines +superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces) + +maybeGroupedByBraces :: OrgParser Inlines +maybeGroupedByBraces = try $ + choice [ try $ enclosedInlines (char '{') (char '}') + , B.str . (:"") <$> anyChar + ] + +symbol :: OrgParser Inlines +symbol = B.str . (: "") <$> oneOf specialChars + +enclosedInlines :: OrgParser a + -> OrgParser b + -> OrgParser Inlines +enclosedInlines start end = try $ + trimInlines . mconcat <$> enclosed start end inline + +-- FIXME: This is a hack +inlinesEnclosedBy :: Char + -> OrgParser Inlines +inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) + (atEnd $ char c) + +enclosedRaw :: OrgParser a + -> OrgParser b + -> OrgParser String +enclosedRaw start end = try $ + start *> (onSingleLine <|> spanningTwoLines) + where onSingleLine = try $ many1Till (noneOf "\n\r") end + spanningTwoLines = try $ + anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine + +rawEnclosedBy :: Char + -> OrgParser String +rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) + +-- succeeds only if we're not right after a str (ie. in middle of word) +atStart :: OrgParser a -> OrgParser a +atStart p = do + pos <- getPosition + st <- getState + guard $ stateLastStrPos st /= Just pos + p + +-- | succeeds only if we're at the end of a word +atEnd :: OrgParser a -> OrgParser a +atEnd p = try $ p <* lookingAtEndOfWord + where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars + +postWordChars :: [Char] +postWordChars = "\t\n\r !\"'),-.:?}" + +-- FIXME: These functions are hacks and should be replaced +endsOnThisOrNextLine :: Char + -> OrgParser () +endsOnThisOrNextLine c = do + inp <- getInput + let doOtherwise = \rest -> endsOnThisLine rest c (const mzero) + endsOnThisLine inp c doOtherwise + +endsOnThisLine :: [Char] + -> Char + -> ([Char] -> OrgParser ()) + -> OrgParser () +endsOnThisLine input c doOnOtherLines = do + case break (`elem` c:"\n") input of + (_,'\n':rest) -> doOnOtherLines rest + (_,_:rest@(n:_)) -> if n `elem` postWordChars + then return () + else endsOnThisLine rest c doOnOtherLines + _ -> mzero + diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 32893128a..127eae167 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -36,12 +36,13 @@ import Text.Pandoc.Builder (setMeta, fromList) import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Options -import Control.Monad ( when, liftM, guard, mzero ) +import Control.Monad ( when, liftM, guard, mzero, mplus ) import Data.List ( findIndex, intersperse, intercalate, transpose, sort, deleteFirstsBy, isSuffixOf ) +import Data.Maybe (fromMaybe) import qualified Data.Map as M import Text.Printf ( printf ) -import Control.Applicative ((<$>), (<$), (<*), (*>)) +import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>)) import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B import Data.Monoid (mconcat, mempty) @@ -347,14 +348,25 @@ lhsCodeBlock = try $ do getPosition >>= guard . (==1) . sourceColumn guardEnabled Ext_literate_haskell optional codeBlockStart - lns <- many1 birdTrackLine - -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns - else lns + lns <- latexCodeBlock <|> birdCodeBlock blanklines return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], []) - $ intercalate "\n" lns' + $ intercalate "\n" lns + +latexCodeBlock :: Parser [Char] st [[Char]] +latexCodeBlock = try $ do + try (latexBlockLine "\\begin{code}") + many1Till anyLine (try $ latexBlockLine "\\end{code}") + where + latexBlockLine s = skipMany spaceChar >> string s >> blankline + +birdCodeBlock :: Parser [Char] st [[Char]] +birdCodeBlock = filterSpace <$> many1 birdTrackLine + where filterSpace lns = + -- if (as is normal) there is always a space after >, drop it + if all (\ln -> null ln || take 1 ln == " ") lns + then map (drop 1) lns + else lns birdTrackLine :: Parser [Char] st [Char] birdTrackLine = char '>' >> anyLine @@ -519,7 +531,7 @@ directive' = do let body' = body ++ "\n\n" case label of "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) - "role" -> return mempty + "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields "container" -> parseFromString parseBlocks body' "replace" -> B.para <$> -- consumed by substKey parseFromString (trimInlines . mconcat <$> many inline) @@ -569,7 +581,7 @@ directive' = do "figure" -> do (caption, legend) <- parseFromString extractCaption body' let src = escapeURI $ trim top - return $ B.para (B.image src "" caption) <> legend + return $ B.para (B.image src "fig:" caption) <> legend "image" -> do let src = escapeURI $ trim top let alt = B.str $ maybe "image" trim $ lookup "alt" fields @@ -580,7 +592,38 @@ directive' = do Nothing -> B.image src "" alt _ -> return mempty --- Can contain haracter codes as decimal numbers or +-- TODO: +-- - Silently ignores illegal fields +-- - Silently drops classes +-- - Only supports :format: fields with a single format for :raw: roles, +-- change Text.Pandoc.Definition.Format to fix +addNewRole :: String -> [(String, String)] -> RSTParser Blocks +addNewRole roleString fields = do + (role, parentRole) <- parseFromString inheritedRole roleString + customRoles <- stateRstCustomRoles <$> getState + baseRole <- case M.lookup parentRole customRoles of + Just (base, _, _) -> return base + Nothing -> return parentRole + + let fmt = if baseRole == "raw" then lookup "format" fields else Nothing + annotate = maybe id addLanguage $ + if baseRole == "code" + then lookup "language" fields + else Nothing + + updateState $ \s -> s { + stateRstCustomRoles = + M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles + } + + return $ B.singleton Null + where + addLanguage lang (ident, classes, keyValues) = + (ident, "sourceCode" : lang : classes, keyValues) + inheritedRole = + (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')') + +-- Can contain character codes as decimal numbers or -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u -- or as XML-style hexadecimal character entities, e.g. ᨫ -- or text, which is used as-is. Comments start with .. @@ -919,17 +962,56 @@ strong = B.strong . trimInlines . mconcat <$> -- Note, this doesn't precisely implement the complex rule in -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules -- but it should be good enough for most purposes +-- +-- TODO: +-- - Classes are silently discarded in addNewRole +-- - Lacks sensible implementation for title-reference (which is the default) +-- - Allows direct use of the :raw: role, rST only allows inherited use. interpretedRole :: RSTParser Inlines interpretedRole = try $ do (role, contents) <- roleBefore <|> roleAfter - case role of - "sup" -> return $ B.superscript $ B.str contents - "sub" -> return $ B.subscript $ B.str contents - "math" -> return $ B.math contents - _ -> return $ B.str contents --unknown + renderRole contents Nothing role nullAttr + +renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines +renderRole contents fmt role attr = case role of + "sup" -> return $ B.superscript $ B.str contents + "superscript" -> return $ B.superscript $ B.str contents + "sub" -> return $ B.subscript $ B.str contents + "subscript" -> return $ B.subscript $ B.str contents + "emphasis" -> return $ B.emph $ B.str contents + "strong" -> return $ B.strong $ B.str contents + "rfc-reference" -> return $ rfcLink contents + "RFC" -> return $ rfcLink contents + "pep-reference" -> return $ pepLink contents + "PEP" -> return $ pepLink contents + "literal" -> return $ B.str contents + "math" -> return $ B.math contents + "title-reference" -> titleRef contents + "title" -> titleRef contents + "t" -> titleRef contents + "code" -> return $ B.codeWith attr contents + "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents + custom -> do + customRole <- stateRstCustomRoles <$> getState + case M.lookup custom customRole of + Just (_, newFmt, inherit) -> let + fmtStr = fmt `mplus` newFmt + (newRole, newAttr) = inherit attr + in renderRole contents fmtStr newRole newAttr + Nothing -> return $ B.str contents -- Undefined role + where + titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour + rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) + where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" + pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) + where padNo = replicate (4 - length pepNo) '0' ++ pepNo + pepUrl = "http://http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + +roleNameEndingIn :: RSTParser Char -> RSTParser String +roleNameEndingIn end = many1Till (letter <|> char '-') end roleMarker :: RSTParser String -roleMarker = char ':' *> many1Till (letter <|> char '-') (char ':') +roleMarker = char ':' *> roleNameEndingIn (char ':') roleBefore :: RSTParser (String,String) roleBefore = try $ do diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 23e07f621..93658cdea 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -594,7 +594,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> ([Inline] -> Inline) -- ^ Inline constructor -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly) -simpleInline border construct = surrounded border (inlineWithAttribute) >>= +simpleInline border construct = surrounded border inlineWithAttribute >>= return . construct . normalizeSpaces where inlineWithAttribute = (try $ optional attributes) >> inline diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 6112e764f..7fc9c2966 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -50,14 +50,16 @@ isOk c = isAscii c && isAlphaNum c convertTag :: Maybe FilePath -> Tag String -> IO (Tag String) convertTag userdata t@(TagOpen tagname as) - | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = - case fromAttrib "src" t of - [] -> return t - src -> do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) src - let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) - return $ TagOpen tagname - (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) + | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do + as' <- mapM processAttribute as + return $ TagOpen tagname as' + where processAttribute (x,y) = + if x == "src" || x == "href" || x == "poster" + then do + (raw, mime) <- getRaw userdata (fromAttrib "type" t) y + let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) + return (x, enc) + else return (x,y) convertTag userdata t@(TagOpen "script" as) = case fromAttrib "src" t of [] -> return t diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 8dcd88148..714402e42 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -91,7 +91,8 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha, isLetter, isDigit, isSpace ) import Data.List ( find, isPrefixOf, intercalate ) import qualified Data.Map as M -import Network.URI ( escapeURIString, isURI, unEscapeString ) +import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, + unEscapeString, parseURIReference ) import System.Directory import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (</>), takeExtension, dropExtension ) @@ -121,6 +122,7 @@ import Data.ByteString.Lazy (toChunks) import Network.HTTP.Conduit (httpLbs, parseUrl, withManager, responseBody, responseHeaders) import Network.HTTP.Types.Header ( hContentType) +import Network (withSocketsDo) #else import Network.URI (parseURI) import Network.HTTP (findHeader, rspBody, @@ -270,7 +272,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day) where parsetimeWith = parseTime defaultTimeLocale formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", - "%d %B %Y", "%b. %d, %Y", "%B %d, %Y"] + "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"] -- -- Pandoc block and inline list processing @@ -531,7 +533,7 @@ headerShift n = walk shift -- | Detect if a list is tight. isTightList :: [[Block]] -> Bool -isTightList = and . map firstIsPlain +isTightList = all firstIsPlain where firstIsPlain (Plain _ : _) = True firstIsPlain _ = False @@ -563,14 +565,10 @@ makeMeta title authors date = -- | Render HTML tags. renderTags' :: [Tag String] -> String renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } + renderOptions{ optMinimize = matchTags ["hr", "br", "img", + "meta", "link"] + , optRawTag = matchTags ["script", "style"] } + where matchTags = \tags -> flip elem tags . map toLower -- -- File handling @@ -626,9 +624,13 @@ fetchItem :: Maybe String -> String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) fetchItem sourceURL s | isURI s = openURL s - | otherwise = case sourceURL of - Just u -> openURL (u ++ "/" ++ s) - Nothing -> E.try readLocalFile + | otherwise = + case sourceURL >>= parseURIReference of + Just u -> case parseURIReference s of + Just s' -> openURL $ show $ + s' `nonStrictRelativeTo` u + Nothing -> openURL $ show u ++ "/" ++ s + Nothing -> E.try readLocalFile where readLocalFile = do let mime = case takeExtension s of ".gz" -> getMimeType $ dropExtension s @@ -644,7 +646,7 @@ openURL u contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u in return $ Right (decodeLenient contents, Just mime) #ifdef HTTP_CONDUIT - | otherwise = E.try $ do + | otherwise = withSocketsDo $ E.try $ do req <- parseUrl u resp <- withManager $ httpLbs req return (BS.concat $ toChunks $ responseBody resp, diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index ad8838f72..52625abf6 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -190,6 +190,7 @@ resolveVar var' val = Just (String t) -> T.stripEnd t Just (Number n) -> T.pack $ show n Just (Bool True) -> "true" + Just (Object _) -> "true" Just _ -> mempty Nothing -> mempty diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 179d9bc5b..3095cf508 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -130,7 +130,7 @@ blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure blockToConTeXt (Para [Image txt (src,'f':'i':'g':':':_)]) = do capt <- inlineListToConTeXt txt - return $ blankline $$ "\\placefigure[here]" <> braces capt <> + return $ blankline $$ "\\placefigure" <> braces capt <> braces ("\\externalfigure" <> brackets (text src)) <> blankline blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst @@ -205,9 +205,9 @@ blockToConTeXt (Table caption aligns widths heads rows) = do else liftM ($$ "\\HL") $ tableRowToConTeXt heads captionText <- inlineListToConTeXt caption rows' <- mapM tableRowToConTeXt rows - return $ "\\placetable" <> brackets ("here" <> if null caption - then ",none" - else "") + return $ "\\placetable" <> (if null caption + then brackets "none" + else empty) <> braces captionText $$ "\\starttable" <> brackets (text colDescriptors) $$ "\\HL" $$ headers $$ diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 5c7341b69..2a834c2da 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,7 +29,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, groupBy ) +import Data.Maybe (fromMaybe) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -42,6 +43,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.ImageSize import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Writers.Shared (fixDisplayMath) import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) @@ -130,7 +132,8 @@ writeDocx opts doc@(Pandoc meta _) = do let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _, _) = - mkOverrideNode ("/word/" ++ imgpath, maybe "application/octet-stream" id mbMimeType) + mkOverrideNode ("/word/" ++ imgpath, + fromMaybe "application/octet-stream" mbMimeType) let overrides = map mkOverrideNode [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -260,6 +263,12 @@ writeDocx opts doc@(Pandoc meta _) = do fontTableEntry <- entryFromArchive "word/fontTable.xml" settingsEntry <- entryFromArchive "word/settings.xml" webSettingsEntry <- entryFromArchive "word/webSettings.xml" + let miscRels = [ f | f <- filesInArchive refArchive + , "word/_rels/" `isPrefixOf` f + , ".xml.rels" `isSuffixOf` f + , f /= "word/_rels/document.xml.rels" + , f /= "word/_rels/footnotes.xml.rels" ] + miscRelEntries <- mapM entryFromArchive miscRels -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -267,7 +276,7 @@ writeDocx opts doc@(Pandoc meta _) = do footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : - imageEntries + imageEntries ++ miscRelEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] @@ -322,7 +331,7 @@ mkNum markers marker numid = NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] - where absnumid = maybe 0 id $ M.lookup marker markers + where absnumid = fromMaybe 0 $ M.lookup marker markers mkAbstractNum :: (ListMarker,Int) -> IO Element mkAbstractNum (marker,numid) = do @@ -807,30 +816,8 @@ br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] parseXml :: Archive -> String -> IO Element parseXml refArchive relpath = - case (findEntryByPath relpath refArchive >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of - Just d -> return d + case findEntryByPath relpath refArchive of + Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of + Just d -> return d + Nothing -> fail $ relpath ++ " corrupt in reference docx" Nothing -> fail $ relpath ++ " missing in reference docx" - -isDisplayMath :: Inline -> Bool -isDisplayMath (Math DisplayMath _) = True -isDisplayMath _ = False - -stripLeadingTrailingSpace :: [Inline] -> [Inline] -stripLeadingTrailingSpace = go . reverse . go . reverse - where go (Space:xs) = xs - go xs = xs - -fixDisplayMath :: Block -> Block -fixDisplayMath (Plain lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath (Para lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath x = x diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ef4c7be23..dae45b90f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, CPP #-} +{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-} {- Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> @@ -30,16 +30,18 @@ Conversion of 'Pandoc' documents to EPUB. -} module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef -import Data.Maybe ( fromMaybe, isNothing ) +import qualified Data.Map as M +import Data.Maybe ( fromMaybe ) import Data.List ( isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import Text.Pandoc.UTF8 ( fromStringLazy, toString ) +import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.SelfContained ( makeSelfContained ) import Codec.Archive.Zip +import Control.Applicative ((<$>)) import Data.Time.Clock.POSIX import Data.Time import System.Locale @@ -54,7 +56,7 @@ import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) -import Data.Char ( toLower ) +import Data.Char ( toLower, isDigit ) import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) #if MIN_VERSION_base(4,6,0) @@ -63,6 +65,7 @@ import Prelude hiding (catch) #endif import Control.Exception (catch, SomeException) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) +import Text.HTML.TagSoup -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -70,12 +73,237 @@ import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -- in filenames, chapter0003.xhtml. data Chapter = Chapter (Maybe [Int]) [Block] +data EPUBMetadata = EPUBMetadata{ + epubIdentifier :: [Identifier] + , epubTitle :: [Title] + , epubDate :: String + , epubLanguage :: String + , epubCreator :: [Creator] + , epubContributor :: [Creator] + , epubSubject :: [String] + , epubDescription :: Maybe String + , epubType :: Maybe String + , epubFormat :: Maybe String + , epubPublisher :: Maybe String + , epubSource :: Maybe String + , epubRelation :: Maybe String + , epubCoverage :: Maybe String + , epubRights :: Maybe String + , epubCoverImage :: Maybe String + , epubStylesheet :: Maybe Stylesheet + } deriving Show + +data Stylesheet = StylesheetPath FilePath + | StylesheetContents String + deriving Show + +data Creator = Creator{ + creatorText :: String + , creatorRole :: Maybe String + , creatorFileAs :: Maybe String + } deriving Show + +data Identifier = Identifier{ + identifierText :: String + , identifierScheme :: Maybe String + } deriving Show + +data Title = Title{ + titleText :: String + , titleFileAs :: Maybe String + , titleType :: Maybe String + } deriving Show + +dcName :: String -> QName +dcName n = QName n Nothing (Just "dc") + +dcNode :: Node t => String -> t -> Element +dcNode = node . dcName + +opfName :: String -> QName +opfName n = QName n Nothing (Just "opf") + +plainify :: [Inline] -> String +plainify t = + trimr $ writePlain def{ writerStandalone = False } + $ Pandoc nullMeta [Plain $ walk removeNote t] + +removeNote :: Inline -> Inline +removeNote (Note _) = Str "" +removeNote x = x + +getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata +getEPUBMetadata opts meta = do + let md = metadataFromMeta opts meta + let elts = onlyElems $ parseXML $ writerEpubMetadata opts + let md' = foldr addMetadataFromXML md elts + let addIdentifier m = + if null (epubIdentifier m) + then do + randomId <- fmap show getRandomUUID + return $ m{ epubIdentifier = [Identifier randomId Nothing] } + else return m + let addLanguage m = + if null (epubLanguage m) + then case lookup "lang" (writerVariables opts) of + Just x -> return m{ epubLanguage = x } + Nothing -> do + localeLang <- catch (liftM + (map (\c -> if c == '_' then '-' else c) . + takeWhile (/='.')) $ getEnv "LANG") + (\e -> let _ = (e :: SomeException) in return "en-US") + return m{ epubLanguage = localeLang } + else return m + let fixDate m = + if null (epubDate m) + then do + currentTime <- getCurrentTime + return $ m{ epubDate = showDateTimeISO8601 currentTime } + else return m + let addAuthor m = + if any (\c -> creatorRole c == Just "aut") $ epubCreator m + then return m + else do + let authors' = map plainify $ docAuthors meta + let toAuthor name = Creator{ creatorText = name + , creatorRole = Just "aut" + , creatorFileAs = Nothing } + return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } + addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage + +addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata +addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md + | name == "identifier" = md{ epubIdentifier = + Identifier{ identifierText = strContent e + , identifierScheme = lookupAttr (opfName "scheme") attrs + } : epubIdentifier md } + | name == "title" = md{ epubTitle = + Title{ titleText = strContent e + , titleFileAs = getAttr "file-as" + , titleType = getAttr "type" + } : epubTitle md } + | name == "date" = md{ epubDate = fromMaybe "" $ normalizeDate' + $ strContent e } + | name == "language" = md{ epubLanguage = strContent e } + | name == "creator" = md{ epubCreator = + Creator{ creatorText = strContent e + , creatorRole = getAttr "role" + , creatorFileAs = getAttr "file-as" + } : epubCreator md } + | name == "contributor" = md{ epubContributor = + Creator { creatorText = strContent e + , creatorRole = getAttr "role" + , creatorFileAs = getAttr "file-as" + } : epubContributor md } + | name == "subject" = md{ epubSubject = strContent e : epubSubject md } + | name == "description" = md { epubDescription = Just $ strContent e } + | name == "type" = md { epubType = Just $ strContent e } + | name == "format" = md { epubFormat = Just $ strContent e } + | name == "type" = md { epubType = Just $ strContent e } + | name == "publisher" = md { epubPublisher = Just $ strContent e } + | name == "source" = md { epubSource = Just $ strContent e } + | name == "relation" = md { epubRelation = Just $ strContent e } + | name == "coverage" = md { epubCoverage = Just $ strContent e } + | name == "rights" = md { epubRights = Just $ strContent e } + | otherwise = md + where getAttr n = lookupAttr (opfName n) attrs +addMetadataFromXML _ md = md + +metaValueToString :: MetaValue -> String +metaValueToString (MetaString s) = s +metaValueToString (MetaInlines ils) = plainify ils +metaValueToString (MetaBlocks bs) = plainify $ query (:[]) bs +metaValueToString (MetaBool b) = show b +metaValueToString _ = "" + +getList :: String -> Meta -> (MetaValue -> a) -> [a] +getList s meta handleMetaValue = + case lookupMeta s meta of + Just (MetaList xs) -> map handleMetaValue xs + Just mv -> [handleMetaValue mv] + Nothing -> [] + +getIdentifier :: Meta -> [Identifier] +getIdentifier meta = getList "identifier" meta handleMetaValue + where handleMetaValue (MetaMap m) = + Identifier{ identifierText = maybe "" metaValueToString + $ M.lookup "text" m + , identifierScheme = metaValueToString <$> + M.lookup "scheme" m } + handleMetaValue mv = Identifier (metaValueToString mv) Nothing + +getTitle :: Meta -> [Title] +getTitle meta = getList "title" meta handleMetaValue + where handleMetaValue (MetaMap m) = + Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m + , titleFileAs = metaValueToString <$> M.lookup "file-as" m + , titleType = metaValueToString <$> M.lookup "type" m } + handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing + +getCreator :: String -> Meta -> [Creator] +getCreator s meta = getList s meta handleMetaValue + where handleMetaValue (MetaMap m) = + Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m + , creatorFileAs = metaValueToString <$> M.lookup "file-as" m + , creatorRole = metaValueToString <$> M.lookup "role" m } + handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing + +simpleList :: String -> Meta -> [String] +simpleList s meta = + case lookupMeta s meta of + Just (MetaList xs) -> map metaValueToString xs + Just x -> [metaValueToString x] + Nothing -> [] + +metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata +metadataFromMeta opts meta = EPUBMetadata{ + epubIdentifier = identifiers + , epubTitle = titles + , epubDate = date + , epubLanguage = language + , epubCreator = creators + , epubContributor = contributors + , epubSubject = subjects + , epubDescription = description + , epubType = epubtype + , epubFormat = format + , epubPublisher = publisher + , epubSource = source + , epubRelation = relation + , epubCoverage = coverage + , epubRights = rights + , epubCoverImage = coverImage + , epubStylesheet = stylesheet + } + where identifiers = getIdentifier meta + titles = getTitle meta + date = fromMaybe "" $ + (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate' + language = maybe "" metaValueToString $ + lookupMeta "language" meta `mplus` lookupMeta "lang" meta + creators = getCreator "creator" meta + contributors = getCreator "contributor" meta + subjects = simpleList "subject" meta + description = metaValueToString <$> lookupMeta "description" meta + epubtype = metaValueToString <$> lookupMeta "type" meta + format = metaValueToString <$> lookupMeta "format" meta + publisher = metaValueToString <$> lookupMeta "publisher" meta + source = metaValueToString <$> lookupMeta "source" meta + relation = metaValueToString <$> lookupMeta "relation" meta + coverage = metaValueToString <$> lookupMeta "coverage" meta + rights = metaValueToString <$> lookupMeta "rights" meta + coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` + (metaValueToString <$> lookupMeta "cover-image" meta) + stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` + ((StylesheetPath . metaValueToString) <$> + lookupMeta "stylesheet" meta) + -- | Produce an EPUB file from a Pandoc document. writeEPUB :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString writeEPUB opts doc@(Pandoc meta _) = do - let version = maybe EPUB2 id (writerEpubVersion opts) + let version = fromMaybe EPUB2 (writerEpubVersion opts) let epub3 = version == EPUB3 epochtime <- floor `fmap` getPOSIXTime let mkEntry path content = toEntry path epochtime content @@ -93,11 +321,11 @@ writeEPUB opts doc@(Pandoc meta _) = do then MathML Nothing else writerHTMLMathMethod opts , writerWrapText = False } - let mbCoverImage = lookup "epub-cover-image" vars + metadata <- getEPUBMetadata opts' meta -- cover page (cpgEntry, cpicEntry) <- - case mbCoverImage of + case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do let coverImage = "cover-image" ++ takeExtension img @@ -114,15 +342,15 @@ writeEPUB opts doc@(Pandoc meta _) = do let tpEntry = mkEntry "title_page.xhtml" tpContent -- handle pictures - picsRef <- newIORef [] - Pandoc _ blocks <- walkM - (transformInline opts' picsRef) doc - pics <- readIORef picsRef + mediaRef <- newIORef [] + Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= + walkM (transformBlock opts' mediaRef) + pics <- readIORef mediaRef let readPicEntry entries (oldsrc, newsrc) = do res <- fetchItem (writerSourceURL opts') oldsrc case res of Left _ -> do - warn $ "Could not find image `" ++ oldsrc ++ "', skipping..." + warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." return entries Right (img,_) -> return $ (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries @@ -179,64 +407,60 @@ writeEPUB opts doc@(Pandoc meta _) = do chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) $ renderHtml $ writeHtml opts'{ writerNumberOffset = - maybe [] id mbnum } + fromMaybe [] mbnum } $ case bs of (Header _ _ xs : _) -> - Pandoc (setMeta "title" (fromList xs) nullMeta) bs + -- remove notes or we get doubled footnotes + Pandoc (setMeta "title" (walk removeNote $ fromList xs) + nullMeta) bs _ -> Pandoc nullMeta bs let chapterEntries = zipWith chapToEntry [1..] chapters -- incredibly inefficient (TODO): - let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + let containsMathML ent = epub3 && + "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + let containsSVG ent = epub3 && + "<svg" `isInfixOf` (B8.unpack $ fromEntry ent) + let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent] -- contents.opf - localeLang <- catch (liftM (map (\c -> if c == '_' then '-' else c) . - takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: SomeException) in return "en-US") - let lang = case lookup "lang" (writerVariables opts') of - Just x -> x - Nothing -> localeLang - let userNodes = onlyElems $ parseXML $ writerEpubMetadata opts' - let mbIdent = findElement (QName "identifier" Nothing (Just "dc")) - $ unode "dummy" ! [] $ userNodes - uuid <- case mbIdent of - Just id' -> return $ trim $ strContent id' - Nothing -> fmap show getRandomUUID let chapterNode ent = unode "item" ! ([("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", "application/xhtml+xml")] - ++ [("properties","mathml") | epub3 && - containsMathML ent]) $ () + ++ case props ent of + [] -> [] + xs -> [("properties", unwords xs)]) + $ () let chapterRefNode ent = unode "itemref" ! [("idref", takeBaseName $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", fromMaybe "application/octet-stream" - $ imageTypeOf $ eRelativePath ent)] $ () + $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), - ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ () - let plainify t = trimr $ - writePlain opts'{ writerStandalone = False } $ - Pandoc meta [Plain t] - let plainTitle = plainify $ docTitle meta - let plainAuthors = map plainify $ docAuthors meta + ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () + let plainTitle = case docTitle meta of + [] -> case epubTitle metadata of + [] -> "UNTITLED" + (x:_) -> titleText x + x -> plainify x + let uuid = case epubIdentifier metadata of + (x:_) -> identifierText x -- use first identifier as UUID + [] -> error "epubIdentifier is null" -- shouldn't happen currentTime <- getCurrentTime - let plainDate = maybe (showDateTimeISO8601 currentTime) id - $ normalizeDate $ stringify $ docDate meta - let contentsData = fromStringLazy $ ppTopElement $ + let contentsData = UTF8.fromStringLazy $ ppTopElement $ unode "package" ! [("version", case version of EPUB2 -> "2.0" EPUB3 -> "3.0") ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","BookId")] $ - [ metadataElement version userNodes - uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage + ,("unique-identifier","epub-id-1")] $ + [ metadataElement version metadata currentTime , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () @@ -248,10 +472,15 @@ writeEPUB opts doc@(Pandoc meta _) = do [("properties","nav") | epub3 ]) $ () ] ++ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ - map pictureNode (cpicEntry ++ picEntries) ++ + (case cpicEntry of + [] -> [] + (x:_) -> [add_attrs + [Attr (unqual "properties") "cover-image" | epub3] + (pictureNode x)]) ++ + map pictureNode picEntries ++ map fontNode fontEntries , unode "spine" ! [("toc","ncx")] $ - case mbCoverImage of + case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! [("idref", "cover"),("linear","no")] $ () ] @@ -266,10 +495,11 @@ writeEPUB opts doc@(Pandoc meta _) = do map chapterRefNode chapterEntries) , unode "guide" $ [ unode "reference" ! - [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () + [("type","toc"),("title",plainTitle), + ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! - [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | mbCoverImage /= Nothing + [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing ] ] let contentsEntry = mkEntry "content.opf" contentsData @@ -312,7 +542,7 @@ writeEPUB opts doc@(Pandoc meta _) = do [ unode "navLabel" $ unode "text" (plainify $ docTitle meta) , unode "content" ! [("src","title_page.xhtml")] $ () ] - let tocData = fromStringLazy $ ppTopElement $ + let tocData = UTF8.fromStringLazy $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ @@ -324,7 +554,7 @@ writeEPUB opts doc@(Pandoc meta _) = do ,("content", "0")] $ () , unode "meta" ! [("name","dtb:maxPageNumber") ,("content", "0")] $ () - ] ++ case mbCoverImage of + ] ++ case epubCoverImage metadata of Nothing -> [] Just _ -> [unode "meta" ! [("name","cover"), ("content","cover-image")] $ ()] @@ -344,7 +574,7 @@ writeEPUB opts doc@(Pandoc meta _) = do (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if epub3 then "nav" else "div" - let navData = fromStringLazy $ ppTopElement $ + let navData = UTF8.fromStringLazy $ ppTopElement $ unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml") ,("xmlns:epub","http://www.idpf.org/2007/ops")] $ [ unode "head" $ @@ -358,10 +588,10 @@ writeEPUB opts doc@(Pandoc meta _) = do let navEntry = mkEntry "nav.xhtml" navData -- mimetype - let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip" + let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip" -- container.xml - let containerData = fromStringLazy $ ppTopElement $ + let containerData = UTF8.fromStringLazy $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ @@ -370,18 +600,19 @@ writeEPUB opts doc@(Pandoc meta _) = do let containerEntry = mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml - let apple = fromStringLazy $ ppTopElement $ + let apple = UTF8.fromStringLazy $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ unode "option" ! [("name","specified-fonts")] $ "true" let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- stylesheet - stylesheet <- case writerEpubStylesheet opts of - Just s -> return s - Nothing -> toString `fmap` + stylesheet <- case epubStylesheet metadata of + Just (StylesheetPath fp) -> UTF8.readFile fp + Just (StylesheetContents s) -> return s + Nothing -> UTF8.toString `fmap` readDataFile (writerUserDataDir opts) "epub.css" - let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet + let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet -- construct archive let archive = foldr addEntryToArchive emptyArchive @@ -390,51 +621,150 @@ writeEPUB opts doc@(Pandoc meta _) = do (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) return $ fromArchive archive -metadataElement :: EPUBVersion -> [Element] -> String -> String -> String -> [String] - -> String -> UTCTime -> Maybe a -> Element -metadataElement version userNodes uuid lang title authors date currentTime mbCoverImage = - let elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") - ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ - filter isMetadataElement userNodes - dublinElements = ["contributor","coverage","creator","date", - "description","format","identifier","language","publisher", - "relation","rights","source","subject","title","type"] - isMetadataElement e = (qPrefix (elName e) == Just "dc" && - qName (elName e) `elem` dublinElements) || - (qPrefix (elName e) == Nothing && - qName (elName e) `elem` ["link","meta"]) - contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) - newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ - [ unode "dc:language" lang | not (elt `contains` "language") ] ++ - [ unode "dc:identifier" ! [("id","BookId")] $ uuid | - not (elt `contains` "identifier") ] ++ - [ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2] - $ a | a <- authors, not (elt `contains` "creator") ] ++ - [ unode "dc:date" date | not (elt `contains` "date") ] ++ - [ unode "meta" ! [("property", "dcterms:modified")] $ - (showDateTimeISO8601 currentTime) | version == EPUB3] ++ - [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () | - not (isNothing mbCoverImage) ] - in elt{ elContent = elContent elt ++ map Elem newNodes } +metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element +metadataElement version md currentTime = + unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes + where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes + ++ creatorNodes ++ contributorNodes ++ subjectNodes + ++ descriptionNodes ++ typeNodes ++ formatNodes + ++ publisherNodes ++ sourceNodes ++ relationNodes + ++ coverageNodes ++ rightsNodes ++ coverImageNodes + ++ modifiedNodes + withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) + ([1..] :: [Int])) + identifierNodes = withIds "epub-id" toIdentifierNode $ + epubIdentifier md + titleNodes = withIds "epub-title" toTitleNode $ epubTitle md + dateNodes = dcTag' "date" $ epubDate md + languageNodes = [dcTag "language" $ epubLanguage md] + creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ + epubCreator md + contributorNodes = withIds "epub-contributor" + (toCreatorNode "contributor") $ epubContributor md + subjectNodes = map (dcTag "subject") $ epubSubject md + descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md + typeNodes = maybe [] (dcTag' "type") $ epubType md + formatNodes = maybe [] (dcTag' "format") $ epubFormat md + publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md + sourceNodes = maybe [] (dcTag' "source") $ epubSource md + relationNodes = maybe [] (dcTag' "relation") $ epubRelation md + coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md + rightsNodes = maybe [] (dcTag' "rights") $ epubRights md + coverImageNodes = maybe [] + (const $ [unode "meta" ! [("name","cover"), + ("content","cover-image")] $ ()]) + $ epubCoverImage md + modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ + (showDateTimeISO8601 currentTime) | version == EPUB3 ] + dcTag n s = unode ("dc:" ++ n) s + dcTag' n s = [dcTag n s] + toIdentifierNode id' (Identifier txt scheme) + | version == EPUB2 = [dcNode "identifier" ! + ([("id",id')] ++ maybe [] (\x -> [("opf:scheme", x)]) scheme) $ + txt] + | otherwise = [dcNode "identifier" ! [("id",id')] $ txt] ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","identifier-type"), + ("scheme","onix:codelist5")] $ x]) + (schemeToOnix `fmap` scheme) + toCreatorNode s id' creator + | version == EPUB2 = [dcNode s ! + ([("id",id')] ++ + maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++ + maybe [] (\x -> [("opf:role",x)]) + (creatorRole creator >>= toRelator)) $ creatorText creator] + | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","file-as")] $ x]) + (creatorFileAs creator) ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","role"), + ("scheme","marc:relators")] $ x]) + (creatorRole creator >>= toRelator) + toTitleNode id' title + | version == EPUB2 = [dcNode "title" ! + ([("id",id')] ++ + maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++ + maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $ + titleText title] + | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] + ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","file-as")] $ x]) + (titleFileAs title) ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","title-type")] $ x]) + (titleType title) + schemeToOnix "ISBN-10" = "02" + schemeToOnix "GTIN-13" = "03" + schemeToOnix "UPC" = "04" + schemeToOnix "ISMN-10" = "05" + schemeToOnix "DOI" = "06" + schemeToOnix "LCCN" = "13" + schemeToOnix "GTIN-14" = "14" + schemeToOnix "ISBN-13" = "15" + schemeToOnix "Legal deposit number" = "17" + schemeToOnix "URN" = "22" + schemeToOnix "OCLC" = "23" + schemeToOnix "ISMN-13" = "25" + schemeToOnix "ISBN-A" = "26" + schemeToOnix "JP" = "27" + schemeToOnix "OLCC" = "28" + schemeToOnix _ = "01" showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" +transformTag :: WriterOptions + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> Tag String + -> IO (Tag String) +transformTag opts mediaRef tag@(TagOpen name attr) + | name == "video" || name == "source" || name == "img" = do + let src = fromAttrib "src" tag + let poster = fromAttrib "poster" tag + let oldsrc = maybe src (</> src) $ writerSourceURL opts + let oldposter = maybe poster (</> poster) $ writerSourceURL opts + newsrc <- modifyMediaRef mediaRef oldsrc + newposter <- modifyMediaRef mediaRef oldposter + let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ + [("src", newsrc) | not (null newsrc)] ++ + [("poster", newposter) | not (null newposter)] + return $ TagOpen name attr' +transformTag _ _ tag = return tag + +modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath +modifyMediaRef _ "" = return "" +modifyMediaRef mediaRef oldsrc = do + media <- readIORef mediaRef + case lookup oldsrc media of + Just n -> return n + Nothing -> do + let new = "media/file" ++ show (length media) ++ + takeExtension oldsrc + modifyIORef mediaRef ( (oldsrc, new): ) + return new + +transformBlock :: WriterOptions + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> Block + -> IO Block +transformBlock opts mediaRef (RawBlock fmt raw) + | fmt == Format "html" = do + let tags = parseTags raw + tags' <- mapM (transformTag opts mediaRef) tags + return $ RawBlock fmt (renderTags tags') +transformBlock _ _ b = return b + transformInline :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline opts picsRef (Image lab (src,tit)) = do +transformInline opts mediaRef (Image lab (src,tit)) = do let src' = unEscapeString src - pics <- readIORef picsRef let oldsrc = maybe src' (</> src) $ writerSourceURL opts - let ext = takeExtension src' - newsrc <- case lookup oldsrc pics of - Just n -> return n - Nothing -> do - let new = "images/img" ++ show (length pics) ++ ext - modifyIORef picsRef ( (oldsrc, new): ) - return new + newsrc <- modifyMediaRef mediaRef oldsrc return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do @@ -464,16 +794,12 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs -imageTypeOf :: FilePath -> Maybe String -imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of - "jpg" -> Just "image/jpeg" - "jpeg" -> Just "image/jpeg" - "jfif" -> Just "image/jpeg" - "png" -> Just "image/png" - "gif" -> Just "image/gif" - "svg" -> Just "image/svg+xml" - _ -> Nothing - +mediaTypeOf :: FilePath -> Maybe String +mediaTypeOf x = case getMimeType x of + Just y@('i':'m':'a':'g':'e':_) -> Just y + Just y@('v':'i':'d':'e':'o':_) -> Just y + Just y@('a':'u':'d':'i':'o':_) -> Just y + _ -> Nothing data IdentState = IdentState{ chapterNumber :: Int, @@ -527,3 +853,287 @@ replaceRefs refTable = walk replaceOneRef Just url -> Link lab (url,tit) Nothing -> x replaceOneRef x = x + +-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM +normalizeDate' :: String -> Maybe String +normalizeDate' xs = + let xs' = trim xs in + case xs' of + [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY + [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM + -> Just xs' + _ -> normalizeDate xs' + +toRelator :: String -> Maybe String +toRelator x + | x `elem` relators = Just x + | otherwise = lookup (map toLower x) relatorMap + +relators :: [String] +relators = map snd relatorMap + +relatorMap :: [(String, String)] +relatorMap = + [("abridger", "abr") + ,("actor", "act") + ,("adapter", "adp") + ,("addressee", "rcp") + ,("analyst", "anl") + ,("animator", "anm") + ,("annotator", "ann") + ,("appellant", "apl") + ,("appellee", "ape") + ,("applicant", "app") + ,("architect", "arc") + ,("arranger", "arr") + ,("art copyist", "acp") + ,("art director", "adi") + ,("artist", "art") + ,("artistic director", "ard") + ,("assignee", "asg") + ,("associated name", "asn") + ,("attributed name", "att") + ,("auctioneer", "auc") + ,("author", "aut") + ,("author in quotations or text abstracts", "aqt") + ,("author of afterword, colophon, etc.", "aft") + ,("author of dialog", "aud") + ,("author of introduction, etc.", "aui") + ,("autographer", "ato") + ,("bibliographic antecedent", "ant") + ,("binder", "bnd") + ,("binding designer", "bdd") + ,("blurb writer", "blw") + ,("book designer", "bkd") + ,("book producer", "bkp") + ,("bookjacket designer", "bjd") + ,("bookplate designer", "bpd") + ,("bookseller", "bsl") + ,("braille embosser", "brl") + ,("broadcaster", "brd") + ,("calligrapher", "cll") + ,("cartographer", "ctg") + ,("caster", "cas") + ,("censor", "cns") + ,("choreographer", "chr") + ,("cinematographer", "cng") + ,("client", "cli") + ,("collection registrar", "cor") + ,("collector", "col") + ,("collotyper", "clt") + ,("colorist", "clr") + ,("commentator", "cmm") + ,("commentator for written text", "cwt") + ,("compiler", "com") + ,("complainant", "cpl") + ,("complainant-appellant", "cpt") + ,("complainant-appellee", "cpe") + ,("composer", "cmp") + ,("compositor", "cmt") + ,("conceptor", "ccp") + ,("conductor", "cnd") + ,("conservator", "con") + ,("consultant", "csl") + ,("consultant to a project", "csp") + ,("contestant", "cos") + ,("contestant-appellant", "cot") + ,("contestant-appellee", "coe") + ,("contestee", "cts") + ,("contestee-appellant", "ctt") + ,("contestee-appellee", "cte") + ,("contractor", "ctr") + ,("contributor", "ctb") + ,("copyright claimant", "cpc") + ,("copyright holder", "cph") + ,("corrector", "crr") + ,("correspondent", "crp") + ,("costume designer", "cst") + ,("court governed", "cou") + ,("court reporter", "crt") + ,("cover designer", "cov") + ,("creator", "cre") + ,("curator", "cur") + ,("dancer", "dnc") + ,("data contributor", "dtc") + ,("data manager", "dtm") + ,("dedicatee", "dte") + ,("dedicator", "dto") + ,("defendant", "dfd") + ,("defendant-appellant", "dft") + ,("defendant-appellee", "dfe") + ,("degree granting institution", "dgg") + ,("delineator", "dln") + ,("depicted", "dpc") + ,("depositor", "dpt") + ,("designer", "dsr") + ,("director", "drt") + ,("dissertant", "dis") + ,("distribution place", "dbp") + ,("distributor", "dst") + ,("donor", "dnr") + ,("draftsman", "drm") + ,("dubious author", "dub") + ,("editor", "edt") + ,("editor of compilation", "edc") + ,("editor of moving image work", "edm") + ,("electrician", "elg") + ,("electrotyper", "elt") + ,("enacting jurisdiction", "enj") + ,("engineer", "eng") + ,("engraver", "egr") + ,("etcher", "etr") + ,("event place", "evp") + ,("expert", "exp") + ,("facsimilist", "fac") + ,("field director", "fld") + ,("film director", "fmd") + ,("film distributor", "fds") + ,("film editor", "flm") + ,("film producer", "fmp") + ,("filmmaker", "fmk") + ,("first party", "fpy") + ,("forger", "frg") + ,("former owner", "fmo") + ,("funder", "fnd") + ,("geographic information specialist", "gis") + ,("honoree", "hnr") + ,("host", "hst") + ,("host institution", "his") + ,("illuminator", "ilu") + ,("illustrator", "ill") + ,("inscriber", "ins") + ,("instrumentalist", "itr") + ,("interviewee", "ive") + ,("interviewer", "ivr") + ,("inventor", "inv") + ,("issuing body", "isb") + ,("judge", "jud") + ,("jurisdiction governed", "jug") + ,("laboratory", "lbr") + ,("laboratory director", "ldr") + ,("landscape architect", "lsa") + ,("lead", "led") + ,("lender", "len") + ,("libelant", "lil") + ,("libelant-appellant", "lit") + ,("libelant-appellee", "lie") + ,("libelee", "lel") + ,("libelee-appellant", "let") + ,("libelee-appellee", "lee") + ,("librettist", "lbt") + ,("licensee", "lse") + ,("licensor", "lso") + ,("lighting designer", "lgd") + ,("lithographer", "ltg") + ,("lyricist", "lyr") + ,("manufacture place", "mfp") + ,("manufacturer", "mfr") + ,("marbler", "mrb") + ,("markup editor", "mrk") + ,("metadata contact", "mdc") + ,("metal-engraver", "mte") + ,("moderator", "mod") + ,("monitor", "mon") + ,("music copyist", "mcp") + ,("musical director", "msd") + ,("musician", "mus") + ,("narrator", "nrt") + ,("onscreen presenter", "osp") + ,("opponent", "opn") + ,("organizer of meeting", "orm") + ,("originator", "org") + ,("other", "oth") + ,("owner", "own") + ,("panelist", "pan") + ,("papermaker", "ppm") + ,("patent applicant", "pta") + ,("patent holder", "pth") + ,("patron", "pat") + ,("performer", "prf") + ,("permitting agency", "pma") + ,("photographer", "pht") + ,("plaintiff", "ptf") + ,("plaintiff-appellant", "ptt") + ,("plaintiff-appellee", "pte") + ,("platemaker", "plt") + ,("praeses", "pra") + ,("presenter", "pre") + ,("printer", "prt") + ,("printer of plates", "pop") + ,("printmaker", "prm") + ,("process contact", "prc") + ,("producer", "pro") + ,("production company", "prn") + ,("production designer", "prs") + ,("production manager", "pmn") + ,("production personnel", "prd") + ,("production place", "prp") + ,("programmer", "prg") + ,("project director", "pdr") + ,("proofreader", "pfr") + ,("provider", "prv") + ,("publication place", "pup") + ,("publisher", "pbl") + ,("publishing director", "pbd") + ,("puppeteer", "ppt") + ,("radio director", "rdd") + ,("radio producer", "rpc") + ,("recording engineer", "rce") + ,("recordist", "rcd") + ,("redaktor", "red") + ,("renderer", "ren") + ,("reporter", "rpt") + ,("repository", "rps") + ,("research team head", "rth") + ,("research team member", "rtm") + ,("researcher", "res") + ,("respondent", "rsp") + ,("respondent-appellant", "rst") + ,("respondent-appellee", "rse") + ,("responsible party", "rpy") + ,("restager", "rsg") + ,("restorationist", "rsr") + ,("reviewer", "rev") + ,("rubricator", "rbr") + ,("scenarist", "sce") + ,("scientific advisor", "sad") + ,("screenwriter", "aus") + ,("scribe", "scr") + ,("sculptor", "scl") + ,("second party", "spy") + ,("secretary", "sec") + ,("seller", "sll") + ,("set designer", "std") + ,("setting", "stg") + ,("signer", "sgn") + ,("singer", "sng") + ,("sound designer", "sds") + ,("speaker", "spk") + ,("sponsor", "spn") + ,("stage director", "sgd") + ,("stage manager", "stm") + ,("standards body", "stn") + ,("stereotyper", "str") + ,("storyteller", "stl") + ,("supporting host", "sht") + ,("surveyor", "srv") + ,("teacher", "tch") + ,("technical director", "tcd") + ,("television director", "tld") + ,("television producer", "tlp") + ,("thesis advisor", "ths") + ,("transcriber", "trc") + ,("translator", "trl") + ,("type designer", "tyd") + ,("typographer", "tyg") + ,("university place", "uvp") + ,("videographer", "vdg") + ,("witness", "wit") + ,("wood engraver", "wde") + ,("woodcutter", "wdc") + ,("writer of accompanying material", "wam") + ,("writer of added commentary", "wac") + ,("writer of added lyrics", "wal") + ,("writer of added text", "wat") + ] + diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index adbe948be..803617f95 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -44,7 +44,7 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers) +import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock) import Text.Pandoc.Walk -- | Data to be written at the end of the document: @@ -157,9 +157,7 @@ renderSection level (ttl, body) = do else cMapM blockToXml body return $ el "section" (title ++ content) where - hasSubsections = any isHeader - isHeader (Header _ _ _) = True - isHeader _ = False + hasSubsections = any isHeaderBlock -- | Only <p> and <empty-line> are allowed within <title> in FB2. formatTitle :: [Inline] -> [Content] diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 641652276..e0385af25 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -45,7 +45,7 @@ import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.String ( fromString ) -import Data.Maybe ( catMaybes ) +import Data.Maybe ( catMaybes, fromMaybe ) import Control.Monad.State import Text.Blaze.Html hiding(contents) import Text.Blaze.Internal(preEscapedString) @@ -118,7 +118,7 @@ pandocToHtml opts (Pandoc meta blocks) = do let stringifyHTML = escapeStringForXML . stringify let authsMeta = map stringifyHTML $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta - let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts + let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts let sects = hierarchicalize $ if writerSlideVariant opts == NoSlides then blocks @@ -475,28 +475,22 @@ blockToHtml opts (BlockQuote blocks) = else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (ident,_,_) lst) = do +blockToHtml opts (Header level (_,classes,_) lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts && not (null secnum) + && "unnumbered" `notElem` classes then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >> strToHtml " " >> contents else contents - let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides] - let contents'' = if writerTableOfContents opts && not (null ident) - then H.a ! A.href (toValue $ - '#' : revealSlash ++ - writerIdentifierPrefix opts ++ - ident) $ contents' - else contents' return $ case level of - 1 -> H.h1 contents'' - 2 -> H.h2 contents'' - 3 -> H.h3 contents'' - 4 -> H.h4 contents'' - 5 -> H.h5 contents'' - 6 -> H.h6 contents'' - _ -> H.p contents'' + 1 -> H.h1 contents' + 2 -> H.h2 contents' + 3 -> H.h3 contents' + 4 -> H.h4 contents' + 5 -> H.h5 contents' + 6 -> H.h6 contents' + _ -> H.p contents' blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst return $ unordList opts contents @@ -524,7 +518,7 @@ blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- if null term then return mempty - else liftM (H.dt) $ inlineListToHtml opts term + else liftM H.dt $ inlineListToHtml opts term defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . blockListToHtml opts) defs return $ mconcat $ nl opts : term' : nl opts : @@ -539,11 +533,16 @@ blockToHtml opts (Table capt aligns widths headers rows') = do let percent w = show (truncate (100*w) :: Integer) ++ "%" let coltags = if all (== 0.0) widths then mempty - else mconcat $ map (\w -> - if writerHtml5 opts - then H.col ! A.style (toValue $ "width: " ++ percent w) - else H.col ! A.width (toValue $ percent w) >> nl opts) - widths + else do + H.colgroup $ do + nl opts + mapM_ (\w -> do + if writerHtml5 opts + then H.col ! A.style (toValue $ "width: " ++ + percent w) + else H.col ! A.width (toValue $ percent w) + nl opts) widths + nl opts head' <- if all null headers then return mempty else do @@ -756,7 +755,9 @@ inlineToHtml opts inline = else [A.title $ toValue tit]) return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl - (Note contents) -> do + (Note contents) + | writerIgnoreNotes opts -> return mempty + | otherwise -> do st <- get let notes = stNotes st let number = (length notes) + 1 diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs new file mode 100644 index 000000000..19d486b25 --- /dev/null +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -0,0 +1,525 @@ +{-# LANGUAGE OverloadedStrings #-} + +{- | + Module : Text.Pandoc.Writers.ICML + Copyright : Copyright (C) 2013 github.com/mb21 + License : GNU GPL, version 2 or above + + Stability : alpha + +Conversion of 'Pandoc' documents to Adobe InCopy ICML, a stand-alone XML format +which is a subset of the zipped IDML format for which the documentation is +available here: http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/sdk/cs6/idml/idml-specification.pdf +InCopy is the companion word-processor to Adobe InDesign and ICML documents can be integrated +into InDesign with File -> Place. +-} +module Text.Pandoc.Writers.ICML (writeICML) where +import Text.Pandoc.Definition +import Text.Pandoc.XML +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Shared (splitBy) +import Text.Pandoc.Options +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Pretty +import Data.List (isPrefixOf, isInfixOf, stripPrefix) +import Data.Text as Text (breakOnAll, pack) +import Data.Monoid (mappend) +import Control.Monad.State +import qualified Data.Set as Set + +type Style = [String] +type Hyperlink = [(Int, String)] + +data WriterState = WriterState{ + blockStyles :: Set.Set String + , inlineStyles :: Set.Set String + , links :: Hyperlink + , listDepth :: Int + , maxListDepth :: Int + } + +type WS a = State WriterState a + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ + blockStyles = Set.empty + , inlineStyles = Set.empty + , links = [] + , listDepth = 1 + , maxListDepth = 0 + } + +-- inline names (appear in InDesign's character styles pane) +emphName :: String +strongName :: String +strikeoutName :: String +superscriptName :: String +subscriptName :: String +smallCapsName :: String +codeName :: String +linkName :: String +emphName = "Italic" +strongName = "Bold" +strikeoutName = "Strikeout" +superscriptName = "Superscript" +subscriptName = "Subscript" +smallCapsName = "SmallCaps" +codeName = "Code" +linkName = "Link" + +-- block element names (appear in InDesign's paragraph styles pane) +paragraphName :: String +codeBlockName :: String +rawBlockName :: String +blockQuoteName :: String +orderedListName :: String +bulletListName :: String +defListTermName :: String +defListDefName :: String +headerName :: String +tableName :: String +tableHeaderName :: String +tableCaptionName :: String +alignLeftName :: String +alignRightName :: String +alignCenterName :: String +firstListItemName :: String +beginsWithName :: String +lowerRomanName :: String +upperRomanName :: String +lowerAlphaName :: String +upperAlphaName :: String +subListParName :: String +footnoteName :: String +paragraphName = "Paragraph" +codeBlockName = "CodeBlock" +rawBlockName = "Rawblock" +blockQuoteName = "Blockquote" +orderedListName = "NumList" +bulletListName = "BulList" +defListTermName = "DefListTerm" +defListDefName = "DefListDef" +headerName = "Header" +tableName = "TablePar" +tableHeaderName = "TableHeader" +tableCaptionName = "TableCaption" +alignLeftName = "LeftAlign" +alignRightName = "RightAlign" +alignCenterName = "CenterAlign" +firstListItemName = "first" +beginsWithName = "beginsWith-" +lowerRomanName = "lowerRoman" +upperRomanName = "upperRoman" +lowerAlphaName = "lowerAlpha" +upperAlphaName = "upperAlpha" +subListParName = "subParagraph" +footnoteName = "Footnote" + + +-- | Convert Pandoc document to string in ICML format. +writeICML :: WriterOptions -> Pandoc -> String +writeICML opts (Pandoc meta blocks) = + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + render' = render colwidth + renderMeta f s = Just $ render' $ fst $ runState (f opts [] s) defaultWriterState + Just metadata = metaToJSON opts + (renderMeta blocksToICML) + (renderMeta inlinesToICML) + meta + (doc, st) = runState (blocksToICML opts [] blocks) defaultWriterState + main = render' doc + context = defField "body" main + $ defField "charStyles" (render' $ charStylesToDoc st) + $ defField "parStyles" (render' $ parStylesToDoc st) + $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) + $ metadata + in if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +-- | Auxilary functions for parStylesToDoc and charStylesToDoc. +contains :: String -> (String, (String, String)) -> [(String, String)] +contains s rule = + if isInfixOf (fst rule) s + then [snd rule] + else [] + +-- | The monospaced font to use as default. +monospacedFont :: Doc +monospacedFont = inTags False "AppliedFont" [("type", "string")] $ text "Courier New" + +-- | How much to indent blockquotes etc. +defaultIndent :: Int +defaultIndent = 20 + +-- | How much to indent numbered lists before the number. +defaultListIndent :: Int +defaultListIndent = 10 + +-- other constants +lineSeparator :: String +lineSeparator = "
" + +-- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles. +parStylesToDoc :: WriterState -> Doc +parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st + where + makeStyle s = + let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str) + attrs = concat $ map (contains s) $ [ + (defListTermName, ("BulletsAndNumberingListType", "BulletList")) + , (defListTermName, ("FontStyle", "Bold")) + , (tableHeaderName, ("FontStyle", "Bold")) + , (alignLeftName, ("Justification", "LeftAlign")) + , (alignRightName, ("Justification", "RightAlign")) + , (alignCenterName, ("Justification", "CenterAlign")) + , (headerName++"1", ("PointSize", "36")) + , (headerName++"2", ("PointSize", "30")) + , (headerName++"3", ("PointSize", "24")) + , (headerName++"4", ("PointSize", "18")) + , (headerName++"5", ("PointSize", "14")) + ] + -- what is the most nested list type, if any? + (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s + where + findList [] = (False, False) + findList (x:xs) | x == bulletListName = (True, False) + | x == orderedListName = (False, True) + | otherwise = findList xs + nBuls = countSubStrs bulletListName s + nOrds = countSubStrs orderedListName s + attrs' = numbering ++ listType ++ indent ++ attrs + where + numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)] + | otherwise = [] + listType | isOrderedList && (not $ isInfixOf subListParName s) + = [("BulletsAndNumberingListType", "NumberedList")] + | isBulletList && (not $ isInfixOf subListParName s) + = [("BulletsAndNumberingListType", "BulletList")] + | otherwise = [] + indent = [("LeftIndent", show indt)] + where + nBlockQuotes = countSubStrs blockQuoteName s + nDefLists = countSubStrs defListDefName s + indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists) + props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm) + where + font = if isInfixOf codeBlockName s + then monospacedFont + else empty + basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font + tabList = if isBulletList + then inTags True "TabList" [("type","list")] $ inTags True "ListItem" [("type","record")] + $ vcat [ + inTags False "Alignment" [("type","enumeration")] $ text "LeftAlign" + , inTags False "AlignmentCharacter" [("type","string")] $ text "." + , selfClosingTag "Leader" [("type","string")] + , inTags False "Position" [("type","unit")] $ text + $ show $ defaultListIndent * (nBuls + nOrds) + ] + else empty + makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name) + numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..." + | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..." + | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..." + | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..." + | otherwise = empty + in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props + +-- | Convert a WriterState with its inline styles to the ICML listing of Character Styles. +charStylesToDoc :: WriterState -> Doc +charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st + where + makeStyle s = + let attrs = concat $ map (contains s) [ + (strikeoutName, ("StrikeThru", "true")) + , (superscriptName, ("Position", "Superscript")) + , (subscriptName, ("Position", "Subscript")) + , (smallCapsName, ("Capitalization", "SmallCaps")) + ] + attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs + | isInfixOf strongName s = ("FontStyle", "Bold") : attrs + | isInfixOf emphName s = ("FontStyle", "Italic") : attrs + | otherwise = attrs + props = inTags True "Properties" [] $ + inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font + where + font = + if isInfixOf codeName s + then monospacedFont + else empty + in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props + +-- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks. +hyperlinksToDoc :: Hyperlink -> Doc +hyperlinksToDoc [] = empty +hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs + where + hyp (ident, url) = hdest $$ hlink + where + hdest = selfClosingTag "HyperlinkURLDestination" + [("Self", "HyperlinkURLDestination/"++url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] + hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), + ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] + $ inTags True "Properties" [] + $ inTags False "BorderColor" [("type","enumeration")] (text "Black") + $$ (inTags False "Destination" [("type","object")] + $ text $ "HyperlinkURLDestination/"++(escapeStringForXML url)) + + +-- | Convert a list of Pandoc blocks to ICML. +blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc +blocksToICML opts style lst = vcat `fmap` mapM (blockToICML opts style) lst + +-- | Convert a Pandoc block element to ICML. +blockToICML :: WriterOptions -> Style -> Block -> WS Doc +blockToICML opts style (Plain lst) = parStyle opts style lst +blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst +blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] +blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str] +blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks +blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst +blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst +blockToICML opts style (DefinitionList lst) = vcat `fmap` mapM (definitionListItemToICML opts style) lst +blockToICML opts style (Header lvl _ lst) = + let stl = (headerName ++ show lvl):style + in parStyle opts stl lst +blockToICML _ _ HorizontalRule = return empty -- we could insert a page break instead +blockToICML opts style (Table caption aligns widths headers rows) = + let style' = tableName : style + noHeader = all null headers + nrHeaders = if noHeader + then "0" + else "1" + nrRows = length rows + nrCols = if null rows + then 0 + else length $ head rows + rowsToICML [] _ = return empty + rowsToICML (col:rest) rowNr = + liftM2 ($$) (colsToICML col rowNr (0::Int)) $ rowsToICML rest (rowNr+1) + colsToICML [] _ _ = return empty + colsToICML (cell:rest) rowNr colNr = do + let stl = if rowNr == 0 && not noHeader + then tableHeaderName:style' + else style' + alig = aligns !! colNr + stl' | alig == AlignLeft = alignLeftName : stl + | alig == AlignRight = alignRightName : stl + | alig == AlignCenter = alignCenterName : stl + | otherwise = stl + c <- blocksToICML opts stl' cell + let cl = return $ inTags True "Cell" + [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c + liftM2 ($$) cl $ colsToICML rest rowNr (colNr+1) + in do + let tabl = if noHeader + then rows + else headers:rows + cells <- rowsToICML tabl (0::Int) + let colWidths w = if w > 0 + then [("SingleColumnWidth",show $ 500 * w)] + else [] + let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup) + let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths + let tableDoc = return $ inTags True "Table" [ + ("AppliedTableStyle","TableStyle/Table") + , ("HeaderRowCount", nrHeaders) + , ("BodyRowCount", show nrRows) + , ("ColumnCount", show nrCols) + ] (colDescs $$ cells) + liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption +blockToICML opts style (Div _ lst) = blocksToICML opts style lst +blockToICML _ _ Null = return empty + +-- | Convert a list of lists of blocks to ICML list items. +listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc +listItemsToICML _ _ _ _ [] = return empty +listItemsToICML opts listType style attribs (first:rest) = do + st <- get + put st{ listDepth = 1 + listDepth st} + let stl = listType:style + let f = listItemToICML opts stl True attribs first + let r = map (listItemToICML opts stl False attribs) rest + docs <- sequence $ f:r + s <- get + let maxD = max (maxListDepth s) (listDepth s) + put s{ listDepth = 1, maxListDepth = maxD } + return $ vcat docs + +-- | Convert a list of blocks to ICML list items. +listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc +listItemToICML opts style isFirst attribs item = + let makeNumbStart (Just (beginsWith, numbStl, _)) = + let doN DefaultStyle = [] + doN LowerRoman = [lowerRomanName] + doN UpperRoman = [upperRomanName] + doN LowerAlpha = [lowerAlphaName] + doN UpperAlpha = [upperAlphaName] + doN _ = [] + bw = if beginsWith > 1 + then [beginsWithName ++ show beginsWith] + else [] + in doN numbStl ++ bw + makeNumbStart Nothing = [] + stl = if isFirst + then firstListItemName:style + else style + stl' = makeNumbStart attribs ++ stl + in if length item > 1 + then do + let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst + insertTab block = blockToICML opts style block + f <- blockToICML opts stl' $ head item + r <- fmap vcat $ mapM insertTab $ tail item + return $ f $$ r + else blocksToICML opts stl' item + +definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc +definitionListItemToICML opts style (term,defs) = do + term' <- parStyle opts (defListTermName:style) term + defs' <- vcat `fmap` mapM (blocksToICML opts (defListDefName:style)) defs + return $ term' $$ defs' + + +-- | Convert a list of inline elements to ICML. +inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc +inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst) + +-- | Convert an inline element to ICML. +inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc +inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str +inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst +inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst +inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst +inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst +inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst +inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst +inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"] +inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"] +inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst] +inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str +inlineToICML _ style Space = charStyle style space +inlineToICML _ style LineBreak = charStyle style $ text lineSeparator +inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math +inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str +inlineToICML opts style (Link lst (url, title)) = do + content <- inlinesToICML opts (linkName:style) lst + state $ \st -> + let ident = if null $ links st + then 1::Int + else 1 + (fst $ head $ links st) + newst = st{ links = (ident, url):(links st) } + cont = inTags True "HyperlinkTextSource" + [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content + in (cont, newst) +inlineToICML opts style (Image alt target) = imageICML opts style alt target +inlineToICML opts style (Note lst) = footnoteToICML opts style lst +inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst + +-- | Convert a list of block elements to an ICML footnote. +footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc +footnoteToICML opts style lst = + let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls + insertTab block = blockToICML opts (footnoteName:style) block + in do + contents <- mapM insertTab lst + let number = inTags True "ParagraphStyleRange" [] $ + inTags True "CharacterStyleRange" [] $ inTagsSimple "Content" "<?ACE 4?>" + return $ inTags True "CharacterStyleRange" + [("AppliedCharacterStyle","$ID/NormalCharacterStyle"), ("Position","Superscript")] + $ inTags True "Footnote" [] $ number $$ vcat contents + +-- | Auxiliary function to merge Space elements into the adjacent Strs. +mergeSpaces :: [Inline] -> [Inline] +mergeSpaces ((Str s):(Space:((Str s'):xs))) = mergeSpaces $ Str(s++" "++s') : xs +mergeSpaces (Space:((Str s):xs)) = mergeSpaces $ Str (" "++s) : xs +mergeSpaces ((Str s):(Space:xs)) = mergeSpaces $ Str (s++" ") : xs +mergeSpaces (x:xs) = x : (mergeSpaces xs) +mergeSpaces [] = [] + +-- | Wrap a list of inline elements in an ICML Paragraph Style +parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc +parStyle opts style lst = + let slipIn x y = if null y + then x + else x ++ " > " ++ y + stlStr = foldr slipIn [] $ reverse style + stl = if null stlStr + then "" + else "ParagraphStyle/" ++ stlStr + attrs = ("AppliedParagraphStyle", stl) + attrs' = if firstListItemName `elem` style + then let ats = attrs : [("NumberingContinue", "false")] + begins = filter (isPrefixOf beginsWithName) style + in if null begins + then ats + else let i = maybe "" id $ stripPrefix beginsWithName $ head begins + in ("NumberingStartAt", i) : ats + else [attrs] + in do + content <- inlinesToICML opts [] lst + let cont = inTags True "ParagraphStyleRange" attrs' + $ mappend content $ selfClosingTag "Br" [] + state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st }) + +-- | Wrap a Doc in an ICML Character Style. +charStyle :: Style -> Doc -> WS Doc +charStyle style content = + let (stlStr, attrs) = styleToStrAttr style + doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content + in do + state $ \st -> + let styles = if null stlStr + then st + else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } + in (doc, styles) + +-- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute. +styleToStrAttr :: Style -> (String, [(String, String)]) +styleToStrAttr style = + let stlStr = unwords $ Set.toAscList $ Set.fromList style + stl = if null style + then "$ID/NormalCharacterStyle" + else "CharacterStyle/" ++ stlStr + attrs = [("AppliedCharacterStyle", stl)] + in (stlStr, attrs) + +-- | Assemble an ICML Image. +imageICML :: WriterOptions -> Style -> [Inline] -> Target -> WS Doc +imageICML _ style _ (linkURI, _) = + let imgWidth = 300::Int --TODO: set width, height dynamically as in Docx.hs + imgHeight = 200::Int + scaleFact = show (1::Double) --TODO: set scaling factor so image is scaled exactly to imgWidth x imgHeight + hw = show $ imgWidth `div` 2 + hh = show $ imgHeight `div` 2 + qw = show $ imgWidth `div` 4 + qh = show $ imgHeight `div` 4 + (stlStr, attrs) = styleToStrAttr style + props = inTags True "Properties" [] $ inTags True "PathGeometry" [] + $ inTags True "GeometryPathType" [("PathOpen","false")] + $ inTags True "PathPointArray" [] + $ vcat [ + selfClosingTag "PathPointType" [("Anchor", "-"++qw++" -"++qh), + ("LeftDirection", "-"++qw++" -"++qh), ("RightDirection", "-"++qw++" -"++qh)] + , selfClosingTag "PathPointType" [("Anchor", "-"++qw++" "++qh), + ("LeftDirection", "-"++qw++" "++qh), ("RightDirection", "-"++qw++" "++qh)] + , selfClosingTag "PathPointType" [("Anchor", qw++" "++qh), + ("LeftDirection", qw++" "++qh), ("RightDirection", qw++" "++qh)] + , selfClosingTag "PathPointType" [("Anchor", qw++" -"++qh), + ("LeftDirection", qw++" -"++qh), ("RightDirection", qw++" -"++qh)] + ] + image = inTags True "Image" + [("Self","ue6"), ("ItemTransform", scaleFact++" 0 0 "++scaleFact++" -"++qw++" -"++qh)] + $ vcat [ + inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" + $$ selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0"), ("Right", hw), ("Bottom", hh)] + , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", linkURI)] + ] + doc = inTags True "CharacterStyleRange" attrs + $ inTags True "Rectangle" [("Self","uec"), ("ItemTransform", "1 0 0 1 "++qw++" -"++qh)] + $ (props $$ image) + in do + state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } ) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f3cbcf19f..b2eff4490 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -40,6 +40,7 @@ import Network.URI ( isURI, unEscapeString ) import Data.List ( (\\), isSuffixOf, isInfixOf, isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) +import Data.Maybe ( fromMaybe ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty @@ -50,6 +51,8 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX, data WriterState = WriterState { stInNote :: Bool -- true if we're in a note + , stInMinipage :: Bool -- true if in minipage + , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter , stVerbInNote :: Bool -- true if document has verbatim text in note @@ -70,7 +73,7 @@ data WriterState = writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = evalState (pandocToLaTeX options document) $ - WriterState { stInNote = False, + WriterState { stInNote = False, stInMinipage = False, stNotes = [], stOLLevel = 1, stOptions = options, stVerbInNote = False, stTable = False, stStrikeout = False, @@ -188,7 +191,7 @@ stringToLaTeX _ [] = return "" stringToLaTeX ctx (x:xs) = do opts <- gets stOptions rest <- stringToLaTeX ctx xs - let ligatures = writerTeXLigatures opts && not (ctx == CodeString) + let ligatures = writerTeXLigatures opts && (ctx /= CodeString) let isUrl = ctx == URLString when (x == '€') $ modify $ \st -> st{ stUsesEuro = True } @@ -203,7 +206,7 @@ stringToLaTeX ctx (x:xs) = do '_' | not isUrl -> "\\_" ++ rest '#' -> "\\#" ++ rest '-' -> case xs of -- prevent adjacent hyphens from forming ligatures - ('-':_) -> "-{}" ++ rest + ('-':_) -> "-\\/" ++ rest _ -> '-' : rest '~' | not isUrl -> "\\textasciitilde{}" ++ rest '^' -> "\\^{}" ++ rest @@ -238,7 +241,7 @@ inCmd cmd contents = char '\\' <> text cmd <> braces contents toSlides :: [Block] -> State WriterState [Block] toSlides bs = do opts <- gets stOptions - let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts + let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts let bs' = prepSlides slideLevel bs concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs') @@ -441,7 +444,7 @@ blockToLaTeX (DefinitionList lst) = do incremental <- gets stIncremental let inc = if incremental then "[<+->]" else "" items <- mapM defListItemToLaTeX lst - let spacing = if and $ map isTightList (map snd lst) + let spacing = if all isTightList (map snd lst) then text "\\itemsep1pt\\parskip0pt\\parsep0pt" else empty return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ @@ -453,12 +456,12 @@ blockToLaTeX (Header level (id',classes,_) lst) = blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty - else ($$ "\\hline\\noalign{\\medskip}") `fmap` + else ($$ "\\midrule\\endhead") `fmap` (tableRowToLaTeX True aligns widths) heads captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\noalign{\\medskip}" + else text "\\addlinespace" $$ text "\\caption" <> braces captionText rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns @@ -466,10 +469,10 @@ blockToLaTeX (Table caption aligns widths heads rows) = do return $ "\\begin{longtable}[c]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end - $$ "\\hline\\noalign{\\medskip}" + $$ "\\toprule\\addlinespace" $$ headers $$ vcat rows' - $$ "\\hline" + $$ "\\bottomrule" $$ capt $$ "\\end{longtable}" @@ -490,23 +493,42 @@ tableRowToLaTeX :: Bool -> [[Block]] -> State WriterState Doc tableRowToLaTeX header aligns widths cols = do - renderedCells <- mapM blockListToLaTeX cols - let valign = text $ if header then "[b]" else "[t]" - let halign x = case x of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" -- scale factor compensates for extra space between columns -- so the whole table isn't larger than columnwidth let scaleFactor = 0.97 ** fromIntegral (length aligns) - let toCell 0 _ c = c - toCell w a c = "\\begin{minipage}" <> valign <> - braces (text (printf "%.2f\\columnwidth" - (w * scaleFactor))) <> - (halign a <> cr <> c <> cr) <> "\\end{minipage}" - let cells = zipWith3 toCell widths aligns renderedCells - return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}" + let widths' = map (scaleFactor *) widths + cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols + return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace" + +tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) + -> State WriterState Doc +tableCellToLaTeX _ (0, _, blocks) = blockListToLaTeX blocks +tableCellToLaTeX header (width, align, blocks) = do + modify $ \st -> st{ stInMinipage = True, stNotes = [] } + cellContents <- blockListToLaTeX blocks + notes <- gets stNotes + modify $ \st -> st{ stInMinipage = False, stNotes = [] } + let valign = text $ if header then "[b]" else "[t]" + let halign = case align of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + return $ ("\\begin{minipage}" <> valign <> + braces (text (printf "%.2f\\columnwidth" width)) <> + (halign <> cr <> cellContents <> cr) <> "\\end{minipage}") + $$ case notes of + [] -> empty + ns -> (case length ns of + n | n > 1 -> "\\addtocounter" <> + braces "footnote" <> + braces (text $ show $ 1 - n) + | otherwise -> empty) + $$ + vcat (intersperse + ("\\addtocounter" <> braces "footnote" <> braces "1") + $ map (\x -> "\\footnotetext" <> braces x) + $ reverse ns) listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . @@ -709,14 +731,20 @@ inlineToLaTeX (Image _ (source, _)) = do source'' <- stringToLaTeX URLString source' return $ "\\includegraphics" <> braces (text source'') inlineToLaTeX (Note contents) = do + inMinipage <- gets stInMinipage modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents modify (\s -> s {stInNote = False}) let optnl = case reverse contents of (CodeBlock _ _ : _) -> cr _ -> empty - return $ "\\footnote" <> braces (nest 2 contents' <> optnl) - -- note: a \n before } needed when note ends with a Verbatim environment + let noteContents = nest 2 contents' <> optnl + modify $ \st -> st{ stNotes = noteContents : stNotes st } + return $ + if inMinipage + then "\\footnotemark{}" + -- note: a \n before } needed when note ends with a Verbatim environment + else "\\footnote" <> braces noteContents citationsToNatbib :: [Citation] -> State WriterState Doc citationsToNatbib (one:[]) @@ -737,9 +765,9 @@ citationsToNatbib cits | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits = citeCommand "citep" p s ks where - noPrefix = and . map (null . citationPrefix) - noSuffix = and . map (null . citationSuffix) - ismode m = and . map (((==) m) . citationMode) + noPrefix = all (null . citationPrefix) + noSuffix = all (null . citationSuffix) + ismode m = all (((==) m) . citationMode) p = citationPrefix $ head $ cits s = citationSuffix $ last $ cits ks = intercalate ", " $ map citationId cits diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index b31cc2b70..680bfef44 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -283,7 +283,7 @@ definitionListItemToMan opts (label, defs) = do mapM (\item -> blockToMan opts item) rest first' <- blockToMan opts first return $ first' $$ text ".RS" $$ rest' $$ text ".RE" - return $ text ".TP" $$ text ".B " <> labelText $$ contents + return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents -- | Convert list of Pandoc block elements to man. blockListToMan :: WriterOptions -- ^ Options diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index eefcd547a..278e5cc9d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -338,7 +338,7 @@ blockToMarkdown opts (RawBlock f str) else return $ if isEnabled Ext_markdown_attribute opts then text (addMarkdownAttribute str) <> text "\n" else text str <> text "\n" - | f == "latex" || f == "tex" || f == "markdown" = do + | f `elem` ["latex", "tex", "markdown"] = do st <- get if stPlain st then return empty @@ -381,13 +381,11 @@ blockToMarkdown opts (CodeBlock (_,classes,_) str) isEnabled Ext_literate_haskell opts = return $ prefixed "> " (text str) <> blankline blockToMarkdown opts (CodeBlock attribs str) = return $ - case attribs of - x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts -> - tildes <> " " <> attrs <> cr <> text str <> - cr <> tildes <> blankline - (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts -> - backticks <> " " <> text cls <> cr <> text str <> - cr <> backticks <> blankline + case attribs == nullAttr of + False | isEnabled Ext_backtick_code_blocks opts -> + backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline + | isEnabled Ext_fenced_code_blocks opts -> + tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline _ -> nest (writerTabStop opts) (text str) <> blankline where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of [] -> "~~~~" @@ -396,8 +394,10 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ | otherwise -> replicate (n+1) '~' backticks = text "```" attrs = if isEnabled Ext_fenced_code_attributes opts - then nowrap $ attrsToMarkdown attribs - else empty + then nowrap $ " " <> attrsToMarkdown attribs + else case attribs of + (_,[cls],_) -> " " <> text cls + _ -> empty blockToMarkdown opts (BlockQuote blocks) = do st <- get -- if we're writing literate haskell, put a space before the bird tracks @@ -555,7 +555,14 @@ bulletListItemToMarkdown opts items = do contents <- blockListToMarkdown opts items let sps = replicate (writerTabStop opts - 2) ' ' let start = text ('-' : ' ' : sps) - return $ hang (writerTabStop opts) start $ contents <> cr + -- remove trailing blank line if it is a tight list + let contents' = case reverse items of + (BulletList xs:_) | isTightList xs -> + chomp contents <> cr + (OrderedList _ xs:_) | isTightList xs -> + chomp contents <> cr + _ -> contents + return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: WriterOptions -- ^ options @@ -621,10 +628,11 @@ getReference label (src, tit) = do Nothing -> do let label' = case find ((== label) . fst) (stRefs st) of Just _ -> -- label is used; generate numerical label - case find (\n -> not (any (== [Str (show n)]) - (map fst (stRefs st)))) [1..(10000 :: Integer)] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" + case find (\n -> notElem [Str (show n)] + (map fst (stRefs st))) + [1..(10000 :: Integer)] of + Just x -> [Str (show x)] + Nothing -> error "no unique label" Nothing -> label modify (\s -> s{ stRefs = (label', (src,tit)) : stRefs st }) return label' diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 61741a61e..83fefaa29 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -51,7 +51,7 @@ data WriterState = WriterState { writeMediaWiki :: WriterOptions -> Pandoc -> String writeMediaWiki opts document = evalState (pandocToMediaWiki opts document) - (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) + WriterState { stNotes = False, stListLevel = [], stUseTags = False } -- | Return MediaWiki representation of document. pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index cc0a06243..c3652d65d 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -30,7 +30,10 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Data.IORef -import Data.List ( isPrefixOf ) +import Data.List ( isPrefixOf, isSuffixOf ) +import Data.Maybe ( fromMaybe ) +import Text.XML.Light.Output +import Text.TeXMath import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip @@ -40,13 +43,14 @@ import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Walk +import Text.Pandoc.Writers.Shared ( fixDisplayMath ) import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import Control.Monad (liftM) import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E import Data.Time.Clock.POSIX ( getPOSIXTime ) -import System.FilePath ( takeExtension ) +import System.FilePath ( takeExtension, takeDirectory ) -- | Produce an ODT file from a Pandoc document. writeODT :: WriterOptions -- ^ Writer options @@ -60,9 +64,9 @@ writeODT opts doc@(Pandoc meta _) = do Just f -> B.readFile f Nothing -> (B.fromChunks . (:[])) `fmap` readDataFile datadir "reference.odt" - -- handle pictures + -- handle formulas and pictures picEntriesRef <- newIORef ([] :: [Entry]) - doc' <- walkM (transformPic opts picEntriesRef) doc + doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime let contentEntry = toEntry "content.xml" epochtime @@ -72,7 +76,11 @@ writeODT opts doc@(Pandoc meta _) = do $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive let toFileEntry fp = case getMimeType fp of - Nothing -> empty + Nothing -> if "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp + then selfClosingTag "manifest:file-entry" + [("manifest:media-type","application/vnd.oasis.opendocument.formula") + ,("manifest:full-path",fp)] + else empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) ,("manifest:full-path", fp) @@ -80,6 +88,8 @@ writeODT opts doc@(Pandoc meta _) = do ] let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ] + let formulas = [ takeDirectory ent ++ "/" | ent <- filesInArchive archive, + "Formula-" `isPrefixOf` ent, takeExtension ent == ".xml" ] let manifestEntry = toEntry "META-INF/manifest.xml" epochtime $ fromStringLazy $ render Nothing $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" @@ -91,6 +101,7 @@ writeODT opts doc@(Pandoc meta _) = do [("manifest:media-type","application/vnd.oasis.opendocument.text") ,("manifest:full-path","/")] $$ vcat ( map toFileEntry $ files ) + $$ vcat ( map toFileEntry $ formulas ) ) ) let archive' = addEntryToArchive manifestEntry archive @@ -118,8 +129,8 @@ writeODT opts doc@(Pandoc meta _) = do $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' -transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPic opts entriesRef (Image lab (src,_)) = do +transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline +transformPicMath opts entriesRef (Image lab (src,_)) = do res <- fetchItem (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do @@ -127,7 +138,7 @@ transformPic opts entriesRef (Image lab (src,_)) = do return $ Emph lab Right (img, _) -> do let size = imageSize img - let (w,h) = maybe (0,0) id $ sizeInPoints `fmap` size + let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size let tit' = show w ++ "x" ++ show h entries <- readIORef entriesRef let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src @@ -136,5 +147,29 @@ transformPic opts entriesRef (Image lab (src,_)) = do let entry = toEntry newsrc epochtime $ toLazy img modifyIORef entriesRef (entry:) return $ Image lab (newsrc, tit') -transformPic _ _ x = return x +transformPicMath _ entriesRef (Math t math) = do + entries <- readIORef entriesRef + let dt = if t == InlineMath then DisplayInline else DisplayBlock + case texMathToMathML dt math of + Left _ -> return $ Math t math + Right r -> do + let conf = useShortEmptyTags (const False) defaultConfigPP + let mathml = ppcTopElement conf r + epochtime <- floor `fmap` getPOSIXTime + let dirname = "Formula-" ++ show (length entries) ++ "/" + let fname = dirname ++ "content.xml" + let entry = toEntry fname epochtime (fromStringLazy mathml) + modifyIORef entriesRef (entry:) + return $ RawInline (Format "opendocument") $ render Nothing $ + inTags False "draw:frame" [("text:anchor-type", + if t == DisplayMath + then "paragraph" + else "as-char") + ,("style:vertical-pos", "middle") + ,("style:vertical-rel", "text")] $ + selfClosingTag "draw:object" [("xlink:href", dirname) + , ("xlink:type", "simple") + , ("xlink:show", "embed") + , ("xlink:actuate", "onLoad")] +transformPicMath _ _ x = return x diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 565f5f869..0029c3296 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -64,6 +64,7 @@ data WriterState = , stInDefinition :: Bool , stTight :: Bool , stFirstPara :: Bool + , stImageId :: Int } defaultWriterState :: WriterState @@ -78,6 +79,7 @@ defaultWriterState = , stInDefinition = False , stTight = False , stFirstPara = False + , stImageId = 1 } when :: Bool -> Doc -> Doc @@ -283,8 +285,12 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc blockToOpenDocument o bs - | Plain b <- bs = inParagraphTags =<< inlinesToOpenDocument o b - | Para b <- bs = inParagraphTags =<< inlinesToOpenDocument o b + | Plain b <- bs = if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b + | Para b <- bs = if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b | Div _ xs <- bs = blocksToOpenDocument o xs | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) @@ -296,8 +302,8 @@ blockToOpenDocument o bs | Table c a w h r <- bs = setFirstPara >> table c a w h r | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]) - | RawBlock f s <- bs = if f == "opendocument" - then preformatted s + | RawBlock f s <- bs = if f == Format "opendocument" + then return $ text s else return empty | Null <- bs = return empty | otherwise = return empty @@ -376,11 +382,11 @@ inlineToOpenDocument o ils | Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s | Math t s <- ils = inlinesToOpenDocument o (readTeXMath' t s) | Cite _ l <- ils = inlinesToOpenDocument o l - | RawInline f s <- ils = if f == "opendocument" - then return $ preformatted s + | RawInline f s <- ils = if f == Format "opendocument" + then return $ text s else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l - | Image _ (s,t) <- ils = return $ mkImg s t + | Image _ (s,t) <- ils = mkImg s t | Note l <- ils = mkNote l | otherwise = return empty where @@ -389,7 +395,11 @@ inlineToOpenDocument o ils , ("xlink:href" , s ) , ("office:name", t ) ] . inSpanTags "Definition" - mkImg s t = inTags False "draw:frame" (attrsFromTitle t) $ + mkImg s t = do + id' <- gets stImageId + modify (\st -> st{ stImageId = id' + 1 }) + return $ inTags False "draw:frame" + (("draw:name", "img" ++ show id'):attrsFromTitle t) $ selfClosingTag "draw:image" [ ("xlink:href" , s ) , ("xlink:type" , "simple") , ("xlink:show" , "embed" ) diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 51083f52b..d318c5f6a 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -129,7 +129,7 @@ blockToOrg (Para inlines) = do blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline -blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" = +blockToOrg (RawBlock f str) | f `elem` ["org", "latex", "tex"] = return $ text str blockToOrg (RawBlock _ _) = return empty blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 1a62f7250..1e7596b21 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -219,11 +219,15 @@ blockToRST (Table caption _ widths headers rows) = do else blankline <> text "Table: " <> caption' headers' <- mapM blockListToRST headers rawRows <- mapM (mapM blockListToRST) rows - let isSimple = all (==0) widths && all (all (\bs -> length bs <= 1)) rows + -- let isSimpleCell [Plain _] = True + -- isSimpleCell [Para _] = True + -- isSimpleCell [] = True + -- isSimpleCell _ = False + -- let isSimple = all (==0) widths && all (all isSimpleCell) rows let numChars = maximum . map offset opts <- get >>= return . stOptions let widthsInChars = - if isSimple + if all (== 0) widths then map ((+2) . numChars) $ transpose (headers' : rawRows) else map (floor . (fromIntegral (writerColumns opts) *)) widths let hpipeBlocks blocks = hcat [beg, middle, end] @@ -287,7 +291,7 @@ definitionListItemToRST (label, defs) = do label' <- inlineListToRST label contents <- liftM vcat $ mapM blockListToRST defs tabstop <- get >>= (return . writerTabStop . stOptions) - return $ label' $$ nest tabstop (contents <> cr) + return $ label' $$ nest tabstop (nestle contents <> cr) -- | Convert list of Pandoc block elements to RST. blockListToRST :: [Block] -- ^ List of block elements diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 9cb08803c..604aac1c9 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -34,6 +34,7 @@ module Text.Pandoc.Writers.Shared ( , setField , defField , tagWithAttrs + , fixDisplayMath ) where import Text.Pandoc.Definition @@ -46,6 +47,7 @@ import qualified Data.Map as M import qualified Data.Text as T import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..)) import qualified Data.Traversable as Traversable +import Data.List ( groupBy ) -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. @@ -65,8 +67,7 @@ metaToJSON opts blockWriter inlineWriter (Meta metamap) renderedMap <- Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap - return $ M.foldWithKey (\key val obj -> defField key val obj) - baseContext renderedMap + return $ M.foldWithKey defField baseContext renderedMap | otherwise = return (Object H.empty) metaValueToJSON :: Monad m @@ -137,3 +138,28 @@ tagWithAttrs tag (ident,classes,kvs) = hsep ,hsep (map (\(k,v) -> text k <> "=" <> doubleQuotes (text (escapeStringForXML v))) kvs) ] <> ">" + +isDisplayMath :: Inline -> Bool +isDisplayMath (Math DisplayMath _) = True +isDisplayMath _ = False + +stripLeadingTrailingSpace :: [Inline] -> [Inline] +stripLeadingTrailingSpace = go . reverse . go . reverse + where go (Space:xs) = xs + go xs = xs + +-- Put display math in its own block (for ODT/DOCX). +fixDisplayMath :: Block -> Block +fixDisplayMath (Plain lst) + | any isDisplayMath lst && not (all isDisplayMath lst) = + -- chop into several paragraphs so each displaymath is its own + Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ + groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || + not (isDisplayMath x || isDisplayMath y)) lst +fixDisplayMath (Para lst) + | any isDisplayMath lst && not (all isDisplayMath lst) = + -- chop into several paragraphs so each displaymath is its own + Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ + groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || + not (isDisplayMath x || isDisplayMath y)) lst +fixDisplayMath x = x diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index d62e50880..bf3df8035 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -293,7 +293,7 @@ blockListToTexinfo (x:xs) = do case x of Header level _ _ -> do -- We need need to insert a menu for this node. - let (before, after) = break isHeader xs + let (before, after) = break isHeaderBlock xs before' <- blockListToTexinfo before let menu = if level < 4 then collectNodes (level + 1) after @@ -315,10 +315,6 @@ blockListToTexinfo (x:xs) = do xs' <- blockListToTexinfo xs return $ x' $$ xs' -isHeader :: Block -> Bool -isHeader (Header _ _ _) = True -isHeader _ = False - collectNodes :: Int -> [Block] -> [Block] collectNodes _ [] = [] collectNodes level (x:xs) = diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 7c102cc86..95aedf780 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -51,7 +51,7 @@ data WriterState = WriterState { writeTextile :: WriterOptions -> Pandoc -> String writeTextile opts document = evalState (pandocToTextile opts document) - (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) + WriterState { stNotes = [], stListLevel = [], stUseTags = False } -- | Return Textile representation of document. pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String |
