diff options
author | Clare Macrae <github@cfmacrae.fastmail.co.uk> | 2014-06-29 19:22:31 +0100 |
---|---|---|
committer | Clare Macrae <github@cfmacrae.fastmail.co.uk> | 2014-06-29 19:22:31 +0100 |
commit | 717e16660d1ee83f690b35d0aa9b60c8ac9d6b61 (patch) | |
tree | aa850d4ee99fa0b14da9ba0396ba6aa67e2037e3 /src/Text/Pandoc/Writers | |
parent | fccfc8429cf4d002df37977f03508c9aae457416 (diff) | |
parent | ce69021e42d7bf50deccba2a52ed4717f6ddac10 (diff) | |
download | pandoc-717e16660d1ee83f690b35d0aa9b60c8ac9d6b61.tar.gz |
Merge remote-tracking branch 'jgm/master' into dokuwiki
Diffstat (limited to 'src/Text/Pandoc/Writers')
24 files changed, 2763 insertions, 733 deletions
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 6c3c6955e..19112d8f5 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -132,7 +132,9 @@ blockToAsciiDoc opts (Para inlines) = do then text "\\" else empty return $ esc <> contents <> blankline -blockToAsciiDoc _ (RawBlock _ _) = return empty +blockToAsciiDoc _ (RawBlock f s) + | f == "asciidoc" = return $ text s + | otherwise = return empty blockToAsciiDoc _ HorizontalRule = return $ blankline <> text "'''''" <> blankline blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do @@ -215,7 +217,9 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x] return $ text "|" <> chomp d makeCell [Para x] = makeCell [Plain x] - makeCell _ = return $ text "|" <> "[multiblock cell omitted]" + makeCell [] = return $ text "|" + makeCell bs = do d <- blockListToAsciiDoc opts bs + return $ text "a|" $$ d let makeRow cells = hsep `fmap` mapM makeCell cells rows' <- mapM makeRow rows head' <- makeRow headers @@ -225,7 +229,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do else 100000 let maxwidth = maximum $ map offset (head':rows') let body = if maxwidth > colwidth then vsep rows' else vcat rows' - let border = text $ "|" ++ replicate ((min maxwidth colwidth) - 1) '=' + let border = text $ "|" ++ replicate (max 5 (min maxwidth colwidth) - 1) '=' return $ caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline blockToAsciiDoc opts (BulletList items) = do @@ -246,6 +250,7 @@ blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do blockToAsciiDoc opts (DefinitionList items) = do contents <- mapM (definitionListItemToAsciiDoc opts) items return $ cat contents <> blankline +blockToAsciiDoc opts (Div _ bs) = blockListToAsciiDoc opts bs -- | Convert bullet list item (list of blocks) to asciidoc. bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc @@ -346,7 +351,9 @@ inlineToAsciiDoc _ (Math InlineMath str) = return $ "latexmath:[$" <> text str <> "$]" inlineToAsciiDoc _ (Math DisplayMath str) = return $ "latexmath:[\\[" <> text str <> "\\]]" -inlineToAsciiDoc _ (RawInline _ _) = return empty +inlineToAsciiDoc _ (RawInline f s) + | f == "asciidoc" = return $ text s + | otherwise = return empty inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr inlineToAsciiDoc _ Space = return space inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst @@ -383,3 +390,4 @@ inlineToAsciiDoc opts (Note [Plain inlines]) = do return $ text "footnote:[" <> contents <> "]" -- asciidoc can't handle blank lines in notes inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]" +inlineToAsciiDoc opts (Span _ ils) = inlineListToAsciiDoc opts ils diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 32588dc8f..3b321cc19 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2010 John MacFarlane + Copyright : Copyright (C) 2007-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -33,9 +33,9 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options -import Text.Pandoc.Generic (queryWith) +import Text.Pandoc.Walk (query) import Text.Printf ( printf ) -import Data.List ( intercalate, isPrefixOf ) +import Data.List ( intercalate ) import Control.Monad.State import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate' ) @@ -130,7 +130,7 @@ blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure blockToConTeXt (Para [Image txt (src,'f':'i':'g':':':_)]) = do capt <- inlineListToConTeXt txt - return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <> + return $ blankline $$ "\\placefigure" <> braces capt <> braces ("\\externalfigure" <> brackets (text src)) <> blankline blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst @@ -143,6 +143,7 @@ blockToConTeXt (CodeBlock _ str) = -- blankline because \stoptyping can't have anything after it, inc. '}' blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline blockToConTeXt (RawBlock _ _ ) = return empty +blockToConTeXt (Div _ bs) = blockListToConTeXt bs blockToConTeXt (BulletList lst) = do contents <- mapM listItemToConTeXt lst return $ ("\\startitemize" <> if isTightList lst @@ -204,9 +205,9 @@ blockToConTeXt (Table caption aligns widths heads rows) = do else liftM ($$ "\\HL") $ tableRowToConTeXt heads captionText <- inlineListToConTeXt caption rows' <- mapM tableRowToConTeXt rows - return $ "\\placetable" <> brackets ("here" <> if null caption - then ",none" - else "") + return $ "\\placetable" <> (if null caption + then brackets "none" + else empty) <> braces captionText $$ "\\starttable" <> brackets (text colDescriptors) $$ "\\HL" $$ headers $$ @@ -282,14 +283,6 @@ inlineToConTeXt (RawInline "tex" str) = return $ text str inlineToConTeXt (RawInline _ _) = return empty inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr inlineToConTeXt Space = return space --- autolink -inlineToConTeXt (Link [Str str] (src, tit)) - | if "mailto:" `isPrefixOf` src - then src == escapeURI ("mailto:" ++ str) - else src == escapeURI str = - inlineToConTeXt (Link - [RawInline "context" "\\hyphenatedurl{", Str str, RawInline "context" "}"] - (src, tit)) -- Handle HTML-like internal document references to sections inlineToConTeXt (Link txt (('#' : ref), _)) = do opts <- gets stOptions @@ -304,6 +297,7 @@ inlineToConTeXt (Link txt (('#' : ref), _)) = do <> brackets (text ref) inlineToConTeXt (Link txt (src, _)) = do + let isAutolink = txt == [Str src] st <- get let next = stNextRef st put $ st {stNextRef = next + 1} @@ -312,8 +306,9 @@ inlineToConTeXt (Link txt (src, _)) = do return $ "\\useURL" <> brackets (text ref) <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) - <> brackets empty - <> brackets label + <> (if isAutolink + then empty + else brackets empty <> brackets label) <> "\\from" <> brackets (text ref) inlineToConTeXt (Image _ (src, _)) = do @@ -325,11 +320,12 @@ inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents let codeBlock x@(CodeBlock _ _) = [x] codeBlock _ = [] - let codeBlocks = queryWith codeBlock contents + let codeBlocks = query codeBlock contents return $ if null codeBlocks then text "\\footnote{" <> nest 2 contents' <> char '}' else text "\\startbuffer " <> nest 2 contents' <> text "\\stopbuffer\\footnote{\\getbuffer}" +inlineToConTeXt (Span _ ils) = inlineListToConTeXt ils -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: Attr diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 732497616..88f590c43 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -{- Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> +{- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -33,6 +33,7 @@ module Text.Pandoc.Writers.Custom ( writeCustom ) where import Text.Pandoc.Definition import Text.Pandoc.Options import Data.List ( intersperse ) +import Data.Char ( toLower ) import Scripting.Lua (LuaState, StackValue, callfunc) import qualified Scripting.Lua as Lua import Text.Pandoc.UTF8 (fromString, toString) @@ -78,6 +79,11 @@ instance StackValue a => StackValue [a] where return (Just lst) valuetype _ = Lua.TTABLE +instance StackValue Format where + push lua (Format f) = Lua.push lua (map toLower f) + peek l n = fmap Format `fmap` Lua.peek l n + valuetype _ = Lua.TSTRING + instance (StackValue a, StackValue b) => StackValue (M.Map a b) where push lua m = do let xs = M.toList m @@ -110,12 +116,14 @@ instance StackValue [Block] where instance StackValue MetaValue where push l (MetaMap m) = Lua.push l m push l (MetaList xs) = Lua.push l xs + push l (MetaBool x) = Lua.push l x push l (MetaString s) = Lua.push l s push l (MetaInlines ils) = Lua.push l ils push l (MetaBlocks bs) = Lua.push l bs peek _ _ = undefined valuetype (MetaMap _) = Lua.TTABLE valuetype (MetaList _) = Lua.TTABLE + valuetype (MetaBool _) = Lua.TBOOLEAN valuetype (MetaString _) = Lua.TSTRING valuetype (MetaInlines _) = Lua.TSTRING valuetype (MetaBlocks _) = Lua.TSTRING @@ -123,7 +131,7 @@ instance StackValue MetaValue where -- | Convert Pandoc to custom markup. writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String writeCustom luaFile opts doc = do - luaScript <- readFile luaFile + luaScript <- C8.unpack `fmap` C8.readFile luaFile lua <- Lua.newstate Lua.openlibs lua Lua.loadstring lua luaScript "custom" @@ -176,6 +184,9 @@ blockToCustom lua (OrderedList (num,sty,delim) items) = blockToCustom lua (DefinitionList items) = callfunc lua "DefinitionList" items +blockToCustom lua (Div attr items) = + callfunc lua "Div" items (attrToMap attr) + -- | Convert list of Pandoc block elements to Custom. blockListToCustom :: LuaState -- ^ Options -> [Block] -- ^ List of block elements @@ -238,3 +249,5 @@ inlineToCustom lua (Image alt (src,tit)) = inlineToCustom lua (Note contents) = callfunc lua "Note" contents +inlineToCustom lua (Span attr items) = + callfunc lua "Span" items (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 6f4b61a79..ba6a92a08 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,12 +32,14 @@ module Text.Pandoc.Writers.Docbook ( writeDocbook) where import Text.Pandoc.Definition import Text.Pandoc.XML import Text.Pandoc.Shared +import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Readers.TeXMath import Data.List ( isPrefixOf, intercalate, isSuffixOf ) import Data.Char ( toLower ) +import Data.Monoid ( Any(..) ) import Text.Pandoc.Highlighting ( languages, languagesByExtension ) import Text.Pandoc.Pretty import qualified Text.Pandoc.Builder as B @@ -84,8 +87,9 @@ writeDocbook opts (Pandoc meta blocks) = auths' = map (authorToDocbook opts) $ docAuthors meta meta' = B.setMeta "author" auths' meta Just metadata = metaToJSON opts - (Just . render colwidth . blocksToDocbook opts) - (Just . render colwidth . inlinesToDocbook opts) + (Just . render colwidth . (vcat . + (map (elementToDocbook opts' startLvl)) . hierarchicalize)) + (Just . render colwidth . inlinesToDocbook opts') meta' main = render' $ vcat (map (elementToDocbook opts' startLvl) elements) context = defField "body" main @@ -148,6 +152,7 @@ listItemToDocbook opts item = -- | Convert a Pandoc block element to Docbook. blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty +blockToDocbook opts (Div _ bs) = blocksToDocbook opts $ map plainToPara bs blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure @@ -162,8 +167,9 @@ blockToDocbook opts (Para [Image txt (src,'f':'i':'g':':':_)]) = (inTagsIndented "imageobject" (selfClosingTag "imagedata" [("fileref",src)])) $$ inTagsSimple "textobject" (inTagsSimple "phrase" alt)) -blockToDocbook opts (Para lst) = - inTagsIndented "para" $ inlinesToDocbook opts lst +blockToDocbook opts (Para lst) + | hasLineBreaks lst = flush $ nowrap $ inTagsSimple "literallayout" $ inlinesToDocbook opts lst + | otherwise = inTagsIndented "para" $ inlinesToDocbook opts lst blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" $ blocksToDocbook opts blocks blockToDocbook _ (CodeBlock (_,classes,_) str) = @@ -179,10 +185,11 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = - inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst + let attribs = [("spacing", "compact") | isTightList lst] + in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst blockToDocbook _ (OrderedList _ []) = empty blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = - let attribs = case numstyle of + let numeration = case numstyle of DefaultStyle -> [] Decimal -> [("numeration", "arabic")] Example -> [("numeration", "arabic")] @@ -190,18 +197,21 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerAlpha -> [("numeration", "loweralpha")] UpperRoman -> [("numeration", "upperroman")] LowerRoman -> [("numeration", "lowerroman")] - items = if start == 1 - then listItemsToDocbook opts (first:rest) - else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest + spacing = [("spacing", "compact") | isTightList (first:rest)] + attribs = numeration ++ spacing + items = if start == 1 + then listItemsToDocbook opts (first:rest) + else (inTags True "listitem" [("override",show start)] + (blocksToDocbook opts $ map plainToPara first)) $$ + listItemsToDocbook opts rest in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = - inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block --- we allow html for compatibility with earlier versions of pandoc -blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block -blockToDocbook _ (RawBlock _ _) = empty + let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] + in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst +blockToDocbook _ (RawBlock f str) + | f == "docbook" = text str -- raw XML block + | f == "html" = text str -- allow html for backwards compatibility + | otherwise = empty blockToDocbook _ HorizontalRule = empty -- not semantic blockToDocbook opts (Table caption aligns widths headers rows) = let captionDoc = if null caption @@ -223,6 +233,16 @@ blockToDocbook opts (Table caption aligns widths headers rows) = (inTags True "tgroup" [("cols", show (length headers))] $ coltags $$ head' $$ body') +hasLineBreaks :: [Inline] -> Bool +hasLineBreaks = getAny . query isLineBreak . walk removeNote + where + removeNote :: Inline -> Inline + removeNote (Note _) = Str "" + removeNote x = x + isLineBreak :: Inline -> Any + isLineBreak LineBreak = Any True + isLineBreak _ = Any False + alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of AlignLeft -> "left" @@ -267,6 +287,8 @@ inlineToDocbook opts (Quoted _ lst) = inTagsSimple "quote" $ inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst +inlineToDocbook opts (Span _ ils) = + inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math t str) @@ -277,8 +299,8 @@ inlineToDocbook opts (Math t str) $ fixNS $ removeAttr r Left _ -> inlinesToDocbook opts - $ readTeXMath str - | otherwise = inlinesToDocbook opts $ readTeXMath str + $ readTeXMath' t str + | otherwise = inlinesToDocbook opts $ readTeXMath' t str where (dt, tagtype) = case t of InlineMath -> (DisplayInline,"inlineequation") DisplayMath -> (DisplayBlock,"informalequation") @@ -288,7 +310,7 @@ inlineToDocbook opts (Math t str) fixNS = everywhere (mkT fixNS') inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x | otherwise = empty -inlineToDocbook _ LineBreak = flush $ inTagsSimple "literallayout" (text "\n") +inlineToDocbook _ LineBreak = text "\n" inlineToDocbook _ Space = space inlineToDocbook opts (Link txt (src, _)) = if isPrefixOf "mailto:" src diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index e899200f6..31e64f14e 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,22 +29,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, groupBy ) +import Data.Maybe (fromMaybe) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 import qualified Data.Map as M import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Monoid ((<>)) +import Text.Pandoc.Compat.Monoid ((<>)) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.ImageSize import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Writers.Shared (fixDisplayMath) import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) +import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () import Text.XML.Light import Text.TeXMath @@ -54,8 +57,33 @@ import Data.Unique (hashUnique, newUnique) import System.Random (randomRIO) import Text.Printf (printf) import qualified Control.Exception as E -import System.FilePath (takeExtension) -import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) +import Control.Applicative ((<|>)) +import Data.Maybe (mapMaybe) + +data ListMarker = NoMarker + | BulletMarker + | NumberMarker ListNumberStyle ListNumberDelim Int + deriving (Show, Read, Eq, Ord) + +listMarkerToId :: ListMarker -> String +listMarkerToId NoMarker = "990" +listMarkerToId BulletMarker = "991" +listMarkerToId (NumberMarker sty delim n) = + '9' : '9' : styNum : delimNum : show n + where styNum = case sty of + DefaultStyle -> '2' + Example -> '3' + Decimal -> '4' + LowerRoman -> '5' + UpperRoman -> '6' + LowerAlpha -> '7' + UpperAlpha -> '8' + delimNum = case delim of + DefaultDelim -> '0' + Period -> '1' + OneParen -> '2' + TwoParens -> '3' data WriterState = WriterState{ stTextProperties :: [Element] @@ -66,15 +94,9 @@ data WriterState = WriterState{ , stImages :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString) , stListLevel :: Int , stListNumId :: Int - , stNumStyles :: M.Map ListMarker Int , stLists :: [ListMarker] } -data ListMarker = NoMarker - | BulletMarker - | NumberMarker ListNumberStyle ListNumberDelim Int - deriving (Show, Read, Eq, Ord) - defaultWriterState :: WriterState defaultWriterState = WriterState{ stTextProperties = [] @@ -85,7 +107,6 @@ defaultWriterState = WriterState{ , stImages = M.empty , stListLevel = -1 , stListNumId = 1 - , stNumStyles = M.fromList [(NoMarker, 0)] , stLists = [NoMarker] } @@ -108,17 +129,52 @@ writeDocx :: WriterOptions -- ^ Writer options -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = bottomUp (concatMap fixDisplayMath) doc + let doc' = walk fixDisplayMath doc refArchive <- liftM (toArchive . toLazy) $ case writerReferenceDocx opts of Just f -> B.readFile f Nothing -> readDataFile datadir "reference.docx" + distArchive <- liftM (toArchive . toLazy) $ readDataFile Nothing "reference.docx" ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState epochtime <- floor `fmap` getPOSIXTime let imgs = M.elems $ stImages st + -- create entries for images in word/media/... + let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img + let imageEntries = map toImageEntry imgs + + -- adjust contents to add sectPr from reference.docx + parsedDoc <- parseXml refArchive distArchive "word/document.xml" + let wname f qn = qPrefix qn == Just "w" && f (qName qn) + let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + + let sectpr = maybe (mknode "w:sectPr" [] $ ()) id mbsectpr + + let stdAttributes = + [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") + ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") + ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") + ,("xmlns:o","urn:schemas-microsoft-com:office:office") + ,("xmlns:v","urn:schemas-microsoft-com:vml") + ,("xmlns:w10","urn:schemas-microsoft-com:office:word") + ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") + ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") + ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] + + let contents' = contents ++ [sectpr] + let docContents = mknode "w:document" stdAttributes + $ mknode "w:body" [] $ contents' + + parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels" + let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" + let isFooterNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" + let headers = filterElements isHeaderNode parsedRels + let footers = filterElements isFooterNode parsedRels + + let extractTarget e = findAttr (QName "Target" Nothing Nothing) e + -- we create [Content_Types].xml and word/_rels/document.xml.rels -- from scratch rather than reading from reference.docx, -- because Word sometimes changes these files when a reference.docx is modified, @@ -129,8 +185,12 @@ writeDocx opts doc@(Pandoc meta _) = do let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _, _) = - mkOverrideNode ("/word/" ++ imgpath, maybe "application/octet-stream" id mbMimeType) - let overrides = map mkOverrideNode + mkOverrideNode ("/word/" ++ imgpath, + fromMaybe "application/octet-stream" mbMimeType) + let mkMediaOverride imgpath = mkOverrideNode ('/':imgpath, + fromMaybe "application/octet-stream" + $ getMimeType imgpath) + let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") ,("/word/numbering.xml", @@ -151,7 +211,15 @@ writeDocx opts doc@(Pandoc meta _) = do "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") ,("/word/footnotes.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") - ] ++ map mkImageOverride imgs + ] ++ + map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ + map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ + map mkImageOverride imgs ++ + map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] + let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" @@ -186,7 +254,9 @@ writeDocx opts doc@(Pandoc meta _) = do "theme/theme1.xml") ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", - "footnotes.xml")] + "footnotes.xml") + ] ++ + headers ++ footers let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () @@ -195,15 +265,14 @@ writeDocx opts doc@(Pandoc meta _) = do let relEntry = toEntry "word/_rels/document.xml.rels" epochtime $ renderXml reldoc - -- create entries for images in word/media/... - let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img - let imageEntries = map toImageEntry imgs -- word/document.xml - let contentEntry = toEntry "word/document.xml" epochtime $ renderXml contents + let contentEntry = toEntry "word/document.xml" epochtime + $ renderXml docContents -- footnotes - let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml footnotes + let notes = mknode "w:footnotes" stdAttributes footnotes + let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml notes -- footnote rels let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime @@ -213,14 +282,22 @@ writeDocx opts doc@(Pandoc meta _) = do -- styles let newstyles = styleToOpenXml $ writerHighlightStyle opts let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive stylepath - let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles } + styledoc <- parseXml refArchive distArchive stylepath + let styledoc' = styledoc{ elContent = elContent styledoc ++ + [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml let numpath = "word/numbering.xml" - numEntry <- (toEntry numpath epochtime . renderXml) - `fmap` mkNumbering (stNumStyles st) (stLists st) + numbering <- parseXml refArchive distArchive numpath + newNumElts <- mkNumbering (stLists st) + let allElts = onlyElems (elContent numbering) ++ newNumElts + let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent = + -- we want all the abstractNums first, then the nums, + -- otherwise things break: + [Elem e | e <- allElts + , qName (elName e) == "abstractNum" ] ++ + [Elem e | e <- allElts, qName (elName e) == "num" ] } let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -229,10 +306,11 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ mknode "dc:title" [] (stringify $ docTitle meta) - : mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] - (maybe "" id $ normalizeDate $ stringify $ docDate meta) - : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here - : map (mknode "dc:creator" [] . stringify) (docAuthors meta) + : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) + : maybe [] + (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] $ x + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] $ x + ]) (normalizeDate $ stringify $ docDate meta) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps let relsPath = "_rels/.rels" @@ -245,24 +323,41 @@ writeDocx opts doc@(Pandoc meta _) = do ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties") ,("Target","docProps/app.xml")] , [("Id","rId3") - ,("Type","http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties") + ,("Type","http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties") ,("Target","docProps/core.xml")] ] let relsEntry = toEntry relsPath epochtime $ renderXml rels - let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap` - parseXml refArchive path - docPropsAppEntry <- entryFromArchive "docProps/app.xml" - themeEntry <- entryFromArchive "word/theme/theme1.xml" - fontTableEntry <- entryFromArchive "word/fontTable.xml" - webSettingsEntry <- entryFromArchive "word/webSettings.xml" + let entryFromArchive arch path = + maybe (fail $ path ++ " corrupt or missing in reference docx") + return + (findEntryByPath path arch `mplus` findEntryByPath path distArchive) + docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" + themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" + fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" + -- we use dist archive for settings.xml, because Word sometimes + -- adds references to footnotes or endnotes we don't have... + settingsEntry <- entryFromArchive distArchive "word/settings.xml" + webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" + headerFooterEntries <- mapM (entryFromArchive refArchive) $ + mapMaybe (\e -> fmap ("word/" ++) $ extractTarget e) + (headers ++ footers) + let miscRelEntries = [ e | e <- zEntries refArchive + , "word/_rels/" `isPrefixOf` (eRelativePath e) + , ".xml.rels" `isSuffixOf` (eRelativePath e) + , eRelativePath e /= "word/_rels/document.xml.rels" + , eRelativePath e /= "word/_rels/footnotes.xml.rels" ] + let otherMediaEntries = [ e | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] -- Create archive let archive = foldr addEntryToArchive emptyArchive $ contentTypesEntry : relsEntry : contentEntry : relEntry : footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : - fontTableEntry : webSettingsEntry : imageEntries + fontTableEntry : settingsEntry : webSettingsEntry : + imageEntries ++ headerFooterEntries ++ + miscRelEntries ++ otherMediaEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] @@ -300,29 +395,30 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes $ backgroundColor style ) ] -mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> IO Element -mkNumbering markers lists = do - elts <- mapM mkAbstractNum (M.toList markers) - return $ mknode "w:numbering" - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] - $ elts ++ zipWith (mkNum markers) lists [1..(length lists)] +-- this is the lowest number used for a list numId +baseListId :: Int +baseListId = 1000 -mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element -mkNum markers marker numid = +mkNumbering :: [ListMarker] -> IO [Element] +mkNumbering lists = do + elts <- mapM mkAbstractNum (ordNub lists) + return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] + +mkNum :: ListMarker -> Int -> Element +mkNum marker numid = mknode "w:num" [("w:numId",show numid)] - $ mknode "w:abstractNumId" [("w:val",show absnumid)] () + $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] () : case marker of NoMarker -> [] BulletMarker -> [] NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] - where absnumid = maybe 0 id $ M.lookup marker markers -mkAbstractNum :: (ListMarker,Int) -> IO Element -mkAbstractNum (marker,numid) = do +mkAbstractNum :: ListMarker -> IO Element +mkAbstractNum marker = do nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) - return $ mknode "w:abstractNum" [("w:abstractNumId",show numid)] + return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..6] @@ -374,10 +470,11 @@ mkLvl marker lvl = patternFor _ s = s ++ "." getNumId :: WS Int -getNumId = length `fmap` gets stLists +getNumId = ((999 +) . length) `fmap` gets stLists --- | Convert Pandoc document to two OpenXML elements (the main document and footnotes). -writeOpenXML :: WriterOptions -> Pandoc -> WS (Element, Element) +-- | Convert Pandoc document to two lists of +-- OpenXML elements (the main document and footnotes). +writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) writeOpenXML opts (Pandoc meta blocks) = do let tit = docTitle meta ++ case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> LineBreak : xs @@ -395,19 +492,7 @@ writeOpenXML opts (Pandoc meta blocks) = do doc' <- blocksToOpenXML opts blocks' notes' <- reverse `fmap` gets stFootnotes let meta' = title ++ authors ++ date - let stdAttributes = - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") - ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") - ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") - ,("xmlns:o","urn:schemas-microsoft-com:office:office") - ,("xmlns:v","urn:schemas-microsoft-com:vml") - ,("xmlns:w10","urn:schemas-microsoft-com:office:word") - ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") - ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") - ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - let doc = mknode "w:document" stdAttributes $ mknode "w:body" [] (meta' ++ doc') - let notes = mknode "w:footnotes" stdAttributes notes' - return (doc, notes) + return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] @@ -427,6 +512,7 @@ getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique -- | Convert a Pandoc block element to OpenXML. blockToOpenXML :: WriterOptions -> Block -> WS [Element] blockToOpenXML _ Null = return [] +blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do contents <- withParaProp (pStyle $ "Heading" ++ show lev) $ blockToOpenXML opts (Para lst) @@ -458,8 +544,8 @@ blockToOpenXML opts (Para lst) = do contents <- inlinesToOpenXML opts lst return [mknode "w:p" [] (paraProps ++ contents)] blockToOpenXML _ (RawBlock format str) - | format == "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | format == Format "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] blockToOpenXML opts (BlockQuote blocks) = withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks blockToOpenXML opts (CodeBlock attrs str) = @@ -485,10 +571,12 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] + let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] $ + [mknode "w:pStyle" [("w:val","Compact")] ()]]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents - then [mknode "w:p" [] ()] + then emptyCell else contents let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt @@ -533,17 +621,13 @@ addList :: ListMarker -> WS () addList marker = do lists <- gets stLists modify $ \st -> st{ stLists = lists ++ [marker] } - numStyles <- gets stNumStyles - case M.lookup marker numStyles of - Just _ -> return () - Nothing -> modify $ \st -> - st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles } listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] listItemToOpenXML _ _ [] = return [] listItemToOpenXML opts numid (first:rest) = do first' <- withNumId numid $ blockToOpenXML opts first - rest' <- withNumId 1 $ blocksToOpenXML opts rest + -- baseListId is the code for no list marker: + rest' <- withNumId baseListId $ blocksToOpenXML opts rest return $ first' ++ rest' alignmentToString :: Alignment -> [Char] @@ -632,6 +716,12 @@ formattedString str = do inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ") +inlineToOpenXML opts (Span (_,classes,_) ils) = do + let off x = withTextProp (mknode x [("w:val","0")] ()) + ((if "csl-no-emph" `elem` classes then off "w:i" else id) . + (if "csl-no-strong" `elem` classes then off "w:b" else id) . + (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id)) + $ inlinesToOpenXML opts ils inlineToOpenXML opts (Strong lst) = withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML opts (Emph lst) = @@ -650,8 +740,8 @@ inlineToOpenXML opts (Strikeout lst) = $ inlinesToOpenXML opts lst inlineToOpenXML _ LineBreak = return [br] inlineToOpenXML _ (RawInline f str) - | f == "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | f == Format "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] inlineToOpenXML opts (Quoted quoteType lst) = inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close] where (open, close) = case quoteType of @@ -663,15 +753,18 @@ inlineToOpenXML opts (Math mathType str) = do else DisplayInline case texMathToOMML displayType str of Right r -> return [r] - Left _ -> inlinesToOpenXML opts (readTeXMath str) + Left _ -> inlinesToOpenXML opts (readTeXMath' mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML _ (Code attrs str) = +inlineToOpenXML opts (Code attrs str) = withTextProp (rStyle "VerbatimChar") - $ case highlight formatOpenXML attrs str of - Nothing -> intercalate [br] - `fmap` (mapM formattedString $ lines str) - Just h -> return h - where formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted + where unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] [ rStyle $ show toktype ] @@ -682,7 +775,7 @@ inlineToOpenXML opts (Note bs) = do let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] (rStyle "FootnoteRef") , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline "openxml" $ ppElement notemarker + let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs insertNoteRef (Para ils : xs) = Para (notemarkerXml : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs @@ -721,14 +814,13 @@ inlineToOpenXML opts (Image alt (src, tit)) = do case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] Nothing -> do - let sourceDir = writerSourceDirectory opts - res <- liftIO $ E.try $ fetchItem sourceDir src + res <- liftIO $ fetchItem (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." -- emit alt text inlinesToOpenXML opts alt - Right (img, _) -> do + Right (img, mt) -> do ident <- ("rId"++) `fmap` getUniqueId let size = imageSize img let (xpt,ypt) = maybe (120,120) sizeInPoints size @@ -767,18 +859,21 @@ inlineToOpenXML opts (Image alt (src, tit)) = do , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] () , graphic ] - let imgext = case imageType img of - Just Png -> ".png" - Just Jpeg -> ".jpeg" - Just Gif -> ".gif" - Just Pdf -> ".pdf" - Nothing -> takeExtension src + let imgext = case mt >>= extensionFromMimeType of + Just x -> '.':x + Nothing -> case imageType img of + Just Png -> ".png" + Just Jpeg -> ".jpeg" + Just Gif -> ".gif" + Just Pdf -> ".pdf" + Just Eps -> ".eps" + Nothing -> "" if null imgext then -- without an extension there is no rule for content type inlinesToOpenXML opts alt -- return alt to avoid corrupted docx else do let imgpath = "media/" ++ ident ++ imgext - let mbMimeType = getMimeType imgpath + let mbMimeType = mt <|> getMimeType imgpath -- insert mime type to use in constructing [Content_Types].xml modify $ \st -> st{ stImages = M.insert src (ident, imgpath, mbMimeType, imgElt, img) @@ -788,32 +883,10 @@ inlineToOpenXML opts (Image alt (src, tit)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] -parseXml :: Archive -> String -> IO Element -parseXml refArchive relpath = - case (findEntryByPath relpath refArchive >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of - Just d -> return d - Nothing -> fail $ relpath ++ " missing in reference docx" - -isDisplayMath :: Inline -> Bool -isDisplayMath (Math DisplayMath _) = True -isDisplayMath _ = False - -stripLeadingTrailingSpace :: [Inline] -> [Inline] -stripLeadingTrailingSpace = go . reverse . go . reverse - where go (Space:xs) = xs - go xs = xs - -fixDisplayMath :: Block -> [Block] -fixDisplayMath (Plain lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - map (Plain . stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath (Para lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - map (Para . stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath x = [x] +parseXml :: Archive -> Archive -> String -> IO Element +parseXml refArchive distArchive relpath = + case ((findEntryByPath relpath refArchive `mplus` + findEntryByPath relpath distArchive) + >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of + Just d -> return d + Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index f171a2560..b6687c330 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,6 +1,6 @@ -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-} {- -Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,16 +30,18 @@ Conversion of 'Pandoc' documents to EPUB. -} module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef -import Data.Maybe ( fromMaybe, isNothing ) +import qualified Data.Map as M +import Data.Maybe ( fromMaybe ) import Data.List ( isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) -import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName ) +import System.FilePath ( (</>), takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import Text.Pandoc.UTF8 ( fromStringLazy, toString ) +import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.SelfContained ( makeSelfContained ) import Codec.Archive.Zip +import Control.Applicative ((<$>)) import Data.Time.Clock.POSIX import Data.Time import System.Locale @@ -48,25 +50,18 @@ import qualified Text.Pandoc.Shared as Shared import Text.Pandoc.Builder (fromList, setMeta) import Text.Pandoc.Options import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Control.Monad.State import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) -import Data.Char ( toLower ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Data.Char ( toLower, isDigit, isAlphaNum ) +import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) -#if MIN_VERSION_base(4,6,0) -#else -import Prelude hiding (catch) -#endif -import Control.Exception (catch, SomeException) -#if MIN_VERSION_blaze_html(0,5,0) +import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -#else -import Text.Blaze.Renderer.Utf8 (renderHtml) -#endif +import Text.HTML.TagSoup -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -74,12 +69,260 @@ import Text.Blaze.Renderer.Utf8 (renderHtml) -- in filenames, chapter0003.xhtml. data Chapter = Chapter (Maybe [Int]) [Block] +data EPUBMetadata = EPUBMetadata{ + epubIdentifier :: [Identifier] + , epubTitle :: [Title] + , epubDate :: [Date] + , epubLanguage :: String + , epubCreator :: [Creator] + , epubContributor :: [Creator] + , epubSubject :: [String] + , epubDescription :: Maybe String + , epubType :: Maybe String + , epubFormat :: Maybe String + , epubPublisher :: Maybe String + , epubSource :: Maybe String + , epubRelation :: Maybe String + , epubCoverage :: Maybe String + , epubRights :: Maybe String + , epubCoverImage :: Maybe String + , epubStylesheet :: Maybe Stylesheet + } deriving Show + +data Stylesheet = StylesheetPath FilePath + | StylesheetContents String + deriving Show + +data Date = Date{ + dateText :: String + , dateEvent :: Maybe String + } deriving Show + +data Creator = Creator{ + creatorText :: String + , creatorRole :: Maybe String + , creatorFileAs :: Maybe String + } deriving Show + +data Identifier = Identifier{ + identifierText :: String + , identifierScheme :: Maybe String + } deriving Show + +data Title = Title{ + titleText :: String + , titleFileAs :: Maybe String + , titleType :: Maybe String + } deriving Show + +dcName :: String -> QName +dcName n = QName n Nothing (Just "dc") + +dcNode :: Node t => String -> t -> Element +dcNode = node . dcName + +opfName :: String -> QName +opfName n = QName n Nothing (Just "opf") + +plainify :: [Inline] -> String +plainify t = + trimr $ writePlain def{ writerStandalone = False } + $ Pandoc nullMeta [Plain $ walk removeNote t] + +removeNote :: Inline -> Inline +removeNote (Note _) = Str "" +removeNote x = x + +toId :: FilePath -> String +toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' + then x + else '_') . takeFileName + +getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata +getEPUBMetadata opts meta = do + let md = metadataFromMeta opts meta + let elts = onlyElems $ parseXML $ writerEpubMetadata opts + let md' = foldr addMetadataFromXML md elts + let addIdentifier m = + if null (epubIdentifier m) + then do + randomId <- fmap show getRandomUUID + return $ m{ epubIdentifier = [Identifier randomId Nothing] } + else return m + let addLanguage m = + if null (epubLanguage m) + then case lookup "lang" (writerVariables opts) of + Just x -> return m{ epubLanguage = x } + Nothing -> do + localeLang <- E.catch (liftM + (map (\c -> if c == '_' then '-' else c) . + takeWhile (/='.')) $ getEnv "LANG") + (\e -> let _ = (e :: E.SomeException) in return "en-US") + return m{ epubLanguage = localeLang } + else return m + let fixDate m = + if null (epubDate m) + then do + currentTime <- getCurrentTime + return $ m{ epubDate = [ Date{ + dateText = showDateTimeISO8601 currentTime + , dateEvent = Nothing } ] } + else return m + let addAuthor m = + if any (\c -> creatorRole c == Just "aut") $ epubCreator m + then return m + else do + let authors' = map plainify $ docAuthors meta + let toAuthor name = Creator{ creatorText = name + , creatorRole = Just "aut" + , creatorFileAs = Nothing } + return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } + addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage + +addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata +addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md + | name == "identifier" = md{ epubIdentifier = + Identifier{ identifierText = strContent e + , identifierScheme = lookupAttr (opfName "scheme") attrs + } : epubIdentifier md } + | name == "title" = md{ epubTitle = + Title{ titleText = strContent e + , titleFileAs = getAttr "file-as" + , titleType = getAttr "type" + } : epubTitle md } + | name == "date" = md{ epubDate = + Date{ dateText = fromMaybe "" $ normalizeDate' $ strContent e + , dateEvent = getAttr "event" + } : epubDate md } + | name == "language" = md{ epubLanguage = strContent e } + | name == "creator" = md{ epubCreator = + Creator{ creatorText = strContent e + , creatorRole = getAttr "role" + , creatorFileAs = getAttr "file-as" + } : epubCreator md } + | name == "contributor" = md{ epubContributor = + Creator { creatorText = strContent e + , creatorRole = getAttr "role" + , creatorFileAs = getAttr "file-as" + } : epubContributor md } + | name == "subject" = md{ epubSubject = strContent e : epubSubject md } + | name == "description" = md { epubDescription = Just $ strContent e } + | name == "type" = md { epubType = Just $ strContent e } + | name == "format" = md { epubFormat = Just $ strContent e } + | name == "type" = md { epubType = Just $ strContent e } + | name == "publisher" = md { epubPublisher = Just $ strContent e } + | name == "source" = md { epubSource = Just $ strContent e } + | name == "relation" = md { epubRelation = Just $ strContent e } + | name == "coverage" = md { epubCoverage = Just $ strContent e } + | name == "rights" = md { epubRights = Just $ strContent e } + | otherwise = md + where getAttr n = lookupAttr (opfName n) attrs +addMetadataFromXML _ md = md + +metaValueToString :: MetaValue -> String +metaValueToString (MetaString s) = s +metaValueToString (MetaInlines ils) = plainify ils +metaValueToString (MetaBlocks bs) = plainify $ query (:[]) bs +metaValueToString (MetaBool b) = show b +metaValueToString _ = "" + +getList :: String -> Meta -> (MetaValue -> a) -> [a] +getList s meta handleMetaValue = + case lookupMeta s meta of + Just (MetaList xs) -> map handleMetaValue xs + Just mv -> [handleMetaValue mv] + Nothing -> [] + +getIdentifier :: Meta -> [Identifier] +getIdentifier meta = getList "identifier" meta handleMetaValue + where handleMetaValue (MetaMap m) = + Identifier{ identifierText = maybe "" metaValueToString + $ M.lookup "text" m + , identifierScheme = metaValueToString <$> + M.lookup "scheme" m } + handleMetaValue mv = Identifier (metaValueToString mv) Nothing + +getTitle :: Meta -> [Title] +getTitle meta = getList "title" meta handleMetaValue + where handleMetaValue (MetaMap m) = + Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m + , titleFileAs = metaValueToString <$> M.lookup "file-as" m + , titleType = metaValueToString <$> M.lookup "type" m } + handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing + +getCreator :: String -> Meta -> [Creator] +getCreator s meta = getList s meta handleMetaValue + where handleMetaValue (MetaMap m) = + Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m + , creatorFileAs = metaValueToString <$> M.lookup "file-as" m + , creatorRole = metaValueToString <$> M.lookup "role" m } + handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing + +getDate :: String -> Meta -> [Date] +getDate s meta = getList s meta handleMetaValue + where handleMetaValue (MetaMap m) = + Date{ dateText = maybe "" id $ + M.lookup "text" m >>= normalizeDate' . metaValueToString + , dateEvent = metaValueToString <$> M.lookup "event" m } + handleMetaValue mv = Date { dateText = maybe "" + id $ normalizeDate' $ metaValueToString mv + , dateEvent = Nothing } + +simpleList :: String -> Meta -> [String] +simpleList s meta = + case lookupMeta s meta of + Just (MetaList xs) -> map metaValueToString xs + Just x -> [metaValueToString x] + Nothing -> [] + +metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata +metadataFromMeta opts meta = EPUBMetadata{ + epubIdentifier = identifiers + , epubTitle = titles + , epubDate = date + , epubLanguage = language + , epubCreator = creators + , epubContributor = contributors + , epubSubject = subjects + , epubDescription = description + , epubType = epubtype + , epubFormat = format + , epubPublisher = publisher + , epubSource = source + , epubRelation = relation + , epubCoverage = coverage + , epubRights = rights + , epubCoverImage = coverImage + , epubStylesheet = stylesheet + } + where identifiers = getIdentifier meta + titles = getTitle meta + date = getDate "date" meta + language = maybe "" metaValueToString $ + lookupMeta "language" meta `mplus` lookupMeta "lang" meta + creators = getCreator "creator" meta + contributors = getCreator "contributor" meta + subjects = simpleList "subject" meta + description = metaValueToString <$> lookupMeta "description" meta + epubtype = metaValueToString <$> lookupMeta "type" meta + format = metaValueToString <$> lookupMeta "format" meta + publisher = metaValueToString <$> lookupMeta "publisher" meta + source = metaValueToString <$> lookupMeta "source" meta + relation = metaValueToString <$> lookupMeta "relation" meta + coverage = metaValueToString <$> lookupMeta "coverage" meta + rights = metaValueToString <$> lookupMeta "rights" meta + coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` + (metaValueToString <$> lookupMeta "cover-image" meta) + stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` + ((StylesheetPath . metaValueToString) <$> + lookupMeta "stylesheet" meta) + -- | Produce an EPUB file from a Pandoc document. writeEPUB :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString writeEPUB opts doc@(Pandoc meta _) = do - let version = maybe EPUB2 id (writerEpubVersion opts) + let version = fromMaybe EPUB2 (writerEpubVersion opts) let epub3 = version == EPUB3 epochtime <- floor `fmap` getPOSIXTime let mkEntry path content = toEntry path epochtime content @@ -97,17 +340,16 @@ writeEPUB opts doc@(Pandoc meta _) = do then MathML Nothing else writerHTMLMathMethod opts , writerWrapText = False } - let sourceDir = writerSourceDirectory opts' - let mbCoverImage = lookup "epub-cover-image" vars + metadata <- getEPUBMetadata opts' meta -- cover page (cpgEntry, cpicEntry) <- - case mbCoverImage of + case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do - let coverImage = "cover-image" ++ takeExtension img + let coverImage = "media/" ++ takeFileName img let cpContent = renderHtml $ writeHtml opts' - (Pandoc meta [RawBlock "html" $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) + (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) imgContent <- B.readFile img return ( [mkEntry "cover.xhtml" cpContent] , [mkEntry coverImage imgContent] ) @@ -119,14 +361,19 @@ writeEPUB opts doc@(Pandoc meta _) = do let tpEntry = mkEntry "title_page.xhtml" tpContent -- handle pictures - picsRef <- newIORef [] - Pandoc _ blocks <- bottomUpM - (transformInline opts' sourceDir picsRef) doc - pics <- readIORef picsRef - let readPicEntry (oldsrc, newsrc) = do - (img,_) <- fetchItem sourceDir oldsrc - return $ toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img - picEntries <- mapM readPicEntry pics + mediaRef <- newIORef [] + Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= + walkM (transformBlock opts' mediaRef) + pics <- readIORef mediaRef + let readPicEntry entries (oldsrc, newsrc) = do + res <- fetchItem (writerSourceURL opts') oldsrc + case res of + Left _ -> do + warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." + return entries + Right (img,_) -> return $ + (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries + picEntries <- foldM readPicEntry [] pics -- handle fonts let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f @@ -179,59 +426,60 @@ writeEPUB opts doc@(Pandoc meta _) = do chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) $ renderHtml $ writeHtml opts'{ writerNumberOffset = - maybe [] id mbnum } + fromMaybe [] mbnum } $ case bs of (Header _ _ xs : _) -> - Pandoc (setMeta "title" (fromList xs) nullMeta) bs + -- remove notes or we get doubled footnotes + Pandoc (setMeta "title" (walk removeNote $ fromList xs) + nullMeta) bs _ -> Pandoc nullMeta bs let chapterEntries = zipWith chapToEntry [1..] chapters -- incredibly inefficient (TODO): - let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + let containsMathML ent = epub3 && + "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + let containsSVG ent = epub3 && + "<svg" `isInfixOf` (B8.unpack $ fromEntry ent) + let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent] -- contents.opf - localeLang <- catch (liftM (map (\c -> if c == '_' then '-' else c) . - takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: SomeException) in return "en-US") - let lang = case lookup "lang" (writerVariables opts') of - Just x -> x - Nothing -> localeLang - uuid <- getRandomUUID let chapterNode ent = unode "item" ! - ([("id", takeBaseName $ eRelativePath ent), + ([("id", toId $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", "application/xhtml+xml")] - ++ [("properties","mathml") | epub3 && - containsMathML ent]) $ () + ++ case props ent of + [] -> [] + xs -> [("properties", unwords xs)]) + $ () let chapterRefNode ent = unode "itemref" ! - [("idref", takeBaseName $ eRelativePath ent)] $ () + [("idref", toId $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! - [("id", takeBaseName $ eRelativePath ent), + [("id", toId $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", fromMaybe "application/octet-stream" - $ imageTypeOf $ eRelativePath ent)] $ () + $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! - [("id", takeBaseName $ eRelativePath ent), + [("id", toId $ eRelativePath ent), ("href", eRelativePath ent), - ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ () - let plainify t = trimr $ - writePlain opts'{ writerStandalone = False } $ - Pandoc meta [Plain t] - let plainTitle = plainify $ docTitle meta - let plainAuthors = map plainify $ docAuthors meta + ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () + let plainTitle = case docTitle meta of + [] -> case epubTitle metadata of + [] -> "UNTITLED" + (x:_) -> titleText x + x -> plainify x + let uuid = case epubIdentifier metadata of + (x:_) -> identifierText x -- use first identifier as UUID + [] -> error "epubIdentifier is null" -- shouldn't happen currentTime <- getCurrentTime - let plainDate = maybe (showDateTimeISO8601 currentTime) id - $ normalizeDate $ stringify $ docDate meta - let contentsData = fromStringLazy $ ppTopElement $ + let contentsData = UTF8.fromStringLazy $ ppTopElement $ unode "package" ! [("version", case version of EPUB2 -> "2.0" EPUB3 -> "3.0") ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","BookId")] $ - [ metadataElement version (writerEpubMetadata opts') - uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage + ,("unique-identifier","epub-id-1")] $ + [ metadataElement version metadata currentTime , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () @@ -243,14 +491,19 @@ writeEPUB opts doc@(Pandoc meta _) = do [("properties","nav") | epub3 ]) $ () ] ++ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ - map pictureNode (cpicEntry ++ picEntries) ++ + (case cpicEntry of + [] -> [] + (x:_) -> [add_attrs + [Attr (unqual "properties") "cover-image" | epub3] + (pictureNode x)]) ++ + map pictureNode picEntries ++ map fontNode fontEntries , unode "spine" ! [("toc","ncx")] $ - case mbCoverImage of + case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! - [("idref", "cover"),("linear","no")] $ () ] - ++ ((unode "itemref" ! [("idref", "title_page") + [("idref", "cover_xhtml"),("linear","no")] $ () ] + ++ ((unode "itemref" ! [("idref", "title_page_xhtml") ,("linear", if null (docTitle meta) then "no" else "yes")] $ ()) : @@ -260,8 +513,13 @@ writeEPUB opts doc@(Pandoc meta _) = do else "no")] $ ()) : map chapterRefNode chapterEntries) , unode "guide" $ - unode "reference" ! - [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () + [ unode "reference" ! + [("type","toc"),("title",plainTitle), + ("href","nav.xhtml")] $ () + ] ++ + [ unode "reference" ! + [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing + ] ] let contentsEntry = mkEntry "content.opf" contentsData @@ -303,22 +561,22 @@ writeEPUB opts doc@(Pandoc meta _) = do [ unode "navLabel" $ unode "text" (plainify $ docTitle meta) , unode "content" ! [("src","title_page.xhtml")] $ () ] - let tocData = fromStringLazy $ ppTopElement $ + let tocData = UTF8.fromStringLazy $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ [ unode "meta" ! [("name","dtb:uid") - ,("content", show uuid)] $ () + ,("content", uuid)] $ () , unode "meta" ! [("name","dtb:depth") ,("content", "1")] $ () , unode "meta" ! [("name","dtb:totalPageCount") ,("content", "0")] $ () , unode "meta" ! [("name","dtb:maxPageNumber") ,("content", "0")] $ () - ] ++ case mbCoverImage of + ] ++ case epubCoverImage metadata of Nothing -> [] - Just _ -> [unode "meta" ! [("name","cover"), - ("content","cover-image")] $ ()] + Just img -> [unode "meta" ! [("name","cover"), + ("content", toId img)] $ ()] , unode "docTitle" $ unode "text" $ plainTitle , unode "navMap" $ tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1 @@ -335,7 +593,7 @@ writeEPUB opts doc@(Pandoc meta _) = do (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if epub3 then "nav" else "div" - let navData = fromStringLazy $ ppTopElement $ + let navData = UTF8.fromStringLazy $ ppTopElement $ unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml") ,("xmlns:epub","http://www.idpf.org/2007/ops")] $ [ unode "head" $ @@ -349,10 +607,10 @@ writeEPUB opts doc@(Pandoc meta _) = do let navEntry = mkEntry "nav.xhtml" navData -- mimetype - let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip" + let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip" -- container.xml - let containerData = fromStringLazy $ ppTopElement $ + let containerData = UTF8.fromStringLazy $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ @@ -361,18 +619,19 @@ writeEPUB opts doc@(Pandoc meta _) = do let containerEntry = mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml - let apple = fromStringLazy $ ppTopElement $ + let apple = UTF8.fromStringLazy $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ unode "option" ! [("name","specified-fonts")] $ "true" let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- stylesheet - stylesheet <- case writerEpubStylesheet opts of - Just s -> return s - Nothing -> toString `fmap` + stylesheet <- case epubStylesheet metadata of + Just (StylesheetPath fp) -> UTF8.readFile fp + Just (StylesheetContents s) -> return s + Nothing -> UTF8.toString `fmap` readDataFile (writerUserDataDir opts) "epub.css" - let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet + let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet -- construct archive let archive = foldr addEntryToArchive emptyArchive @@ -381,64 +640,167 @@ writeEPUB opts doc@(Pandoc meta _) = do (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) return $ fromArchive archive -metadataElement :: EPUBVersion -> String -> UUID -> String -> String -> [String] - -> String -> UTCTime -> Maybe a -> Element -metadataElement version metadataXML uuid lang title authors date currentTime mbCoverImage = - let userNodes = parseXML metadataXML - elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") - ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ - filter isMetadataElement $ onlyElems userNodes - dublinElements = ["contributor","coverage","creator","date", - "description","format","identifier","language","publisher", - "relation","rights","source","subject","title","type"] - isMetadataElement e = (qPrefix (elName e) == Just "dc" && - qName (elName e) `elem` dublinElements) || - (qPrefix (elName e) == Nothing && - qName (elName e) `elem` ["link","meta"]) - contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) - newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ - [ unode "dc:language" lang | not (elt `contains` "language") ] ++ - [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | - not (elt `contains` "identifier") ] ++ - [ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2] - $ a | a <- authors, not (elt `contains` "creator") ] ++ - [ unode "dc:date" date | not (elt `contains` "date") ] ++ - [ unode "meta" ! [("property", "dcterms:modified")] $ - (showDateTimeISO8601 currentTime) | version == EPUB3] ++ - [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () | - not (isNothing mbCoverImage) ] - in elt{ elContent = elContent elt ++ map Elem newNodes } +metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element +metadataElement version md currentTime = + unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes + where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes + ++ creatorNodes ++ contributorNodes ++ subjectNodes + ++ descriptionNodes ++ typeNodes ++ formatNodes + ++ publisherNodes ++ sourceNodes ++ relationNodes + ++ coverageNodes ++ rightsNodes ++ coverImageNodes + ++ modifiedNodes + withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) + ([1..] :: [Int])) + identifierNodes = withIds "epub-id" toIdentifierNode $ + epubIdentifier md + titleNodes = withIds "epub-title" toTitleNode $ epubTitle md + dateNodes = if version == EPUB2 + then withIds "epub-date" toDateNode $ epubDate md + else -- epub3 allows only one dc:date + -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcdate + case epubDate md of + [] -> [] + (x:_) -> [dcNode "date" ! [("id","epub-date")] + $ dateText x] + languageNodes = [dcTag "language" $ epubLanguage md] + creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ + epubCreator md + contributorNodes = withIds "epub-contributor" + (toCreatorNode "contributor") $ epubContributor md + subjectNodes = map (dcTag "subject") $ epubSubject md + descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md + typeNodes = maybe [] (dcTag' "type") $ epubType md + formatNodes = maybe [] (dcTag' "format") $ epubFormat md + publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md + sourceNodes = maybe [] (dcTag' "source") $ epubSource md + relationNodes = maybe [] (dcTag' "relation") $ epubRelation md + coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md + rightsNodes = maybe [] (dcTag' "rights") $ epubRights md + coverImageNodes = maybe [] + (\img -> [unode "meta" ! [("name","cover"), + ("content",toId img)] $ ()]) + $ epubCoverImage md + modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ + (showDateTimeISO8601 currentTime) | version == EPUB3 ] + dcTag n s = unode ("dc:" ++ n) s + dcTag' n s = [dcTag n s] + toIdentifierNode id' (Identifier txt scheme) + | version == EPUB2 = [dcNode "identifier" ! + ([("id",id')] ++ maybe [] (\x -> [("opf:scheme", x)]) scheme) $ + txt] + | otherwise = [dcNode "identifier" ! [("id",id')] $ txt] ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","identifier-type"), + ("scheme","onix:codelist5")] $ x]) + (schemeToOnix `fmap` scheme) + toCreatorNode s id' creator + | version == EPUB2 = [dcNode s ! + (("id",id') : + maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++ + maybe [] (\x -> [("opf:role",x)]) + (creatorRole creator >>= toRelator)) $ creatorText creator] + | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","file-as")] $ x]) + (creatorFileAs creator) ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","role"), + ("scheme","marc:relators")] $ x]) + (creatorRole creator >>= toRelator) + toTitleNode id' title + | version == EPUB2 = [dcNode "title" ! + (("id",id') : + maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++ + maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $ + titleText title] + | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] + ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","file-as")] $ x]) + (titleFileAs title) ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","title-type")] $ x]) + (titleType title) + toDateNode id' date = [dcNode "date" ! + (("id",id') : + maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ + dateText date] + schemeToOnix "ISBN-10" = "02" + schemeToOnix "GTIN-13" = "03" + schemeToOnix "UPC" = "04" + schemeToOnix "ISMN-10" = "05" + schemeToOnix "DOI" = "06" + schemeToOnix "LCCN" = "13" + schemeToOnix "GTIN-14" = "14" + schemeToOnix "ISBN-13" = "15" + schemeToOnix "Legal deposit number" = "17" + schemeToOnix "URN" = "22" + schemeToOnix "OCLC" = "23" + schemeToOnix "ISMN-13" = "25" + schemeToOnix "ISBN-A" = "26" + schemeToOnix "JP" = "27" + schemeToOnix "OLCC" = "28" + schemeToOnix _ = "01" showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" +transformTag :: WriterOptions + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> Tag String + -> IO (Tag String) +transformTag opts mediaRef tag@(TagOpen name attr) + | name == "video" || name == "source" || name == "img" = do + let src = fromAttrib "src" tag + let poster = fromAttrib "poster" tag + let oldsrc = maybe src (</> src) $ writerSourceURL opts + let oldposter = maybe poster (</> poster) $ writerSourceURL opts + newsrc <- modifyMediaRef mediaRef oldsrc + newposter <- modifyMediaRef mediaRef oldposter + let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ + [("src", newsrc) | not (null newsrc)] ++ + [("poster", newposter) | not (null newposter)] + return $ TagOpen name attr' +transformTag _ _ tag = return tag + +modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath +modifyMediaRef _ "" = return "" +modifyMediaRef mediaRef oldsrc = do + media <- readIORef mediaRef + case lookup oldsrc media of + Just n -> return n + Nothing -> do + let new = "media/file" ++ show (length media) ++ + takeExtension oldsrc + modifyIORef mediaRef ( (oldsrc, new): ) + return new + +transformBlock :: WriterOptions + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> Block + -> IO Block +transformBlock opts mediaRef (RawBlock fmt raw) + | fmt == Format "html" = do + let tags = parseTags raw + tags' <- mapM (transformTag opts mediaRef) tags + return $ RawBlock fmt (renderTags tags') +transformBlock _ _ b = return b + transformInline :: WriterOptions - -> FilePath - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline opts sourceDir picsRef (Image lab (src,tit)) - | isAbsoluteURI src = do - raw <- makeSelfContained Nothing - $ writeHtmlInline opts (Image lab (src,tit)) - return $ RawInline "html" raw - | otherwise = do +transformInline opts mediaRef (Image lab (src,tit)) = do let src' = unEscapeString src - pics <- readIORef picsRef - let oldsrc = sourceDir </> src' - let ext = takeExtension src' - newsrc <- case lookup oldsrc pics of - Just n -> return n - Nothing -> do - let new = "images/img" ++ show (length pics) ++ ext - modifyIORef picsRef ( (oldsrc, new): ) - return new + let oldsrc = maybe src' (</> src) $ writerSourceURL opts + newsrc <- modifyMediaRef mediaRef oldsrc return $ Image lab (newsrc, tit) -transformInline opts _ _ (x@(Math _ _)) +transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained Nothing $ writeHtmlInline opts x - return $ RawInline "html" raw -transformInline _ _ _ x = return x + return $ RawInline (Format "html") raw +transformInline _ _ x = return x writeHtmlInline :: WriterOptions -> Inline -> String writeHtmlInline opts z = trimr $ @@ -462,16 +824,12 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs -imageTypeOf :: FilePath -> Maybe String -imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of - "jpg" -> Just "image/jpeg" - "jpeg" -> Just "image/jpeg" - "jfif" -> Just "image/jpeg" - "png" -> Just "image/png" - "gif" -> Just "image/gif" - "svg" -> Just "image/svg+xml" - _ -> Nothing - +mediaTypeOf :: FilePath -> Maybe String +mediaTypeOf x = case getMimeType x of + Just y@('i':'m':'a':'g':'e':_) -> Just y + Just y@('v':'i':'d':'e':'o':_) -> Just y + Just y@('a':'u':'d':'i':'o':_) -> Just y + _ -> Nothing data IdentState = IdentState{ chapterNumber :: Int, @@ -519,9 +877,293 @@ correlateRefs chapterHeaderLevel bs = -- Replace internal link references using the table produced -- by correlateRefs. replaceRefs :: [(String,String)] -> [Block] -> [Block] -replaceRefs refTable = bottomUp replaceOneRef +replaceRefs refTable = walk replaceOneRef where replaceOneRef x@(Link lab ('#':xs,tit)) = case lookup xs refTable of Just url -> Link lab (url,tit) Nothing -> x replaceOneRef x = x + +-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM +normalizeDate' :: String -> Maybe String +normalizeDate' xs = + let xs' = trim xs in + case xs' of + [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY + [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM + -> Just xs' + _ -> normalizeDate xs' + +toRelator :: String -> Maybe String +toRelator x + | x `elem` relators = Just x + | otherwise = lookup (map toLower x) relatorMap + +relators :: [String] +relators = map snd relatorMap + +relatorMap :: [(String, String)] +relatorMap = + [("abridger", "abr") + ,("actor", "act") + ,("adapter", "adp") + ,("addressee", "rcp") + ,("analyst", "anl") + ,("animator", "anm") + ,("annotator", "ann") + ,("appellant", "apl") + ,("appellee", "ape") + ,("applicant", "app") + ,("architect", "arc") + ,("arranger", "arr") + ,("art copyist", "acp") + ,("art director", "adi") + ,("artist", "art") + ,("artistic director", "ard") + ,("assignee", "asg") + ,("associated name", "asn") + ,("attributed name", "att") + ,("auctioneer", "auc") + ,("author", "aut") + ,("author in quotations or text abstracts", "aqt") + ,("author of afterword, colophon, etc.", "aft") + ,("author of dialog", "aud") + ,("author of introduction, etc.", "aui") + ,("autographer", "ato") + ,("bibliographic antecedent", "ant") + ,("binder", "bnd") + ,("binding designer", "bdd") + ,("blurb writer", "blw") + ,("book designer", "bkd") + ,("book producer", "bkp") + ,("bookjacket designer", "bjd") + ,("bookplate designer", "bpd") + ,("bookseller", "bsl") + ,("braille embosser", "brl") + ,("broadcaster", "brd") + ,("calligrapher", "cll") + ,("cartographer", "ctg") + ,("caster", "cas") + ,("censor", "cns") + ,("choreographer", "chr") + ,("cinematographer", "cng") + ,("client", "cli") + ,("collection registrar", "cor") + ,("collector", "col") + ,("collotyper", "clt") + ,("colorist", "clr") + ,("commentator", "cmm") + ,("commentator for written text", "cwt") + ,("compiler", "com") + ,("complainant", "cpl") + ,("complainant-appellant", "cpt") + ,("complainant-appellee", "cpe") + ,("composer", "cmp") + ,("compositor", "cmt") + ,("conceptor", "ccp") + ,("conductor", "cnd") + ,("conservator", "con") + ,("consultant", "csl") + ,("consultant to a project", "csp") + ,("contestant", "cos") + ,("contestant-appellant", "cot") + ,("contestant-appellee", "coe") + ,("contestee", "cts") + ,("contestee-appellant", "ctt") + ,("contestee-appellee", "cte") + ,("contractor", "ctr") + ,("contributor", "ctb") + ,("copyright claimant", "cpc") + ,("copyright holder", "cph") + ,("corrector", "crr") + ,("correspondent", "crp") + ,("costume designer", "cst") + ,("court governed", "cou") + ,("court reporter", "crt") + ,("cover designer", "cov") + ,("creator", "cre") + ,("curator", "cur") + ,("dancer", "dnc") + ,("data contributor", "dtc") + ,("data manager", "dtm") + ,("dedicatee", "dte") + ,("dedicator", "dto") + ,("defendant", "dfd") + ,("defendant-appellant", "dft") + ,("defendant-appellee", "dfe") + ,("degree granting institution", "dgg") + ,("delineator", "dln") + ,("depicted", "dpc") + ,("depositor", "dpt") + ,("designer", "dsr") + ,("director", "drt") + ,("dissertant", "dis") + ,("distribution place", "dbp") + ,("distributor", "dst") + ,("donor", "dnr") + ,("draftsman", "drm") + ,("dubious author", "dub") + ,("editor", "edt") + ,("editor of compilation", "edc") + ,("editor of moving image work", "edm") + ,("electrician", "elg") + ,("electrotyper", "elt") + ,("enacting jurisdiction", "enj") + ,("engineer", "eng") + ,("engraver", "egr") + ,("etcher", "etr") + ,("event place", "evp") + ,("expert", "exp") + ,("facsimilist", "fac") + ,("field director", "fld") + ,("film director", "fmd") + ,("film distributor", "fds") + ,("film editor", "flm") + ,("film producer", "fmp") + ,("filmmaker", "fmk") + ,("first party", "fpy") + ,("forger", "frg") + ,("former owner", "fmo") + ,("funder", "fnd") + ,("geographic information specialist", "gis") + ,("honoree", "hnr") + ,("host", "hst") + ,("host institution", "his") + ,("illuminator", "ilu") + ,("illustrator", "ill") + ,("inscriber", "ins") + ,("instrumentalist", "itr") + ,("interviewee", "ive") + ,("interviewer", "ivr") + ,("inventor", "inv") + ,("issuing body", "isb") + ,("judge", "jud") + ,("jurisdiction governed", "jug") + ,("laboratory", "lbr") + ,("laboratory director", "ldr") + ,("landscape architect", "lsa") + ,("lead", "led") + ,("lender", "len") + ,("libelant", "lil") + ,("libelant-appellant", "lit") + ,("libelant-appellee", "lie") + ,("libelee", "lel") + ,("libelee-appellant", "let") + ,("libelee-appellee", "lee") + ,("librettist", "lbt") + ,("licensee", "lse") + ,("licensor", "lso") + ,("lighting designer", "lgd") + ,("lithographer", "ltg") + ,("lyricist", "lyr") + ,("manufacture place", "mfp") + ,("manufacturer", "mfr") + ,("marbler", "mrb") + ,("markup editor", "mrk") + ,("metadata contact", "mdc") + ,("metal-engraver", "mte") + ,("moderator", "mod") + ,("monitor", "mon") + ,("music copyist", "mcp") + ,("musical director", "msd") + ,("musician", "mus") + ,("narrator", "nrt") + ,("onscreen presenter", "osp") + ,("opponent", "opn") + ,("organizer of meeting", "orm") + ,("originator", "org") + ,("other", "oth") + ,("owner", "own") + ,("panelist", "pan") + ,("papermaker", "ppm") + ,("patent applicant", "pta") + ,("patent holder", "pth") + ,("patron", "pat") + ,("performer", "prf") + ,("permitting agency", "pma") + ,("photographer", "pht") + ,("plaintiff", "ptf") + ,("plaintiff-appellant", "ptt") + ,("plaintiff-appellee", "pte") + ,("platemaker", "plt") + ,("praeses", "pra") + ,("presenter", "pre") + ,("printer", "prt") + ,("printer of plates", "pop") + ,("printmaker", "prm") + ,("process contact", "prc") + ,("producer", "pro") + ,("production company", "prn") + ,("production designer", "prs") + ,("production manager", "pmn") + ,("production personnel", "prd") + ,("production place", "prp") + ,("programmer", "prg") + ,("project director", "pdr") + ,("proofreader", "pfr") + ,("provider", "prv") + ,("publication place", "pup") + ,("publisher", "pbl") + ,("publishing director", "pbd") + ,("puppeteer", "ppt") + ,("radio director", "rdd") + ,("radio producer", "rpc") + ,("recording engineer", "rce") + ,("recordist", "rcd") + ,("redaktor", "red") + ,("renderer", "ren") + ,("reporter", "rpt") + ,("repository", "rps") + ,("research team head", "rth") + ,("research team member", "rtm") + ,("researcher", "res") + ,("respondent", "rsp") + ,("respondent-appellant", "rst") + ,("respondent-appellee", "rse") + ,("responsible party", "rpy") + ,("restager", "rsg") + ,("restorationist", "rsr") + ,("reviewer", "rev") + ,("rubricator", "rbr") + ,("scenarist", "sce") + ,("scientific advisor", "sad") + ,("screenwriter", "aus") + ,("scribe", "scr") + ,("sculptor", "scl") + ,("second party", "spy") + ,("secretary", "sec") + ,("seller", "sll") + ,("set designer", "std") + ,("setting", "stg") + ,("signer", "sgn") + ,("singer", "sng") + ,("sound designer", "sds") + ,("speaker", "spk") + ,("sponsor", "spn") + ,("stage director", "sgd") + ,("stage manager", "stm") + ,("standards body", "stn") + ,("stereotyper", "str") + ,("storyteller", "stl") + ,("supporting host", "sht") + ,("surveyor", "srv") + ,("teacher", "tch") + ,("technical director", "tcd") + ,("television director", "tld") + ,("television producer", "tlp") + ,("thesis advisor", "ths") + ,("transcriber", "trc") + ,("translator", "trl") + ,("type designer", "tyd") + ,("typographer", "tyg") + ,("university place", "uvp") + ,("videographer", "vdg") + ,("witness", "wit") + ,("wood engraver", "wde") + ,("woodcutter", "wdc") + ,("writer of accompanying material", "wam") + ,("writer of added commentary", "wac") + ,("writer of added lyrics", "wal") + ,("writer of added text", "wat") + ] + diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 27f0c8305..803617f95 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -44,8 +44,8 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers) -import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock) +import Text.Pandoc.Walk -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -157,9 +157,7 @@ renderSection level (ttl, body) = do else cMapM blockToXml body return $ el "section" (title ++ content) where - hasSubsections = any isHeader - isHeader (Header _ _ _) = True - isHeader _ = False + hasSubsections = any isHeaderBlock -- | Only <p> and <empty-line> are allowed within <title> in FB2. formatTitle :: [Inline] -> [Content] @@ -324,6 +322,7 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code") . lines $ s blockToXml (RawBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code") . lines $ s +blockToXml (Div _ bs) = cMapM blockToXml bs blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs blockToXml (OrderedList a bss) = do state <- get @@ -422,15 +421,20 @@ indent = indentBlock indentLines ins = let lns = split isLineBreak ins :: [[Inline]] in intercalate [LineBreak] $ map ((Str spacer):) lns +capitalize :: Inline -> Inline +capitalize (Str xs) = Str $ map toUpper xs +capitalize x = x + -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: Inline -> FBM [Content] toXml (Str s) = return [txt s] +toXml (Span _ ils) = cMapM toXml ils toXml (Emph ss) = list `liftM` wrap "emphasis" ss toXml (Strong ss) = list `liftM` wrap "strong" ss toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss toXml (Superscript ss) = list `liftM` wrap "sup" ss toXml (Subscript ss) = list `liftM` wrap "sub" ss -toXml (SmallCaps ss) = cMapM toXml $ bottomUp (map toUpper) ss +toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific inner <- cMapM toXml ss return $ [txt "‘"] ++ inner ++ [txt "’"] @@ -560,6 +564,7 @@ list = (:[]) plain :: Inline -> String plain (Str s) = s plain (Emph ss) = concat (map plain ss) +plain (Span _ ss) = concat (map plain ss) plain (Strong ss) = concat (map plain ss) plain (Strikeout ss) = concat (map plain ss) plain (Superscript ss) = concat (map plain ss) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 57bf2a349..9a26cf2ac 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings, CPP #-} {-# OPTIONS_GHC -fno-warn-deprecations #-} {- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -39,13 +39,14 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Slides import Text.Pandoc.Highlighting ( highlight, styleToCss, formatHtmlInline, formatHtmlBlock ) -import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.XML (fromEntities, escapeStringForXML) +import Network.URI ( parseURIReference, URI(..) ) import Network.HTTP ( urlEncode ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.String ( fromString ) -import Data.Maybe ( catMaybes ) +import Data.Maybe ( catMaybes, fromMaybe ) import Control.Monad.State import Text.Blaze.Html hiding(contents) import Text.Blaze.Internal(preEscapedString) @@ -115,9 +116,10 @@ pandocToHtml opts (Pandoc meta blocks) = do (fmap renderHtml . blockListToHtml opts) (fmap renderHtml . inlineListToHtml opts) meta - let authsMeta = map stringify $ docAuthors meta - let dateMeta = stringify $ docDate meta - let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts + let stringifyHTML = escapeStringForXML . stringify + let authsMeta = map stringifyHTML $ docAuthors meta + let dateMeta = stringifyHTML $ docDate meta + let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts let sects = hierarchicalize $ if writerSlideVariant opts == NoSlides then blocks @@ -143,7 +145,11 @@ pandocToHtml opts (Pandoc meta blocks) = do MathJax url -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" - $ mempty + $ case writerSlideVariant opts of + SlideousSlides -> + preEscapedString + "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" + _ -> mempty JsMath (Just url) -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" @@ -167,7 +173,7 @@ pandocToHtml opts (Pandoc meta blocks) = do maybe id (defField "toc" . renderHtml) toc $ defField "author-meta" authsMeta $ maybe id (defField "date-meta") (normalizeDate dateMeta) $ - defField "pagetitle" (stringify $ docTitle meta) $ + defField "pagetitle" (stringifyHTML $ docTitle meta) $ defField "idprefix" (writerIdentifierPrefix opts) $ -- these should maybe be set in pandoc.hs defField "slidy-url" @@ -267,11 +273,23 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen else blockToHtml opts (Header level' (id',classes,keyvals) title') let isSec (Sec _ _ _ _ _) = True isSec (Blk _) = False + let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] + isPause _ = False + let fragmentClass = case writerSlideVariant opts of + RevealJsSlides -> "fragment" + _ -> "incremental" + let inDiv xs = Blk (RawBlock (Format "html") ("<div class=\"" + ++ fragmentClass ++ "\">")) : + (xs ++ [Blk (RawBlock (Format "html") "</div>")]) innerContents <- mapM (elementToHtml slideLevel opts) $ if titleSlide -- title slides have no content of their own then filter isSec elements - else elements + else if slide + then case splitBy isPause elements of + [] -> [] + (x:xs) -> x ++ concatMap inDiv xs + else elements let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && @@ -379,7 +397,10 @@ imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", treatAsImage :: FilePath -> Bool treatAsImage fp = - let ext = map toLower $ drop 1 $ takeExtension fp + let path = case uriPath `fmap` parseURIReference fp of + Nothing -> fp + Just up -> up + ext = map toLower $ drop 1 $ takeExtension path in null ext || ext `elem` imageExts -- | Convert Pandoc block element to HTML. @@ -400,15 +421,22 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat [nl opts, img, capt, nl opts] --- . . . indicates a pause in a slideshow -blockToHtml opts (Para [Str ".",Space,Str ".",Space,Str "."]) - | writerSlideVariant opts == RevealJsSlides = - blockToHtml opts (RawBlock "html" "<div class=\"fragment\" />") blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents -blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str -blockToHtml _ (RawBlock _ _) = return mempty +blockToHtml opts (Div attr@(_,classes,_) bs) = do + contents <- blockListToHtml opts bs + let contents' = nl opts >> contents >> nl opts + return $ + if "notes" `elem` classes + then case writerSlideVariant opts of + RevealJsSlides -> addAttrs opts attr $ H5.aside $ contents' + NoSlides -> addAttrs opts attr $ H.div $ contents' + _ -> mempty + else addAttrs opts attr $ H.div $ contents' +blockToHtml _ (RawBlock f str) + | f == Format "html" = return $ preEscapedString str + | otherwise = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do let tolhs = isEnabled Ext_literate_haskell opts && @@ -422,7 +450,10 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do adjCode = if tolhs then unlines . map ("> " ++) . lines $ rawCode else rawCode - case highlight formatHtmlBlock (id',classes',keyvals) adjCode of + hlCode = if writerHighlight opts -- check highlighting options + then highlight formatHtmlBlock (id',classes',keyvals) adjCode + else Nothing + case hlCode of Nothing -> return $ addAttrs opts (id',classes,keyvals) $ H.pre $ H.code $ toHtml adjCode Just h -> modify (\st -> st{ stHighlighting = True }) >> @@ -448,28 +479,22 @@ blockToHtml opts (BlockQuote blocks) = else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (ident,_,_) lst) = do +blockToHtml opts (Header level (_,classes,_) lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts && not (null secnum) + && "unnumbered" `notElem` classes then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >> strToHtml " " >> contents else contents - let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides] - let contents'' = if writerTableOfContents opts && not (null ident) - then H.a ! A.href (toValue $ - '#' : revealSlash ++ - writerIdentifierPrefix opts ++ - ident) $ contents' - else contents' return $ case level of - 1 -> H.h1 contents'' - 2 -> H.h2 contents'' - 3 -> H.h3 contents'' - 4 -> H.h4 contents'' - 5 -> H.h5 contents'' - 6 -> H.h6 contents'' - _ -> H.p contents'' + 1 -> H.h1 contents' + 2 -> H.h2 contents' + 3 -> H.h3 contents' + 4 -> H.h4 contents' + 5 -> H.h5 contents' + 6 -> H.h6 contents' + _ -> H.p contents' blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst return $ unordList opts contents @@ -497,7 +522,7 @@ blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- if null term then return mempty - else liftM (H.dt) $ inlineListToHtml opts term + else liftM H.dt $ inlineListToHtml opts term defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . blockListToHtml opts) defs return $ mconcat $ nl opts : term' : nl opts : @@ -512,11 +537,16 @@ blockToHtml opts (Table capt aligns widths headers rows') = do let percent w = show (truncate (100*w) :: Integer) ++ "%" let coltags = if all (== 0.0) widths then mempty - else mconcat $ map (\w -> - if writerHtml5 opts - then H.col ! A.style (toValue $ "width: " ++ percent w) - else H.col ! A.width (toValue $ percent w) >> nl opts) - widths + else do + H.colgroup $ do + nl opts + mapM_ (\w -> do + if writerHtml5 opts + then H.col ! A.style (toValue $ "width: " ++ + percent w) + else H.col ! A.width (toValue $ percent w) + nl opts) widths + nl opts head' <- if all null headers then return mempty else do @@ -572,8 +602,7 @@ toListItem opts item = nl opts >> H.li item blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = - mapM (blockToHtml opts) lst >>= - return . mconcat . intersperse (nl opts) + fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html @@ -587,16 +616,35 @@ inlineToHtml opts inline = (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br + (Span (id',classes,kvs) ils) + -> inlineListToHtml opts ils >>= + return . addAttrs opts attr' . H.span + where attr' = (id',classes',kvs') + classes' = filter (`notElem` ["csl-no-emph", + "csl-no-strong", + "csl-no-smallcaps"]) classes + kvs' = if null styles + then kvs + else (("style", concat styles) : kvs) + styles = ["font-style:normal;" + | "csl-no-emph" `elem` classes] + ++ ["font-weight:normal;" + | "csl-no-strong" `elem` classes] + ++ ["font-variant:normal;" + | "csl-no-smallcaps" `elem` classes] (Emph lst) -> inlineListToHtml opts lst >>= return . H.em (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong - (Code attr str) -> case highlight formatHtmlInline attr str of + (Code attr str) -> case hlCode of Nothing -> return $ addAttrs opts attr $ H.code $ strToHtml str Just h -> do modify $ \st -> st{ stHighlighting = True } return $ addAttrs opts (id',[],keyvals) h - where (id',_,keyvals) = attr + where (id',_,keyvals) = attr + hlCode = if writerHighlight opts + then highlight formatHtmlInline attr str + else Nothing (Strikeout lst) -> inlineListToHtml opts lst >>= return . H.del (SmallCaps lst) -> inlineListToHtml opts lst >>= @@ -654,25 +702,27 @@ inlineToHtml opts inline = Right r -> return $ preEscapedString $ ppcElement conf r Left _ -> inlineListToHtml opts - (readTeXMath str) >>= return . + (readTeXMath' t str) >>= return . (H.span ! A.class_ "math") MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" PlainMath -> do - x <- inlineListToHtml opts (readTeXMath str) + x <- inlineListToHtml opts (readTeXMath' t str) let m = H.span ! A.class_ "math" $ x let brtag = if writerHtml5 opts then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag ) - (RawInline "latex" str) -> case writerHTMLMathMethod opts of + (RawInline f str) + | f == Format "latex" -> + case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ toHtml str _ -> return mempty - (RawInline "html" str) -> return $ preEscapedString str - (RawInline _ _) -> return mempty + | f == Format "html" -> return $ preEscapedString str + | otherwise -> return mempty (Link [Str str] (s,_)) | "mailto:" `isPrefixOf` s && s == escapeURI ("mailto" ++ str) -> -- autolink @@ -709,7 +759,9 @@ inlineToHtml opts inline = else [A.title $ toValue tit]) return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl - (Note contents) -> do + (Note contents) + | writerIgnoreNotes opts -> return mempty + | otherwise -> do st <- get let notes = stNotes st let number = (length notes) + 1 @@ -724,11 +776,11 @@ inlineToHtml opts inline = writerIdentifierPrefix opts ++ "fn" ++ ref) ! A.class_ "footnoteRef" ! prefixedId opts ("fnref" ++ ref) + $ H.sup $ toHtml ref - let link' = case writerEpubVersion opts of - Just EPUB3 -> link ! customAttribute "epub:type" "noteref" - _ -> link - return $ H.sup $ link' + return $ case writerEpubVersion opts of + Just EPUB3 -> link ! customAttribute "epub:type" "noteref" + _ -> link (Cite cits il)-> do contents <- inlineListToHtml opts il let citationIds = unwords $ map citationId cits let result = H.span ! A.class_ "citation" $ contents diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs new file mode 100644 index 000000000..1c82839d0 --- /dev/null +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -0,0 +1,346 @@ +{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} +{- +Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Haddock + Copyright : Copyright (C) 2014 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to haddock markup. + +Haddock: <http://www.haskell.org/haddock/doc/html/> +-} +module Text.Pandoc.Writers.Haddock (writeHaddock) where +import Text.Pandoc.Definition +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Shared +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import Data.List ( intersperse, transpose ) +import Text.Pandoc.Pretty +import Control.Monad.State +import Text.Pandoc.Readers.TeXMath (readTeXMath') +import Network.URI (isURI) +import Data.Default + +type Notes = [[Block]] +data WriterState = WriterState { stNotes :: Notes } +instance Default WriterState + where def = WriterState{ stNotes = [] } + +-- | Convert Pandoc to Haddock. +writeHaddock :: WriterOptions -> Pandoc -> String +writeHaddock opts document = + evalState (pandocToHaddock opts{ + writerWrapText = writerWrapText opts } document) def + +-- | Return haddock representation of document. +pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String +pandocToHaddock opts (Pandoc meta blocks) = do + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + body <- blockListToHaddock opts blocks + st <- get + notes' <- notesToHaddock opts (reverse $ stNotes st) + let render' :: Doc -> String + render' = render colwidth + let main = render' $ body <> + (if isEmpty notes' then empty else blankline <> notes') + metadata <- metaToJSON opts + (fmap (render colwidth) . blockListToHaddock opts) + (fmap (render colwidth) . inlineListToHaddock opts) + meta + let context = defField "body" main + $ metadata + if writerStandalone opts + then return $ renderTemplate' (writerTemplate opts) context + else return main + +-- | Return haddock representation of notes. +notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToHaddock opts notes = + if null notes + then return empty + else do + contents <- blockToHaddock opts $ OrderedList (1,DefaultStyle,DefaultDelim) notes + return $ text "#notes#" <> blankline <> contents + +-- | Escape special characters for Haddock. +escapeString :: String -> String +escapeString = escapeStringUsing haddockEscapes + where haddockEscapes = backslashEscapes "\\/'`\"@<" + +-- | Convert Pandoc block element to haddock. +blockToHaddock :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToHaddock _ Null = return empty +blockToHaddock opts (Div _ ils) = do + contents <- blockListToHaddock opts ils + return $ contents <> blankline +blockToHaddock opts (Plain inlines) = do + contents <- inlineListToHaddock opts inlines + return $ contents <> cr +-- title beginning with fig: indicates figure +blockToHaddock opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = + blockToHaddock opts (Para [Image alt (src,tit)]) +blockToHaddock opts (Para inlines) = + -- TODO: if it contains linebreaks, we need to use a @...@ block + (<> blankline) `fmap` blockToHaddock opts (Plain inlines) +blockToHaddock _ (RawBlock f str) + | f == "haddock" = do + return $ text str <> text "\n" + | otherwise = return empty +blockToHaddock opts HorizontalRule = + return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline +blockToHaddock opts (Header level (ident,_,_) inlines) = do + contents <- inlineListToHaddock opts inlines + let attr' = if null ident + then empty + else cr <> text "#" <> text ident <> text "#" + return $ nowrap (text (replicate level '=') <> space <> contents) + <> attr' <> blankline +blockToHaddock _ (CodeBlock (_,_,_) str) = + return $ prefixed "> " (text str) <> blankline +-- Nothing in haddock corresponds to block quotes: +blockToHaddock opts (BlockQuote blocks) = + blockListToHaddock opts blocks +-- Haddock doesn't have tables. Use haddock tables in code. +blockToHaddock opts (Table caption aligns widths headers rows) = do + caption' <- inlineListToHaddock opts caption + let caption'' = if null caption + then empty + else blankline <> caption' <> blankline + rawHeaders <- mapM (blockListToHaddock opts) headers + rawRows <- mapM (mapM (blockListToHaddock opts)) rows + let isSimple = all (==0) widths + let isPlainBlock (Plain _) = True + isPlainBlock _ = False + let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows) + (nst,tbl) <- case True of + _ | isSimple -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | not hasBlocks -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | otherwise -> fmap (id,) $ + gridTable opts (all null headers) aligns widths + rawHeaders rawRows + return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline +blockToHaddock opts (BulletList items) = do + contents <- mapM (bulletListItemToHaddock opts) items + return $ cat contents <> blankline +blockToHaddock opts (OrderedList (start,_,delim) items) = do + let attribs = (start, Decimal, delim) + let markers = orderedListMarkers attribs + let markers' = map (\m -> if length m < 3 + then m ++ replicate (3 - length m) ' ' + else m) markers + contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $ + zip markers' items + return $ cat contents <> blankline +blockToHaddock opts (DefinitionList items) = do + contents <- mapM (definitionListItemToHaddock opts) items + return $ cat contents <> blankline + +pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> State WriterState Doc +pandocTable opts headless aligns widths rawHeaders rawRows = do + let isSimple = all (==0) widths + let alignHeader alignment = case alignment of + AlignLeft -> lblock + AlignCenter -> cblock + AlignRight -> rblock + AlignDefault -> lblock + let numChars = maximum . map offset + let widthsInChars = if isSimple + then map ((+2) . numChars) + $ transpose (rawHeaders : rawRows) + else map + (floor . (fromIntegral (writerColumns opts) *)) + widths + let makeRow = hcat . intersperse (lblock 1 (text " ")) . + (zipWith3 alignHeader aligns widthsInChars) + let rows' = map makeRow rawRows + let head' = makeRow rawHeaders + let maxRowHeight = maximum $ map height (head':rows') + let underline = cat $ intersperse (text " ") $ + map (\width -> text (replicate width '-')) widthsInChars + let border = if maxRowHeight > 1 + then text (replicate (sum widthsInChars + + length widthsInChars - 1) '-') + else if headless + then underline + else empty + let head'' = if headless + then empty + else border <> cr <> head' + let body = if maxRowHeight > 1 + then vsep rows' + else vcat rows' + let bottom = if headless + then underline + else border + return $ head'' $$ underline $$ body $$ bottom + +gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> State WriterState Doc +gridTable opts headless _aligns widths headers' rawRows = do + let numcols = length headers' + let widths' = if all (==0) widths + then replicate numcols (1.0 / fromIntegral numcols) + else widths + let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths' + let hpipeBlocks blocks = hcat [beg, middle, end] + where h = maximum (map height blocks) + sep' = lblock 3 $ vcat (map text $ replicate h " | ") + beg = lblock 2 $ vcat (map text $ replicate h "| ") + end = lblock 2 $ vcat (map text $ replicate h " |") + middle = chomp $ hcat $ intersperse sep' blocks + let makeRow = hpipeBlocks . zipWith lblock widthsInChars + let head' = makeRow headers' + let rows' = map (makeRow . map chomp) rawRows + let border ch = char '+' <> char ch <> + (hcat $ intersperse (char ch <> char '+' <> char ch) $ + map (\l -> text $ replicate l ch) widthsInChars) <> + char ch <> char '+' + let body = vcat $ intersperse (border '-') rows' + let head'' = if headless + then empty + else head' $$ border '=' + return $ border '-' $$ head'' $$ body $$ border '-' + +-- | Convert bullet list item (list of blocks) to haddock +bulletListItemToHaddock :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToHaddock opts items = do + contents <- blockListToHaddock opts items + let sps = replicate (writerTabStop opts - 2) ' ' + let start = text ('-' : ' ' : sps) + -- remove trailing blank line if it is a tight list + let contents' = case reverse items of + (BulletList xs:_) | isTightList xs -> + chomp contents <> cr + (OrderedList _ xs:_) | isTightList xs -> + chomp contents <> cr + _ -> contents + return $ hang (writerTabStop opts) start $ contents' <> cr + +-- | Convert ordered list item (a list of blocks) to haddock +orderedListItemToHaddock :: WriterOptions -- ^ options + -> String -- ^ list item marker + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToHaddock opts marker items = do + contents <- blockListToHaddock opts items + let sps = case length marker - writerTabStop opts of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + let start = text marker <> sps + return $ hang (writerTabStop opts) start $ contents <> cr + +-- | Convert definition list item (label, list of blocks) to haddock +definitionListItemToHaddock :: WriterOptions + -> ([Inline],[[Block]]) + -> State WriterState Doc +definitionListItemToHaddock opts (label, defs) = do + labelText <- inlineListToHaddock opts label + defs' <- mapM (mapM (blockToHaddock opts)) defs + let contents = vcat $ map (\d -> hang 4 empty $ vcat d <> cr) defs' + return $ nowrap (brackets labelText) <> cr <> contents <> cr + +-- | Convert list of Pandoc block elements to haddock +blockListToHaddock :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToHaddock opts blocks = + mapM (blockToHaddock opts) blocks >>= return . cat + +-- | Convert list of Pandoc inline elements to haddock. +inlineListToHaddock :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToHaddock opts lst = + mapM (inlineToHaddock opts) lst >>= return . cat + +-- | Convert Pandoc inline element to haddock. +inlineToHaddock :: WriterOptions -> Inline -> State WriterState Doc +inlineToHaddock opts (Span (ident,_,_) ils) = do + contents <- inlineListToHaddock opts ils + if not (null ident) && null ils + then return $ "#" <> text ident <> "#" + else return contents +inlineToHaddock opts (Emph lst) = do + contents <- inlineListToHaddock opts lst + return $ "/" <> contents <> "/" +inlineToHaddock opts (Strong lst) = do + contents <- inlineListToHaddock opts lst + return $ "__" <> contents <> "__" +inlineToHaddock opts (Strikeout lst) = do + contents <- inlineListToHaddock opts lst + -- not supported in haddock, but we fake it: + return $ "~~" <> contents <> "~~" +-- not supported in haddock: +inlineToHaddock opts (Superscript lst) = inlineListToHaddock opts lst +-- not supported in haddock: +inlineToHaddock opts (Subscript lst) = inlineListToHaddock opts lst +-- not supported in haddock: +inlineToHaddock opts (SmallCaps lst) = inlineListToHaddock opts lst +inlineToHaddock opts (Quoted SingleQuote lst) = do + contents <- inlineListToHaddock opts lst + return $ "‘" <> contents <> "’" +inlineToHaddock opts (Quoted DoubleQuote lst) = do + contents <- inlineListToHaddock opts lst + return $ "“" <> contents <> "”" +inlineToHaddock _ (Code _ str) = + return $ "@" <> text (escapeString str) <> "@" +inlineToHaddock _ (Str str) = do + return $ text $ escapeString str +inlineToHaddock opts (Math mt str) = do + let adjust x = case mt of + DisplayMath -> cr <> x <> cr + InlineMath -> x + adjust `fmap` (inlineListToHaddock opts $ readTeXMath' mt str) +inlineToHaddock _ (RawInline f str) + | f == "haddock" = return $ text str + | otherwise = return empty +-- no line break in haddock (see above on CodeBlock) +inlineToHaddock _ (LineBreak) = return cr +inlineToHaddock _ Space = return space +inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst +inlineToHaddock opts (Link txt (src, _)) = do + linktext <- inlineListToHaddock opts txt + let useAuto = isURI src && + case txt of + [Str s] | escapeURI s == src -> True + _ -> False + return $ nowrap $ "<" <> text src <> + (if useAuto then empty else space <> linktext) <> ">" +inlineToHaddock opts (Image alternate (source, tit)) = do + linkhaddock <- inlineToHaddock opts (Link alternate (source, tit)) + return $ "<" <> linkhaddock <> ">" +-- haddock doesn't have notes, but we can fake it: +inlineToHaddock opts (Note contents) = do + modify (\st -> st{ stNotes = contents : stNotes st }) + st <- get + let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st) + return $ "<#notes [" <> ref <> "]>" diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs new file mode 100644 index 000000000..19d486b25 --- /dev/null +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -0,0 +1,525 @@ +{-# LANGUAGE OverloadedStrings #-} + +{- | + Module : Text.Pandoc.Writers.ICML + Copyright : Copyright (C) 2013 github.com/mb21 + License : GNU GPL, version 2 or above + + Stability : alpha + +Conversion of 'Pandoc' documents to Adobe InCopy ICML, a stand-alone XML format +which is a subset of the zipped IDML format for which the documentation is +available here: http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/sdk/cs6/idml/idml-specification.pdf +InCopy is the companion word-processor to Adobe InDesign and ICML documents can be integrated +into InDesign with File -> Place. +-} +module Text.Pandoc.Writers.ICML (writeICML) where +import Text.Pandoc.Definition +import Text.Pandoc.XML +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Shared (splitBy) +import Text.Pandoc.Options +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Pretty +import Data.List (isPrefixOf, isInfixOf, stripPrefix) +import Data.Text as Text (breakOnAll, pack) +import Data.Monoid (mappend) +import Control.Monad.State +import qualified Data.Set as Set + +type Style = [String] +type Hyperlink = [(Int, String)] + +data WriterState = WriterState{ + blockStyles :: Set.Set String + , inlineStyles :: Set.Set String + , links :: Hyperlink + , listDepth :: Int + , maxListDepth :: Int + } + +type WS a = State WriterState a + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ + blockStyles = Set.empty + , inlineStyles = Set.empty + , links = [] + , listDepth = 1 + , maxListDepth = 0 + } + +-- inline names (appear in InDesign's character styles pane) +emphName :: String +strongName :: String +strikeoutName :: String +superscriptName :: String +subscriptName :: String +smallCapsName :: String +codeName :: String +linkName :: String +emphName = "Italic" +strongName = "Bold" +strikeoutName = "Strikeout" +superscriptName = "Superscript" +subscriptName = "Subscript" +smallCapsName = "SmallCaps" +codeName = "Code" +linkName = "Link" + +-- block element names (appear in InDesign's paragraph styles pane) +paragraphName :: String +codeBlockName :: String +rawBlockName :: String +blockQuoteName :: String +orderedListName :: String +bulletListName :: String +defListTermName :: String +defListDefName :: String +headerName :: String +tableName :: String +tableHeaderName :: String +tableCaptionName :: String +alignLeftName :: String +alignRightName :: String +alignCenterName :: String +firstListItemName :: String +beginsWithName :: String +lowerRomanName :: String +upperRomanName :: String +lowerAlphaName :: String +upperAlphaName :: String +subListParName :: String +footnoteName :: String +paragraphName = "Paragraph" +codeBlockName = "CodeBlock" +rawBlockName = "Rawblock" +blockQuoteName = "Blockquote" +orderedListName = "NumList" +bulletListName = "BulList" +defListTermName = "DefListTerm" +defListDefName = "DefListDef" +headerName = "Header" +tableName = "TablePar" +tableHeaderName = "TableHeader" +tableCaptionName = "TableCaption" +alignLeftName = "LeftAlign" +alignRightName = "RightAlign" +alignCenterName = "CenterAlign" +firstListItemName = "first" +beginsWithName = "beginsWith-" +lowerRomanName = "lowerRoman" +upperRomanName = "upperRoman" +lowerAlphaName = "lowerAlpha" +upperAlphaName = "upperAlpha" +subListParName = "subParagraph" +footnoteName = "Footnote" + + +-- | Convert Pandoc document to string in ICML format. +writeICML :: WriterOptions -> Pandoc -> String +writeICML opts (Pandoc meta blocks) = + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + render' = render colwidth + renderMeta f s = Just $ render' $ fst $ runState (f opts [] s) defaultWriterState + Just metadata = metaToJSON opts + (renderMeta blocksToICML) + (renderMeta inlinesToICML) + meta + (doc, st) = runState (blocksToICML opts [] blocks) defaultWriterState + main = render' doc + context = defField "body" main + $ defField "charStyles" (render' $ charStylesToDoc st) + $ defField "parStyles" (render' $ parStylesToDoc st) + $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) + $ metadata + in if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +-- | Auxilary functions for parStylesToDoc and charStylesToDoc. +contains :: String -> (String, (String, String)) -> [(String, String)] +contains s rule = + if isInfixOf (fst rule) s + then [snd rule] + else [] + +-- | The monospaced font to use as default. +monospacedFont :: Doc +monospacedFont = inTags False "AppliedFont" [("type", "string")] $ text "Courier New" + +-- | How much to indent blockquotes etc. +defaultIndent :: Int +defaultIndent = 20 + +-- | How much to indent numbered lists before the number. +defaultListIndent :: Int +defaultListIndent = 10 + +-- other constants +lineSeparator :: String +lineSeparator = "
" + +-- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles. +parStylesToDoc :: WriterState -> Doc +parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st + where + makeStyle s = + let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str) + attrs = concat $ map (contains s) $ [ + (defListTermName, ("BulletsAndNumberingListType", "BulletList")) + , (defListTermName, ("FontStyle", "Bold")) + , (tableHeaderName, ("FontStyle", "Bold")) + , (alignLeftName, ("Justification", "LeftAlign")) + , (alignRightName, ("Justification", "RightAlign")) + , (alignCenterName, ("Justification", "CenterAlign")) + , (headerName++"1", ("PointSize", "36")) + , (headerName++"2", ("PointSize", "30")) + , (headerName++"3", ("PointSize", "24")) + , (headerName++"4", ("PointSize", "18")) + , (headerName++"5", ("PointSize", "14")) + ] + -- what is the most nested list type, if any? + (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s + where + findList [] = (False, False) + findList (x:xs) | x == bulletListName = (True, False) + | x == orderedListName = (False, True) + | otherwise = findList xs + nBuls = countSubStrs bulletListName s + nOrds = countSubStrs orderedListName s + attrs' = numbering ++ listType ++ indent ++ attrs + where + numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)] + | otherwise = [] + listType | isOrderedList && (not $ isInfixOf subListParName s) + = [("BulletsAndNumberingListType", "NumberedList")] + | isBulletList && (not $ isInfixOf subListParName s) + = [("BulletsAndNumberingListType", "BulletList")] + | otherwise = [] + indent = [("LeftIndent", show indt)] + where + nBlockQuotes = countSubStrs blockQuoteName s + nDefLists = countSubStrs defListDefName s + indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists) + props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm) + where + font = if isInfixOf codeBlockName s + then monospacedFont + else empty + basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font + tabList = if isBulletList + then inTags True "TabList" [("type","list")] $ inTags True "ListItem" [("type","record")] + $ vcat [ + inTags False "Alignment" [("type","enumeration")] $ text "LeftAlign" + , inTags False "AlignmentCharacter" [("type","string")] $ text "." + , selfClosingTag "Leader" [("type","string")] + , inTags False "Position" [("type","unit")] $ text + $ show $ defaultListIndent * (nBuls + nOrds) + ] + else empty + makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name) + numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..." + | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..." + | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..." + | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..." + | otherwise = empty + in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props + +-- | Convert a WriterState with its inline styles to the ICML listing of Character Styles. +charStylesToDoc :: WriterState -> Doc +charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st + where + makeStyle s = + let attrs = concat $ map (contains s) [ + (strikeoutName, ("StrikeThru", "true")) + , (superscriptName, ("Position", "Superscript")) + , (subscriptName, ("Position", "Subscript")) + , (smallCapsName, ("Capitalization", "SmallCaps")) + ] + attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs + | isInfixOf strongName s = ("FontStyle", "Bold") : attrs + | isInfixOf emphName s = ("FontStyle", "Italic") : attrs + | otherwise = attrs + props = inTags True "Properties" [] $ + inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font + where + font = + if isInfixOf codeName s + then monospacedFont + else empty + in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props + +-- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks. +hyperlinksToDoc :: Hyperlink -> Doc +hyperlinksToDoc [] = empty +hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs + where + hyp (ident, url) = hdest $$ hlink + where + hdest = selfClosingTag "HyperlinkURLDestination" + [("Self", "HyperlinkURLDestination/"++url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] + hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), + ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] + $ inTags True "Properties" [] + $ inTags False "BorderColor" [("type","enumeration")] (text "Black") + $$ (inTags False "Destination" [("type","object")] + $ text $ "HyperlinkURLDestination/"++(escapeStringForXML url)) + + +-- | Convert a list of Pandoc blocks to ICML. +blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc +blocksToICML opts style lst = vcat `fmap` mapM (blockToICML opts style) lst + +-- | Convert a Pandoc block element to ICML. +blockToICML :: WriterOptions -> Style -> Block -> WS Doc +blockToICML opts style (Plain lst) = parStyle opts style lst +blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst +blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] +blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str] +blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks +blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst +blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst +blockToICML opts style (DefinitionList lst) = vcat `fmap` mapM (definitionListItemToICML opts style) lst +blockToICML opts style (Header lvl _ lst) = + let stl = (headerName ++ show lvl):style + in parStyle opts stl lst +blockToICML _ _ HorizontalRule = return empty -- we could insert a page break instead +blockToICML opts style (Table caption aligns widths headers rows) = + let style' = tableName : style + noHeader = all null headers + nrHeaders = if noHeader + then "0" + else "1" + nrRows = length rows + nrCols = if null rows + then 0 + else length $ head rows + rowsToICML [] _ = return empty + rowsToICML (col:rest) rowNr = + liftM2 ($$) (colsToICML col rowNr (0::Int)) $ rowsToICML rest (rowNr+1) + colsToICML [] _ _ = return empty + colsToICML (cell:rest) rowNr colNr = do + let stl = if rowNr == 0 && not noHeader + then tableHeaderName:style' + else style' + alig = aligns !! colNr + stl' | alig == AlignLeft = alignLeftName : stl + | alig == AlignRight = alignRightName : stl + | alig == AlignCenter = alignCenterName : stl + | otherwise = stl + c <- blocksToICML opts stl' cell + let cl = return $ inTags True "Cell" + [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c + liftM2 ($$) cl $ colsToICML rest rowNr (colNr+1) + in do + let tabl = if noHeader + then rows + else headers:rows + cells <- rowsToICML tabl (0::Int) + let colWidths w = if w > 0 + then [("SingleColumnWidth",show $ 500 * w)] + else [] + let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup) + let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths + let tableDoc = return $ inTags True "Table" [ + ("AppliedTableStyle","TableStyle/Table") + , ("HeaderRowCount", nrHeaders) + , ("BodyRowCount", show nrRows) + , ("ColumnCount", show nrCols) + ] (colDescs $$ cells) + liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption +blockToICML opts style (Div _ lst) = blocksToICML opts style lst +blockToICML _ _ Null = return empty + +-- | Convert a list of lists of blocks to ICML list items. +listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc +listItemsToICML _ _ _ _ [] = return empty +listItemsToICML opts listType style attribs (first:rest) = do + st <- get + put st{ listDepth = 1 + listDepth st} + let stl = listType:style + let f = listItemToICML opts stl True attribs first + let r = map (listItemToICML opts stl False attribs) rest + docs <- sequence $ f:r + s <- get + let maxD = max (maxListDepth s) (listDepth s) + put s{ listDepth = 1, maxListDepth = maxD } + return $ vcat docs + +-- | Convert a list of blocks to ICML list items. +listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc +listItemToICML opts style isFirst attribs item = + let makeNumbStart (Just (beginsWith, numbStl, _)) = + let doN DefaultStyle = [] + doN LowerRoman = [lowerRomanName] + doN UpperRoman = [upperRomanName] + doN LowerAlpha = [lowerAlphaName] + doN UpperAlpha = [upperAlphaName] + doN _ = [] + bw = if beginsWith > 1 + then [beginsWithName ++ show beginsWith] + else [] + in doN numbStl ++ bw + makeNumbStart Nothing = [] + stl = if isFirst + then firstListItemName:style + else style + stl' = makeNumbStart attribs ++ stl + in if length item > 1 + then do + let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst + insertTab block = blockToICML opts style block + f <- blockToICML opts stl' $ head item + r <- fmap vcat $ mapM insertTab $ tail item + return $ f $$ r + else blocksToICML opts stl' item + +definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc +definitionListItemToICML opts style (term,defs) = do + term' <- parStyle opts (defListTermName:style) term + defs' <- vcat `fmap` mapM (blocksToICML opts (defListDefName:style)) defs + return $ term' $$ defs' + + +-- | Convert a list of inline elements to ICML. +inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc +inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst) + +-- | Convert an inline element to ICML. +inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc +inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str +inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst +inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst +inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst +inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst +inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst +inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst +inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"] +inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"] +inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst] +inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str +inlineToICML _ style Space = charStyle style space +inlineToICML _ style LineBreak = charStyle style $ text lineSeparator +inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math +inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str +inlineToICML opts style (Link lst (url, title)) = do + content <- inlinesToICML opts (linkName:style) lst + state $ \st -> + let ident = if null $ links st + then 1::Int + else 1 + (fst $ head $ links st) + newst = st{ links = (ident, url):(links st) } + cont = inTags True "HyperlinkTextSource" + [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content + in (cont, newst) +inlineToICML opts style (Image alt target) = imageICML opts style alt target +inlineToICML opts style (Note lst) = footnoteToICML opts style lst +inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst + +-- | Convert a list of block elements to an ICML footnote. +footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc +footnoteToICML opts style lst = + let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls + insertTab block = blockToICML opts (footnoteName:style) block + in do + contents <- mapM insertTab lst + let number = inTags True "ParagraphStyleRange" [] $ + inTags True "CharacterStyleRange" [] $ inTagsSimple "Content" "<?ACE 4?>" + return $ inTags True "CharacterStyleRange" + [("AppliedCharacterStyle","$ID/NormalCharacterStyle"), ("Position","Superscript")] + $ inTags True "Footnote" [] $ number $$ vcat contents + +-- | Auxiliary function to merge Space elements into the adjacent Strs. +mergeSpaces :: [Inline] -> [Inline] +mergeSpaces ((Str s):(Space:((Str s'):xs))) = mergeSpaces $ Str(s++" "++s') : xs +mergeSpaces (Space:((Str s):xs)) = mergeSpaces $ Str (" "++s) : xs +mergeSpaces ((Str s):(Space:xs)) = mergeSpaces $ Str (s++" ") : xs +mergeSpaces (x:xs) = x : (mergeSpaces xs) +mergeSpaces [] = [] + +-- | Wrap a list of inline elements in an ICML Paragraph Style +parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc +parStyle opts style lst = + let slipIn x y = if null y + then x + else x ++ " > " ++ y + stlStr = foldr slipIn [] $ reverse style + stl = if null stlStr + then "" + else "ParagraphStyle/" ++ stlStr + attrs = ("AppliedParagraphStyle", stl) + attrs' = if firstListItemName `elem` style + then let ats = attrs : [("NumberingContinue", "false")] + begins = filter (isPrefixOf beginsWithName) style + in if null begins + then ats + else let i = maybe "" id $ stripPrefix beginsWithName $ head begins + in ("NumberingStartAt", i) : ats + else [attrs] + in do + content <- inlinesToICML opts [] lst + let cont = inTags True "ParagraphStyleRange" attrs' + $ mappend content $ selfClosingTag "Br" [] + state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st }) + +-- | Wrap a Doc in an ICML Character Style. +charStyle :: Style -> Doc -> WS Doc +charStyle style content = + let (stlStr, attrs) = styleToStrAttr style + doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content + in do + state $ \st -> + let styles = if null stlStr + then st + else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } + in (doc, styles) + +-- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute. +styleToStrAttr :: Style -> (String, [(String, String)]) +styleToStrAttr style = + let stlStr = unwords $ Set.toAscList $ Set.fromList style + stl = if null style + then "$ID/NormalCharacterStyle" + else "CharacterStyle/" ++ stlStr + attrs = [("AppliedCharacterStyle", stl)] + in (stlStr, attrs) + +-- | Assemble an ICML Image. +imageICML :: WriterOptions -> Style -> [Inline] -> Target -> WS Doc +imageICML _ style _ (linkURI, _) = + let imgWidth = 300::Int --TODO: set width, height dynamically as in Docx.hs + imgHeight = 200::Int + scaleFact = show (1::Double) --TODO: set scaling factor so image is scaled exactly to imgWidth x imgHeight + hw = show $ imgWidth `div` 2 + hh = show $ imgHeight `div` 2 + qw = show $ imgWidth `div` 4 + qh = show $ imgHeight `div` 4 + (stlStr, attrs) = styleToStrAttr style + props = inTags True "Properties" [] $ inTags True "PathGeometry" [] + $ inTags True "GeometryPathType" [("PathOpen","false")] + $ inTags True "PathPointArray" [] + $ vcat [ + selfClosingTag "PathPointType" [("Anchor", "-"++qw++" -"++qh), + ("LeftDirection", "-"++qw++" -"++qh), ("RightDirection", "-"++qw++" -"++qh)] + , selfClosingTag "PathPointType" [("Anchor", "-"++qw++" "++qh), + ("LeftDirection", "-"++qw++" "++qh), ("RightDirection", "-"++qw++" "++qh)] + , selfClosingTag "PathPointType" [("Anchor", qw++" "++qh), + ("LeftDirection", qw++" "++qh), ("RightDirection", qw++" "++qh)] + , selfClosingTag "PathPointType" [("Anchor", qw++" -"++qh), + ("LeftDirection", qw++" -"++qh), ("RightDirection", qw++" -"++qh)] + ] + image = inTags True "Image" + [("Self","ue6"), ("ItemTransform", scaleFact++" 0 0 "++scaleFact++" -"++qw++" -"++qh)] + $ vcat [ + inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" + $$ selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0"), ("Right", hw), ("Bottom", hh)] + , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", linkURI)] + ] + doc = inTags True "CharacterStyleRange" attrs + $ inTags True "Rectangle" [("Self","uec"), ("ItemTransform", "1 0 0 1 "++qw++" -"++qh)] + $ (props $$ image) + in do + state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } ) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 2b4a608a7..100bf900d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} {- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,20 +30,20 @@ Conversion of 'Pandoc' format into LaTeX. -} module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Printf ( printf ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) import Data.List ( (\\), isSuffixOf, isInfixOf, isPrefixOf, intercalate, intersperse ) -import Data.Char ( toLower, isPunctuation ) +import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) +import Data.Maybe ( fromMaybe ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty -import System.FilePath (dropExtension) import Text.Pandoc.Slides import Text.Pandoc.Highlighting (highlight, styleToLaTeX, formatLaTeXInline, formatLaTeXBlock, @@ -51,6 +51,9 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX, data WriterState = WriterState { stInNote :: Bool -- true if we're in a note + , stInQuote :: Bool -- true if in a blockquote + , stInMinipage :: Bool -- true if in minipage + , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter , stVerbInNote :: Bool -- true if document has verbatim text in note @@ -71,7 +74,8 @@ data WriterState = writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = evalState (pandocToLaTeX options document) $ - WriterState { stInNote = False, + WriterState { stInNote = False, stInQuote = False, + stInMinipage = False, stNotes = [], stOLLevel = 1, stOptions = options, stVerbInNote = False, stTable = False, stStrikeout = False, @@ -83,10 +87,17 @@ writeLaTeX options document = pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String pandocToLaTeX options (Pandoc meta blocks) = do + -- Strip off final 'references' header if --natbib or --biblatex + let method = writerCiteMethod options + let blocks' = if method == Biblatex || method == Natbib + then case reverse blocks of + (Div (_,["references"],_) _):xs -> reverse xs + _ -> blocks + else blocks -- see if there are internal links let isInternalLink (Link _ ('#':xs,_)) = [xs] isInternalLink _ = [] - modify $ \s -> s{ stInternalLinks = queryWith isInternalLink blocks } + modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let template = writerTemplate options -- set stBook depending on documentclass let bookClasses = ["memoir","book","report","scrreprt","scrbook"] @@ -108,30 +119,31 @@ pandocToLaTeX options (Pandoc meta blocks) = do (fmap (render colwidth) . blockListToLaTeX) (fmap (render colwidth) . inlineListToLaTeX) meta - let (blocks', lastHeader) = if writerCiteMethod options == Citeproc then - (blocks, []) - else case last blocks of - Header 1 _ il -> (init blocks, il) - _ -> (blocks, []) - blocks'' <- if writerBeamer options - then toSlides blocks' - else return blocks' - body <- mapM (elementToLaTeX options) $ hierarchicalize blocks'' + let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then + (blocks', []) + else case last blocks' of + Header 1 _ il -> (init blocks', il) + _ -> (blocks', []) + blocks''' <- if writerBeamer options + then toSlides blocks'' + else return blocks'' + body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''' (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader let main = render colwidth $ vsep body st <- get - let biblioFiles = intercalate "," $ map dropExtension $ writerBiblioFiles options + titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta + authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - - if writerChapters options + if stBook st then 1 else 0)) $ defField "body" main $ - defField "title-meta" (stringify $ docTitle meta) $ - defField "author-meta" (intercalate "; " $ map stringify $ docAuthors meta) $ + defField "title-meta" titleMeta $ + defField "author-meta" (intercalate "; " authorsMeta) $ defField "documentclass" (if writerBeamer options then ("beamer" :: String) - else if writerChapters options + else if stBook st then "book" else "article") $ defField "verbatim-in-note" (stVerbInNote st) $ @@ -152,11 +164,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do $ writerHighlightStyle options ) else id) $ (case writerCiteMethod options of - Natbib -> defField "biblio-files" biblioFiles . - defField "biblio-title" biblioTitle . + Natbib -> defField "biblio-title" biblioTitle . defField "natbib" True - Biblatex -> defField "biblio-files" biblioFiles . - defField "biblio-title" biblioTitle . + Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ metadata @@ -183,7 +193,7 @@ stringToLaTeX _ [] = return "" stringToLaTeX ctx (x:xs) = do opts <- gets stOptions rest <- stringToLaTeX ctx xs - let ligatures = writerTeXLigatures opts && not (ctx == CodeString) + let ligatures = writerTeXLigatures opts && ctx == TextString let isUrl = ctx == URLString when (x == '€') $ modify $ \st -> st{ stUsesEuro = True } @@ -197,17 +207,20 @@ stringToLaTeX ctx (x:xs) = do '&' -> "\\&" ++ rest '_' | not isUrl -> "\\_" ++ rest '#' -> "\\#" ++ rest - '-' -> case xs of -- prevent adjacent hyphens from forming ligatures - ('-':_) -> "-{}" ++ rest + '-' | not isUrl -> case xs of + -- prevent adjacent hyphens from forming ligatures + ('-':_) -> "-\\/" ++ rest _ -> '-' : rest '~' | not isUrl -> "\\textasciitilde{}" ++ rest '^' -> "\\^{}" ++ rest - '\\' -> "\\textbackslash{}" ++ rest + '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows + | otherwise -> "\\textbackslash{}" ++ rest '|' -> "\\textbar{}" ++ rest '<' -> "\\textless{}" ++ rest '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as ']' -> "{]}" ++ rest -- optional arguments + '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest '\160' -> "~" ++ rest '\x2026' -> "\\ldots{}" ++ rest '\x2018' | ligatures -> "`" ++ rest @@ -218,6 +231,14 @@ stringToLaTeX ctx (x:xs) = do '\x2013' | ligatures -> "--" ++ rest _ -> x : rest +toLabel :: String -> State WriterState String +toLabel z = go `fmap` stringToLaTeX URLString z + where go [] = "" + go (x:xs) + | (isLetter x || isDigit x) && isAscii x = x:go xs + | elem x "-+=:;." = x:go xs + | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs + -- | Puts contents into LaTeX command. inCmd :: String -> Doc -> Doc inCmd cmd contents = char '\\' <> text cmd <> braces contents @@ -225,13 +246,13 @@ inCmd cmd contents = char '\\' <> text cmd <> braces contents toSlides :: [Block] -> State WriterState [Block] toSlides bs = do opts <- gets stOptions - let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts + let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts let bs' = prepSlides slideLevel bs concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs') elementToBeamer :: Int -> Element -> State WriterState [Block] elementToBeamer _slideLevel (Blk b) = return [b] -elementToBeamer slideLevel (Sec lvl _num (ident,classes,_) tit elts) +elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) | lvl > slideLevel = do bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts return $ Para ( RawInline "latex" "\\begin{block}{" @@ -239,7 +260,7 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,_) tit elts) : bs ++ [RawBlock "latex" "\\end{block}"] | lvl < slideLevel = do bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts - return $ (Header lvl (ident,classes,[]) tit) : bs + return $ (Header lvl (ident,classes,kvs) tit) : bs | otherwise = do -- lvl == slideLevel -- note: [fragile] is required or verbatim breaks let hasCodeBlock (CodeBlock _ _) = [True] @@ -247,17 +268,20 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,_) tit elts) let hasCode (Code _ _) = [True] hasCode _ = [] opts <- gets stOptions - let fragile = if not $ null $ queryWith hasCodeBlock elts ++ + let fragile = not $ null $ query hasCodeBlock elts ++ if writerListings opts - then queryWith hasCode elts + then query hasCode elts else [] - then "[fragile]" - else "" - let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ fragile) : + let allowframebreaks = "allowframebreaks" `elem` classes + let optionslist = ["fragile" | fragile] ++ + ["allowframebreaks" | allowframebreaks] + let options = if null optionslist + then "" + else "[" ++ intercalate "," optionslist ++ "]" + let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) : if tit == [Str "\0"] -- marker for hrule then [] - else (RawInline "latex" "\\frametitle{") : tit ++ - [RawInline "latex" "}"] + else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"] let slideEnd = RawBlock "latex" "\\end{frame}" -- now carve up slide into blocks if there are sections inside bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts @@ -278,16 +302,24 @@ isLineBreakOrSpace _ = False blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc blockToLaTeX Null = return empty +blockToLaTeX (Div (_,classes,_) bs) = do + beamer <- writerBeamer `fmap` gets stOptions + contents <- blockListToLaTeX bs + if beamer && "notes" `elem` classes -- speaker notes + then return $ "\\note" <> braces contents + else return contents blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure blockToLaTeX (Para [Image txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return empty - else (\c -> "\\caption" <> braces c) `fmap` inlineListToLaTeX txt + inNote <- gets stInNote + capt <- inlineListToLaTeX txt img <- inlineToLaTeX (Image txt (src,tit)) - return $ "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - capt $$ "\\end{figure}" + return $ if inNote + -- can't have figures in notes + then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" + else "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ + ("\\caption" <> braces capt) $$ "\\end{figure}" -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do beamer <- writerBeamer `fmap` gets stOptions @@ -306,53 +338,68 @@ blockToLaTeX (BlockQuote lst) = do modify $ \s -> s{ stIncremental = oldIncremental } return result _ -> do + oldInQuote <- gets stInQuote + modify (\s -> s{stInQuote = True}) contents <- blockListToLaTeX lst + modify (\s -> s{stInQuote = oldInQuote}) return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" -blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do +blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do opts <- gets stOptions + ref <- toLabel identifier + let linkAnchor = if null identifier + then empty + else "\\hyperdef{}" <> braces (text ref) <> + braces ("\\label" <> braces (text ref)) + let lhsCodeBlock = do + modify $ \s -> s{ stLHS = True } + return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ + "\\end{code}") $$ cr + let rawCodeBlock = do + st <- get + env <- if stInNote st + then modify (\s -> s{ stVerbInNote = True }) >> + return "Verbatim" + else return "verbatim" + return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ + text str $$ text ("\\end{" ++ env ++ "}")) <> cr + let listingsCodeBlock = do + st <- get + let params = if writerListings (stOptions st) + then (case getListingsLanguage classes of + Just l -> [ "language=" ++ l ] + Nothing -> []) ++ + [ "numbers=left" | "numberLines" `elem` classes + || "number" `elem` classes + || "number-lines" `elem` classes ] ++ + [ (if key == "startFrom" + then "firstnumber" + else key) ++ "=" ++ attr | + (key,attr) <- keyvalAttr ] ++ + (if identifier == "" + then [] + else [ "label=" ++ ref ]) + + else [] + printParams + | null params = empty + | otherwise = brackets $ hcat (intersperse ", " (map text params)) + return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ + "\\end{lstlisting}") $$ cr + let highlightedCodeBlock = + case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of + Nothing -> rawCodeBlock + Just h -> modify (\st -> st{ stHighlighting = True }) >> + return (flush $ linkAnchor $$ text h) case () of _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && "literate" `elem` classes -> lhsCodeBlock | writerListings opts -> listingsCodeBlock | writerHighlight opts && not (null classes) -> highlightedCodeBlock | otherwise -> rawCodeBlock - where lhsCodeBlock = do - modify $ \s -> s{ stLHS = True } - return $ flush ("\\begin{code}" $$ text str $$ "\\end{code}") $$ cr - rawCodeBlock = do - st <- get - env <- if stInNote st - then modify (\s -> s{ stVerbInNote = True }) >> - return "Verbatim" - else return "verbatim" - return $ flush (text ("\\begin{" ++ env ++ "}") $$ text str $$ - text ("\\end{" ++ env ++ "}")) <> cr - listingsCodeBlock = do - st <- get - let params = if writerListings (stOptions st) - then (case getListingsLanguage classes of - Just l -> [ "language=" ++ l ] - Nothing -> []) ++ - [ "numbers=left" | "numberLines" `elem` classes - || "number" `elem` classes - || "number-lines" `elem` classes ] ++ - [ (if key == "startFrom" - then "firstnumber" - else key) ++ "=" ++ attr | - (key,attr) <- keyvalAttr ] - else [] - printParams - | null params = empty - | otherwise = brackets $ hcat (intersperse ", " (map text params)) - return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ - "\\end{lstlisting}") $$ cr - highlightedCodeBlock = - case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of - Nothing -> rawCodeBlock - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (flush $ text h) -blockToLaTeX (RawBlock "latex" x) = return $ text x -blockToLaTeX (RawBlock _ _) = return empty +blockToLaTeX (RawBlock f x) + | f == Format "latex" || f == Format "tex" + = return $ text x + | otherwise = return empty blockToLaTeX (BulletList []) = return empty -- otherwise latex error blockToLaTeX (BulletList lst) = do incremental <- gets stIncremental @@ -407,7 +454,7 @@ blockToLaTeX (DefinitionList lst) = do incremental <- gets stIncremental let inc = if incremental then "[<+->]" else "" items <- mapM defListItemToLaTeX lst - let spacing = if and $ map isTightList (map snd lst) + let spacing = if all isTightList (map snd lst) then text "\\itemsep1pt\\parskip0pt\\parsep0pt" else empty return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ @@ -419,12 +466,12 @@ blockToLaTeX (Header level (id',classes,_) lst) = blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty - else ($$ "\\hline\\noalign{\\medskip}") `fmap` + else ($$ "\\midrule\\endhead") `fmap` (tableRowToLaTeX True aligns widths) heads captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\noalign{\\medskip}" + else text "\\addlinespace" $$ text "\\caption" <> braces captionText rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns @@ -432,10 +479,10 @@ blockToLaTeX (Table caption aligns widths heads rows) = do return $ "\\begin{longtable}[c]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end - $$ "\\hline\\noalign{\\medskip}" + $$ "\\toprule\\addlinespace" $$ headers $$ vcat rows' - $$ "\\hline" + $$ "\\bottomrule" $$ capt $$ "\\end{longtable}" @@ -456,19 +503,61 @@ tableRowToLaTeX :: Bool -> [[Block]] -> State WriterState Doc tableRowToLaTeX header aligns widths cols = do - renderedCells <- mapM blockListToLaTeX cols + -- scale factor compensates for extra space between columns + -- so the whole table isn't larger than columnwidth + let scaleFactor = 0.97 ** fromIntegral (length aligns) + let widths' = map (scaleFactor *) widths + cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols + return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace" + +-- For simple latex tables (without minipages or parboxes), +-- we need to go to some lengths to get line breaks working: +-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. +fixLineBreaks :: Block -> Block +fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils +fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils +fixLineBreaks x = x + +fixLineBreaks' :: [Inline] -> [Inline] +fixLineBreaks' ils = case splitBy (== LineBreak) ils of + [] -> [] + [xs] -> xs + chunks -> RawInline "tex" "\\vtop{" : + concatMap tohbox chunks ++ + [RawInline "tex" "}"] + where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++ + [RawInline "tex" "}"] + +tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) + -> State WriterState Doc +tableCellToLaTeX _ (0, _, blocks) = + blockListToLaTeX $ walk fixLineBreaks blocks +tableCellToLaTeX header (width, align, blocks) = do + modify $ \st -> st{ stInMinipage = True, stNotes = [] } + cellContents <- blockListToLaTeX blocks + notes <- gets stNotes + modify $ \st -> st{ stInMinipage = False, stNotes = [] } let valign = text $ if header then "[b]" else "[t]" - let halign x = case x of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" - let toCell 0 _ c = c - toCell w a c = "\\begin{minipage}" <> valign <> - braces (text (printf "%.2f\\columnwidth" w)) <> - (halign a <> cr <> c <> cr) <> "\\end{minipage}" - let cells = zipWith3 toCell widths aligns renderedCells - return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}" + let halign = case align of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + return $ ("\\begin{minipage}" <> valign <> + braces (text (printf "%.2f\\columnwidth" width)) <> + (halign <> cr <> cellContents <> cr) <> "\\end{minipage}") + $$ case notes of + [] -> empty + ns -> (case length ns of + n | n > 1 -> "\\addtocounter" <> + braces "footnote" <> + braces (text $ show $ 1 - n) + | otherwise -> empty) + $$ + vcat (intersperse + ("\\addtocounter" <> braces "footnote" <> braces "1") + $ map (\x -> "\\footnotetext" <> braces x) + $ reverse ns) listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . @@ -477,8 +566,15 @@ listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc defListItemToLaTeX (term, defs) = do term' <- inlineListToLaTeX term + -- put braces around term if it contains an internal link, + -- since otherwise we get bad bracket interactions: \item[\hyperref[..] + let isInternalLink (Link _ ('#':_,_)) = True + isInternalLink _ = False + let term'' = if any isInternalLink term + then braces term' + else term' def' <- liftM vsep $ mapM blockListToLaTeX defs - return $ "\\item" <> brackets term' $$ def' + return $ "\\item" <> brackets term'' $$ def' -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: Bool -- True for unnumbered @@ -488,17 +584,19 @@ sectionHeader :: Bool -- True for unnumbered -> State WriterState Doc sectionHeader unnumbered ref level lst = do txt <- inlineListToLaTeX lst + lab <- text `fmap` toLabel ref let noNote (Note _) = Str "" noNote x = x - let lstNoNotes = bottomUp noNote lst + let lstNoNotes = walk noNote lst + txtNoNotes <- inlineListToLaTeX lstNoNotes let star = if unnumbered then text "*" else empty - -- footnotes in sections don't work unless you specify an optional - -- argument: \section[mysec]{mysec\footnote{blah}} - optional <- if lstNoNotes == lst + -- footnotes in sections don't work (except for starred variants) + -- unless you specify an optional argument: + -- \section[mysec]{mysec\footnote{blah}} + optional <- if unnumbered || lstNoNotes == lst then return empty else do - res <- inlineListToLaTeX lstNoNotes - return $ char '[' <> res <> char ']' + return $ brackets txtNoNotes let stuffing = star <> optional <> braces txt book <- gets stBook opts <- gets stOptions @@ -507,13 +605,13 @@ sectionHeader unnumbered ref level lst = do let refLabel x = (if ref `elem` internalLinks then text "\\hyperdef" <> braces empty - <> braces (text ref) + <> braces lab <> braces x else x) - let headerWith x y r = refLabel $ text x <> y <> - if null r + let headerWith x y = refLabel $ text x <> y <> + if null ref then empty - else text "\\label" <> braces (text r) + else text "\\label" <> braces lab let sectionType = case level' of 0 | writerBeamer opts -> "part" | otherwise -> "chapter" @@ -523,13 +621,20 @@ sectionHeader unnumbered ref level lst = do 4 -> "paragraph" 5 -> "subparagraph" _ -> "" + inQuote <- gets stInQuote + let prefix = if inQuote && level' >= 4 + then text "\\mbox{}%" + -- needed for \paragraph, \subparagraph in quote environment + -- see http://tex.stackexchange.com/questions/169830/ + else empty return $ if level' > 5 then txt - else headerWith ('\\':sectionType) stuffing ref + else prefix $$ + headerWith ('\\':sectionType) stuffing $$ if unnumbered then "\\addcontentsline{toc}" <> braces (text sectionType) <> - braces txt + braces txtNoNotes else empty -- | Convert list of inline elements to LaTeX. @@ -556,12 +661,29 @@ isQuoted _ = False -- | Convert inline element to LaTeX inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc +inlineToLaTeX (Span (id',classes,_) ils) = do + let noEmph = "csl-no-emph" `elem` classes + let noStrong = "csl-no-strong" `elem` classes + let noSmallCaps = "csl-no-smallcaps" `elem` classes + label' <- if null id' + then return empty + else toLabel id' >>= \x -> + return (text "\\label" <> braces (text x)) + fmap (label' <>) + ((if noEmph then inCmd "textup" else id) . + (if noStrong then inCmd "textnormal" else id) . + (if noSmallCaps then inCmd "textnormal" else id) . + (if not (noEmph || noStrong || noSmallCaps) + then braces + else id)) `fmap` inlineListToLaTeX ils inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = inlineListToLaTeX lst >>= return . inCmd "textbf" inlineToLaTeX (Strikeout lst) = do - contents <- inlineListToLaTeX lst + -- we need to protect VERB in an mbox or we get an error + -- see #1294 + contents <- inlineListToLaTeX $ protectCode lst modify $ \s -> s{ stStrikeout = True } return $ inCmd "sout" contents inlineToLaTeX (Superscript lst) = @@ -624,15 +746,16 @@ inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$' inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]" -inlineToLaTeX (RawInline "latex" str) = return $ text str -inlineToLaTeX (RawInline "tex" str) = return $ text str -inlineToLaTeX (RawInline _ _) = return empty +inlineToLaTeX (RawInline f str) + | f == Format "latex" || f == Format "tex" + = return $ text str + | otherwise = return empty inlineToLaTeX (LineBreak) = return "\\\\" inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt - ident' <- stringToLaTeX URLString ident - return $ text "\\hyperref" <> brackets (text ident') <> braces contents + lab <- toLabel ident + return $ text "\\hyperref" <> brackets (text lab) <> braces contents inlineToLaTeX (Link txt (src, _)) = case txt of [Str x] | x == src -> -- autolink @@ -645,19 +768,33 @@ inlineToLaTeX (Link txt (src, _)) = contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do modify $ \s -> s{ stGraphics = True } - let source' = if isAbsoluteURI source + let source' = if isURI source then source else unEscapeString source - return $ "\\includegraphics" <> braces (text source') + source'' <- stringToLaTeX URLString source' + return $ "\\includegraphics" <> braces (text source'') inlineToLaTeX (Note contents) = do + inMinipage <- gets stInMinipage modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents modify (\s -> s {stInNote = False}) let optnl = case reverse contents of (CodeBlock _ _ : _) -> cr _ -> empty - return $ "\\footnote" <> braces (nest 2 contents' <> optnl) - -- note: a \n before } needed when note ends with a Verbatim environment + let noteContents = nest 2 contents' <> optnl + modify $ \st -> st{ stNotes = noteContents : stNotes st } + return $ + if inMinipage + then "\\footnotemark{}" + -- note: a \n before } needed when note ends with a Verbatim environment + else "\\footnote" <> braces noteContents + +protectCode :: [Inline] -> [Inline] +protectCode [] = [] +protectCode (x@(Code ("",[],[]) _) : xs) = x : protectCode xs +protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs + where ltx = RawInline (Format "latex") +protectCode (x : xs) = x : protectCode xs citationsToNatbib :: [Citation] -> State WriterState Doc citationsToNatbib (one:[]) @@ -678,9 +815,9 @@ citationsToNatbib cits | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits = citeCommand "citep" p s ks where - noPrefix = and . map (null . citationPrefix) - noSuffix = and . map (null . citationSuffix) - ismode m = and . map (((==) m) . citationMode) + noPrefix = all (null . citationPrefix) + noSuffix = all (null . citationSuffix) + ismode m = all (((==) m) . citationMode) p = citationPrefix $ head $ cits s = citationSuffix $ last $ cits ks = intercalate ", " $ map citationId cits diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 0508b6c27..41eb3e5be 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2010 John MacFarlane + Copyright : Copyright (C) 2007-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -160,14 +160,16 @@ blockToMan :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMan _ Null = return empty +blockToMan opts (Div _ bs) = blockListToMan opts bs blockToMan opts (Plain inlines) = liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines blockToMan opts (Para inlines) = do contents <- liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines return $ text ".PP" $$ contents -blockToMan _ (RawBlock "man" str) = return $ text str -blockToMan _ (RawBlock _ _) = return empty +blockToMan _ (RawBlock f str) + | f == Format "man" = return $ text str + | otherwise = return empty blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" blockToMan opts (Header level _ inlines) = do contents <- inlineListToMan opts inlines @@ -281,7 +283,7 @@ definitionListItemToMan opts (label, defs) = do mapM (\item -> blockToMan opts item) rest first' <- blockToMan opts first return $ first' $$ text ".RS" $$ rest' $$ text ".RE" - return $ text ".TP" $$ text ".B " <> labelText $$ contents + return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents -- | Convert list of Pandoc block elements to man. blockListToMan :: WriterOptions -- ^ Options @@ -300,6 +302,7 @@ inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to man. inlineToMan :: WriterOptions -> Inline -> State WriterState Doc +inlineToMan opts (Span _ ils) = inlineListToMan opts ils inlineToMan opts (Emph lst) = do contents <- inlineListToMan opts lst return $ text "\\f[I]" <> contents <> text "\\f[]" @@ -327,12 +330,14 @@ inlineToMan opts (Cite _ lst) = inlineToMan _ (Code _ str) = return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" inlineToMan _ (Str str) = return $ text $ escapeString str -inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str +inlineToMan opts (Math InlineMath str) = + inlineListToMan opts $ readTeXMath' InlineMath str inlineToMan opts (Math DisplayMath str) = do - contents <- inlineListToMan opts $ readTeXMath str + contents <- inlineListToMan opts $ readTeXMath' DisplayMath str return $ cr <> text ".RS" $$ contents $$ text ".RE" -inlineToMan _ (RawInline "man" str) = return $ text str -inlineToMan _ (RawInline _ _) = return empty +inlineToMan _ (RawInline f str) + | f == Format "man" = return $ text str + | otherwise = return empty inlineToMan _ (LineBreak) = return $ cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr inlineToMan _ Space = return space diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 80402a757..a67271a5d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} {- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -32,22 +32,22 @@ Markdown: <http://daringfireball.net/projects/markdown/> -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, char, space) import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy ) -import Data.Char ( isSpace ) +import Data.Char ( isSpace, isPunctuation ) import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.State import qualified Data.Set as Set import Text.Pandoc.Writers.HTML (writeHtmlString) -import Text.Pandoc.Readers.TeXMath (readTeXMath) +import Text.Pandoc.Readers.TeXMath (readTeXMath') import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) -import Network.URI (isAbsoluteURI) +import Network.URI (isURI) import Data.Default import Data.Yaml (Value(Object,String,Array,Bool,Number)) import qualified Data.HashMap.Strict as H @@ -82,7 +82,7 @@ writePlain opts document = where document' = plainify document plainify :: Pandoc -> Pandoc -plainify = bottomUp go +plainify = walk go where go :: Inline -> Inline go (Emph xs) = SmallCaps xs go (Strong xs) = SmallCaps xs @@ -143,7 +143,7 @@ jsonToYaml (Object hashmap) = | otherwise -> (k' <> ":") $$ x (k', Object _, x) -> (k' <> ":") $$ nest 2 x (_, String "", _) -> empty - (k', _, x) -> k' <> ":" <> space <> x) + (k', _, x) -> k' <> ":" <> space <> hang 2 "" x) $ sortBy (comparing fst) $ H.toList hashmap jsonToYaml (Array vec) = vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec @@ -151,7 +151,7 @@ jsonToYaml (String "") = empty jsonToYaml (String s) = case T.unpack s of x | '\n' `elem` x -> hang 2 ("|" <> cr) $ text x - | not (any (`elem` x) "\"'#:[]{}?-") -> text x + | not (any isPunctuation x) -> text x | otherwise -> text $ "'" ++ substitute "'" "''" x ++ "'" jsonToYaml (Bool b) = text $ show b jsonToYaml (Number n) = text $ show n @@ -186,7 +186,13 @@ pandocToMarkdown opts (Pandoc meta blocks) = do let toc = if writerTableOfContents opts then tableOfContents opts headerBlocks else empty - body <- blockListToMarkdown opts blocks + -- Strip off final 'references' header if markdown citations enabled + let blocks' = if not isPlain && isEnabled Ext_citations opts + then case reverse blocks of + (Div (_,["references"],_) _):xs -> reverse xs + _ -> blocks + else blocks + body <- blockListToMarkdown opts blocks' st <- get notes' <- notesToMarkdown opts (reverse $ stNotes st) st' <- get -- note that the notes may contain refs @@ -301,22 +307,34 @@ blockToMarkdown :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMarkdown _ Null = return empty +blockToMarkdown opts (Div attrs ils) = do + isPlain <- gets stPlain + contents <- blockListToMarkdown opts ils + return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts) + then contents <> blankline + else tagWithAttrs "div" attrs <> blankline <> + contents <> blankline <> "</div>" <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines - return $ contents <> cr + -- escape if para starts with ordered list marker + st <- get + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let rendered = render colwidth contents + let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs + | otherwise = x : escapeDelimiter xs + escapeDelimiter [] = [] + let contents' = if isEnabled Ext_all_symbols_escapable opts && + not (stPlain st) && beginsWithOrderedListMarker rendered + then text $ escapeDelimiter rendered + else contents + return $ contents' <> cr -- title beginning with fig: indicates figure blockToMarkdown opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = blockToMarkdown opts (Para [Image alt (src,tit)]) -blockToMarkdown opts (Para inlines) = do - contents <- inlineListToMarkdown opts inlines - -- escape if para starts with ordered list marker - st <- get - let esc = if isEnabled Ext_all_symbols_escapable opts && - not (stPlain st) && - beginsWithOrderedListMarker (render Nothing contents) - then text "\x200B" -- zero-width space, a hack - else empty - return $ esc <> contents <> blankline +blockToMarkdown opts (Para inlines) = + (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) blockToMarkdown opts (RawBlock f str) | f == "html" = do st <- get @@ -325,7 +343,7 @@ blockToMarkdown opts (RawBlock f str) else return $ if isEnabled Ext_markdown_attribute opts then text (addMarkdownAttribute str) <> text "\n" else text str <> text "\n" - | f == "latex" || f == "tex" || f == "markdown" = do + | f `elem` ["latex", "tex", "markdown"] = do st <- get if stPlain st then return empty @@ -368,23 +386,27 @@ blockToMarkdown opts (CodeBlock (_,classes,_) str) isEnabled Ext_literate_haskell opts = return $ prefixed "> " (text str) <> blankline blockToMarkdown opts (CodeBlock attribs str) = return $ - case attribs of - x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts -> - tildes <> " " <> attrs <> cr <> text str <> - cr <> tildes <> blankline - (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts -> - backticks <> " " <> text cls <> cr <> text str <> - cr <> backticks <> blankline + case attribs == nullAttr of + False | isEnabled Ext_backtick_code_blocks opts -> + backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline + | isEnabled Ext_fenced_code_blocks opts -> + tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline _ -> nest (writerTabStop opts) (text str) <> blankline where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of [] -> "~~~~" xs -> case maximum $ map length xs of n | n < 3 -> "~~~~" | otherwise -> replicate (n+1) '~' - backticks = text "```" + backticks = text $ case [ln | ln <- lines str, all (=='`') ln] of + [] -> "```" + xs -> case maximum $ map length xs of + n | n < 3 -> "```" + | otherwise -> replicate (n+1) '`' attrs = if isEnabled Ext_fenced_code_attributes opts - then nowrap $ attrsToMarkdown attribs - else empty + then nowrap $ " " <> attrsToMarkdown attribs + else case attribs of + (_,[cls],_) -> " " <> text cls + _ -> empty blockToMarkdown opts (BlockQuote blocks) = do st <- get -- if we're writing literate haskell, put a space before the bird tracks @@ -456,16 +478,24 @@ addMarkdownAttribute s = pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> State WriterState Doc pipeTable headless aligns rawHeaders rawRows = do + let sp = text " " + let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor AlignRight x y = rblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty + let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows) let torow cs = nowrap $ text "|" <> - hcat (intersperse (text "|") $ map chomp cs) <> text "|" - let toborder (a, h) = let wid = max (offset h) 3 - in text $ case a of - AlignLeft -> ':':replicate (wid - 1) '-' - AlignCenter -> ':':replicate (wid - 2) '-' ++ ":" - AlignRight -> replicate (wid - 1) '-' ++ ":" - AlignDefault -> replicate wid '-' + hcat (intersperse (text "|") $ + zipWith3 blockFor aligns widths (map chomp cs)) + <> text "|" + let toborder (a, w) = text $ case a of + AlignLeft -> ':':replicate (w + 1) '-' + AlignCenter -> ':':replicate w '-' ++ ":" + AlignRight -> replicate (w + 1) '-' ++ ":" + AlignDefault -> replicate (w + 2) '-' let header = if headless then empty else torow rawHeaders - let border = torow $ map toborder $ zip aligns rawHeaders + let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ + map toborder $ zip aligns widths) <> text "|" let body = vcat $ map torow rawRows return $ header $$ border $$ body @@ -542,7 +572,14 @@ bulletListItemToMarkdown opts items = do contents <- blockListToMarkdown opts items let sps = replicate (writerTabStop opts - 2) ' ' let start = text ('-' : ' ' : sps) - return $ hang (writerTabStop opts) start $ contents <> cr + -- remove trailing blank line if it is a tight list + let contents' = case reverse items of + (BulletList xs:_) | isTightList xs -> + chomp contents <> cr + (OrderedList _ xs:_) | isTightList xs -> + chomp contents <> cr + _ -> contents + return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: WriterOptions -- ^ options @@ -608,10 +645,11 @@ getReference label (src, tit) = do Nothing -> do let label' = case find ((== label) . fst) (stRefs st) of Just _ -> -- label is used; generate numerical label - case find (\n -> not (any (== [Str (show n)]) - (map fst (stRefs st)))) [1..(10000 :: Integer)] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" + case find (\n -> notElem [Str (show n)] + (map fst (stRefs st))) + [1..(10000 :: Integer)] of + Just x -> [Str (show x)] + Nothing -> error "no unique label" Nothing -> label modify (\s -> s{ stRefs = (label', (src,tit)) : stRefs st }) return label' @@ -628,6 +666,12 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc +inlineToMarkdown opts (Span attrs ils) = do + st <- get + contents <- inlineListToMarkdown opts ils + return $ if stPlain st + then contents + else tagWithAttrs "span" attrs <> contents <> text "</span>" inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst return $ "*" <> contents <> "*" @@ -640,13 +684,13 @@ inlineToMarkdown opts (Strikeout lst) = do then "~~" <> contents <> "~~" else "<s>" <> contents <> "</s>" inlineToMarkdown opts (Superscript lst) = do - let lst' = bottomUp escapeSpaces lst + let lst' = walk escapeSpaces lst contents <- inlineListToMarkdown opts lst' return $ if isEnabled Ext_superscript opts then "^" <> contents <> "^" else "<sup>" <> contents <> "</sup>" inlineToMarkdown opts (Subscript lst) = do - let lst' = bottomUp escapeSpaces lst + let lst' = walk escapeSpaces lst contents <- inlineListToMarkdown opts lst' return $ if isEnabled Ext_subscript opts then "~" <> contents <> "~" @@ -681,7 +725,7 @@ inlineToMarkdown opts (Math InlineMath str) return $ "\\(" <> text str <> "\\)" | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\(" <> text str <> "\\\\)" - | otherwise = inlineListToMarkdown opts $ readTeXMath str + | otherwise = inlineListToMarkdown opts $ readTeXMath' InlineMath str inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_dollars opts = return $ "$$" <> text str <> "$$" @@ -690,7 +734,7 @@ inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\[" <> text str <> "\\\\]" | otherwise = (\x -> cr <> x <> cr) `fmap` - inlineListToMarkdown opts (readTeXMath str) + inlineListToMarkdown opts (readTeXMath' DisplayMath str) inlineToMarkdown opts (RawInline f str) | f == "html" || f == "markdown" || (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) = @@ -701,17 +745,20 @@ inlineToMarkdown opts (LineBreak) | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr | otherwise = return $ " " <> cr inlineToMarkdown _ Space = return space -inlineToMarkdown opts (Cite (c:cs) lst@[RawInline "latex" _]) +inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst +inlineToMarkdown opts (Cite (c:cs) lst) | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst - | citationMode c == AuthorInText = do - suffs <- inlineListToMarkdown opts $ citationSuffix c - rest <- mapM convertOne cs - let inbr = suffs <+> joincits rest - br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ text ("@" ++ citationId c) <+> br - | otherwise = do - cits <- mapM convertOne (c:cs) - return $ text "[" <> joincits cits <> text "]" + | otherwise = + if citationMode c == AuthorInText + then do + suffs <- inlineListToMarkdown opts $ citationSuffix c + rest <- mapM convertOne cs + let inbr = suffs <+> joincits rest + br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' + return $ text ("@" ++ citationId c) <+> br + else do + cits <- mapM convertOne (c:cs) + return $ text "[" <> joincits cits <> text "]" where joincits = hcat . intersperse (text "; ") . filter (not . isEmpty) convertOne Citation { citationId = k @@ -728,14 +775,13 @@ inlineToMarkdown opts (Cite (c:cs) lst@[RawInline "latex" _]) return $ pdoc <+> r modekey SuppressAuthor = "-" modekey _ = "" -inlineToMarkdown opts (Cite _ lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Link txt (src, tit)) = do linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - let useAuto = isAbsoluteURI src && + let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True _ -> False diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index b3b319c2a..3b987ba2b 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2010 John MacFarlane + Copyright : Copyright (C) 2008-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,9 +34,10 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared +import Text.Pandoc.Pretty (render) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intersect, intercalate ) +import Data.List ( intersect, intercalate, intersperse ) import Network.URI ( isURI ) import Control.Monad.State @@ -50,7 +51,7 @@ data WriterState = WriterState { writeMediaWiki :: WriterOptions -> Pandoc -> String writeMediaWiki opts document = evalState (pandocToMediaWiki opts document) - (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) + WriterState { stNotes = False, stListLevel = [], stUseTags = False } -- | Return MediaWiki representation of document. pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String @@ -83,6 +84,11 @@ blockToMediaWiki :: WriterOptions -- ^ Options blockToMediaWiki _ Null = return "" +blockToMediaWiki opts (Div attrs bs) = do + contents <- blockListToMediaWiki opts bs + return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++ + contents ++ "\n\n" ++ "</div>" + blockToMediaWiki opts (Plain inlines) = inlineListToMediaWiki opts inlines @@ -104,9 +110,10 @@ blockToMediaWiki opts (Para inlines) = do then "<p>" ++ contents ++ "</p>" else contents ++ if null listLevel then "\n" else "" -blockToMediaWiki _ (RawBlock "mediawiki" str) = return str -blockToMediaWiki _ (RawBlock "html" str) = return str -blockToMediaWiki _ (RawBlock _ _) = return "" +blockToMediaWiki _ (RawBlock f str) + | f == Format "mediawiki" = return str + | f == Format "html" = return str + | otherwise = return "" blockToMediaWiki _ HorizontalRule = return "\n-----\n" @@ -135,25 +142,17 @@ blockToMediaWiki opts (BlockQuote blocks) = do return $ "<blockquote>" ++ contents ++ "</blockquote>" blockToMediaWiki opts (Table capt aligns widths headers rows') = do - let alignStrings = map alignmentToString aligns - captionDoc <- if null capt - then return "" - else do - c <- inlineListToMediaWiki opts capt - return $ "<caption>" ++ c ++ "</caption>\n" - let percent w = show (truncate (100*w) :: Integer) ++ "%" - let coltags = if all (== 0.0) widths - then "" - else unlines $ map - (\w -> "<col width=\"" ++ percent w ++ "\" />") widths - head' <- if all null headers - then return "" - else do - hs <- tableRowToMediaWiki opts alignStrings 0 headers - return $ "<thead>\n" ++ hs ++ "\n</thead>\n" - body' <- zipWithM (tableRowToMediaWiki opts alignStrings) [1..] rows' - return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++ - "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n" + caption <- if null capt + then return "" + else do + c <- inlineListToMediaWiki opts capt + return $ "|+ " ++ trimr c ++ "\n" + let headless = all null headers + let allrows = if headless then rows' else headers:rows' + tableBody <- (concat . intersperse "|-\n") `fmap` + mapM (tableRowToMediaWiki opts headless aligns widths) + (zip [1..] allrows) + return $ "{|\n" ++ caption ++ tableBody ++ "|}\n" blockToMediaWiki opts x@(BulletList items) = do oldUseTags <- get >>= return . stUseTags @@ -285,20 +284,34 @@ vcat = intercalate "\n" -- Auxiliary functions for tables: tableRowToMediaWiki :: WriterOptions - -> [String] - -> Int - -> [[Block]] + -> Bool + -> [Alignment] + -> [Double] + -> (Int, [[Block]]) -> State WriterState String -tableRowToMediaWiki opts alignStrings rownum cols' = do - let celltype = if rownum == 0 then "th" else "td" - let rowclass = case rownum of - 0 -> "header" - x | x `rem` 2 == 1 -> "odd" - _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToMediaWiki opts celltype alignment item) - alignStrings cols' - return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" +tableRowToMediaWiki opts headless alignments widths (rownum, cells) = do + cells' <- mapM (\cellData -> + tableCellToMediaWiki opts headless rownum cellData) + $ zip3 alignments widths cells + return $ unlines cells' + +tableCellToMediaWiki :: WriterOptions + -> Bool + -> Int + -> (Alignment, Double, [Block]) + -> State WriterState String +tableCellToMediaWiki opts headless rownum (alignment, width, bs) = do + contents <- blockListToMediaWiki opts bs + let marker = if rownum == 1 && not headless then "!" else "|" + let percent w = show (truncate (100*w) :: Integer) ++ "%" + let attrs = ["align=" ++ show (alignmentToString alignment) | + alignment /= AlignDefault && alignment /= AlignLeft] ++ + ["width=\"" ++ percent width ++ "\"" | + width /= 0.0 && rownum == 1] + let attr = if null attrs + then "" + else unwords attrs ++ "|" + return $ marker ++ attr ++ trimr contents alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of @@ -307,17 +320,6 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableItemToMediaWiki :: WriterOptions - -> String - -> String - -> [Block] - -> State WriterState String -tableItemToMediaWiki opts celltype align' item = do - let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ - x ++ "</" ++ celltype ++ ">" - contents <- blockListToMediaWiki opts item - return $ mkcell contents - -- | Convert list of Pandoc block elements to MediaWiki. blockListToMediaWiki :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements @@ -333,6 +335,10 @@ inlineListToMediaWiki opts lst = -- | Convert Pandoc inline element to MediaWiki. inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String +inlineToMediaWiki opts (Span attrs ils) = do + contents <- inlineListToMediaWiki opts ils + return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "</span>" + inlineToMediaWiki opts (Emph lst) = do contents <- inlineListToMediaWiki opts lst return $ "''" ++ contents ++ "''" @@ -373,9 +379,10 @@ inlineToMediaWiki _ (Str str) = return $ escapeString str inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>" -- note: str should NOT be escaped -inlineToMediaWiki _ (RawInline "mediawiki" str) = return str -inlineToMediaWiki _ (RawInline "html" str) = return str -inlineToMediaWiki _ (RawInline _ _) = return "" +inlineToMediaWiki _ (RawInline f str) + | f == Format "mediawiki" = return str + | f == Format "html" = return str + | otherwise = return "" inlineToMediaWiki _ (LineBreak) = return "<br />" diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 090b97433..cb821e40b 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index db27286e8..15f7c8be8 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2010 John MacFarlane + Copyright : Copyright (C) 2008-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -30,7 +30,10 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Data.IORef -import Data.List ( isPrefixOf ) +import Data.List ( isPrefixOf, isSuffixOf ) +import Data.Maybe ( fromMaybe ) +import Text.XML.Light.Output +import Text.TeXMath import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip @@ -39,15 +42,15 @@ import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem, warn ) import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk +import Text.Pandoc.Writers.Shared ( fixDisplayMath ) import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import Control.Monad (liftM) -import Control.Monad.Trans (liftIO) import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E import Data.Time.Clock.POSIX ( getPOSIXTime ) -import System.FilePath ( takeExtension ) +import System.FilePath ( takeExtension, takeDirectory ) -- | Produce an ODT file from a Pandoc document. writeODT :: WriterOptions -- ^ Writer options @@ -61,34 +64,44 @@ writeODT opts doc@(Pandoc meta _) = do Just f -> B.readFile f Nothing -> (B.fromChunks . (:[])) `fmap` readDataFile datadir "reference.odt" - -- handle pictures + -- handle formulas and pictures picEntriesRef <- newIORef ([] :: [Entry]) - let sourceDir = writerSourceDirectory opts - doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc + doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime - let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents + let contentEntry = toEntry "content.xml" epochtime + $ fromStringLazy newContents picEntries <- readIORef picEntriesRef - let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries + let archive = foldr addEntryToArchive refArchive + $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive let toFileEntry fp = case getMimeType fp of - Nothing -> empty + Nothing -> if "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp + then selfClosingTag "manifest:file-entry" + [("manifest:media-type","application/vnd.oasis.opendocument.formula") + ,("manifest:full-path",fp)] + else empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) ,("manifest:full-path", fp) + ,("manifest:version", "1.2") ] - let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ] + let files = [ ent | ent <- filesInArchive archive, + not ("META-INF" `isPrefixOf` ent) ] + let formulas = [ takeDirectory ent ++ "/" | ent <- filesInArchive archive, + "Formula-" `isPrefixOf` ent, takeExtension ent == ".xml" ] let manifestEntry = toEntry "META-INF/manifest.xml" epochtime $ fromStringLazy $ render Nothing $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" $$ ( inTags True "manifest:manifest" - [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")] + [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0") + ,("manifest:version","1.2")] $ ( selfClosingTag "manifest:file-entry" [("manifest:media-type","application/vnd.oasis.opendocument.text") - ,("manifest:version","1.2") ,("manifest:full-path","/")] $$ vcat ( map toFileEntry $ files ) + $$ vcat ( map toFileEntry $ formulas ) ) ) let archive' = addEntryToArchive manifestEntry archive @@ -109,19 +122,23 @@ writeODT opts doc@(Pandoc meta _) = do ) ) ) - let archive'' = addEntryToArchive metaEntry archive' + -- make sure mimetype is first + let mimetypeEntry = toEntry "mimetype" epochtime + $ fromStringLazy "application/vnd.oasis.opendocument.text" + let archive'' = addEntryToArchive mimetypeEntry + $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' -transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline -transformPic sourceDir entriesRef (Image lab (src,_)) = do - res <- liftIO $ E.try $ fetchItem sourceDir src +transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline +transformPicMath opts entriesRef (Image lab (src,_)) = do + res <- fetchItem (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do - liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." + warn $ "Could not find image `" ++ src ++ "', skipping..." return $ Emph lab Right (img, _) -> do let size = imageSize img - let (w,h) = maybe (0,0) id $ sizeInPoints `fmap` size + let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size let tit' = show w ++ "x" ++ show h entries <- readIORef entriesRef let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src @@ -130,5 +147,29 @@ transformPic sourceDir entriesRef (Image lab (src,_)) = do let entry = toEntry newsrc epochtime $ toLazy img modifyIORef entriesRef (entry:) return $ Image lab (newsrc, tit') -transformPic _ _ x = return x +transformPicMath _ entriesRef (Math t math) = do + entries <- readIORef entriesRef + let dt = if t == InlineMath then DisplayInline else DisplayBlock + case texMathToMathML dt math of + Left _ -> return $ Math t math + Right r -> do + let conf = useShortEmptyTags (const False) defaultConfigPP + let mathml = ppcTopElement conf r + epochtime <- floor `fmap` getPOSIXTime + let dirname = "Formula-" ++ show (length entries) ++ "/" + let fname = dirname ++ "content.xml" + let entry = toEntry fname epochtime (fromStringLazy mathml) + modifyIORef entriesRef (entry:) + return $ RawInline (Format "opendocument") $ render Nothing $ + inTags False "draw:frame" [("text:anchor-type", + if t == DisplayMath + then "paragraph" + else "as-char") + ,("style:vertical-pos", "middle") + ,("style:vertical-rel", "text")] $ + selfClosingTag "draw:object" [("xlink:href", dirname) + , ("xlink:type", "simple") + , ("xlink:show", "embed") + , ("xlink:actuate", "onLoad")] +transformPicMath _ _ x = return x diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index f6926c1dc..dd359f3f5 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013 John MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 30f99c3e4..b6da2694c 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,7 +1,7 @@ -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards, OverloadedStrings #-} {- -Copyright (C) 2008-2010 Andrea Rossato <andrea.rossato@ing.unitn.it> -and John MacFarlane. +Copyright (C) 2008-2014 Andrea Rossato <andrea.rossato@ing.unitn.it> + and John MacFarlane. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane + Copyright : Copyright (C) 2008-2014 Andrea Rossato and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it> @@ -64,6 +64,7 @@ data WriterState = , stInDefinition :: Bool , stTight :: Bool , stFirstPara :: Bool + , stImageId :: Int } defaultWriterState :: WriterState @@ -78,6 +79,7 @@ defaultWriterState = , stInDefinition = False , stTight = False , stFirstPara = False + , stImageId = 1 } when :: Bool -> Doc -> Doc @@ -283,8 +285,13 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc blockToOpenDocument o bs - | Plain b <- bs = inParagraphTags =<< inlinesToOpenDocument o b - | Para b <- bs = inParagraphTags =<< inlinesToOpenDocument o b + | Plain b <- bs = if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b + | Para b <- bs = if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b + | Div _ xs <- bs = blocksToOpenDocument o xs | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b @@ -295,7 +302,9 @@ blockToOpenDocument o bs | Table c a w h r <- bs = setFirstPara >> table c a w h r | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]) - | RawBlock _ _ <- bs = return empty + | RawBlock f s <- bs = if f == Format "opendocument" + then return $ text s + else return empty | Null <- bs = return empty | otherwise = return empty where @@ -360,6 +369,7 @@ inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc inlineToOpenDocument o ils | Space <- ils = inTextStyle space + | Span _ xs <- ils = inlinesToOpenDocument o xs | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l @@ -369,23 +379,27 @@ inlineToOpenDocument o ils | Subscript l <- ils = withTextStyle Sub $ inlinesToOpenDocument o l | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l - | Code _ s <- ils = preformatted s - | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s) + | Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s + | Math t s <- ils = inlinesToOpenDocument o (readTeXMath' t s) | Cite _ l <- ils = inlinesToOpenDocument o l - | RawInline "opendocument" s <- ils = preformatted s - | RawInline "html" s <- ils = preformatted s -- for backwards compat. - | RawInline _ _ <- ils = return empty + | RawInline f s <- ils = if f == Format "opendocument" + then return $ text s + else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l - | Image _ (s,t) <- ils = return $ mkImg s t + | Image _ (s,t) <- ils = mkImg s t | Note l <- ils = mkNote l | otherwise = return empty where - preformatted = return . inSpanTags "Teletype" . handleSpaces . escapeStringForXML + preformatted s = handleSpaces $ escapeStringForXML s mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") , ("xlink:href" , s ) , ("office:name", t ) ] . inSpanTags "Definition" - mkImg s t = inTags False "draw:frame" (attrsFromTitle t) $ + mkImg s t = do + id' <- gets stImageId + modify (\st -> st{ stImageId = id' + 1 }) + return $ inTags False "draw:frame" + (("draw:name", "img" ++ show id'):attrsFromTitle t) $ selfClosingTag "draw:image" [ ("xlink:href" , s ) , ("xlink:type" , "simple") , ("xlink:show" , "embed" ) @@ -457,7 +471,8 @@ tableStyle :: Int -> [(Char,Double)] -> Doc tableStyle num wcs = let tableId = "Table" ++ show (num + 1) table = inTags True "style:style" - [("style:name", tableId)] $ + [("style:name", tableId) + ,("style:family", "table")] $ selfClosingTag "style:table-properties" [("table:align" , "center")] colStyle (c,0) = selfClosingTag "style:style" @@ -489,14 +504,16 @@ paraStyle parent attrs = do tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] - indent = when (i /= 0 || b || t) $ - selfClosingTag "style:paragraph-properties" $ - [ ("fo:margin-left" , indentVal) + indent = if (i /= 0 || b) + then [ ("fo:margin-left" , indentVal) , ("fo:margin-right" , "0in" ) , ("fo:text-indent" , "0in" ) , ("style:auto-text-indent" , "false" )] - ++ tight - addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) indent + else [] + attributes = indent ++ tight + paraProps = when (not $ null attributes) $ + selfClosingTag "style:paragraph-properties" attributes + addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps return pn paraListStyle :: Int -> State WriterState Int @@ -517,7 +534,8 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC deriving ( Eq,Ord ) +data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre + deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] textStyleAttr s @@ -531,5 +549,8 @@ textStyleAttr s | Sub <- s = [("style:text-position" ,"sub 58%" )] | Sup <- s = [("style:text-position" ,"super 58%" )] | SmallC <- s = [("fo:font-variant" ,"small-caps")] + | Pre <- s = [("style:font-name" ,"Courier New") + ,("style:font-name-asian" ,"Courier New") + ,("style:font-name-complex" ,"Courier New")] | otherwise = [] diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 40e8abf7e..87046537c 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 Puneeth Chaganti <punchagan@gmail.com> +Copyright (C) 2010-2014 Puneeth Chaganti <punchagan@gmail.com> + and John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Org - Copyright : Copyright (C) 2010 Puneeth Chaganti + Copyright : Copyright (C) 2010-2014 Puneeth Chaganti and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Puneeth Chaganti <punchagan@gmail.com> @@ -106,6 +107,14 @@ escapeString = escapeStringUsing $ blockToOrg :: Block -- ^ Block element -> State WriterState Doc blockToOrg Null = return empty +blockToOrg (Div attrs bs) = do + contents <- blockListToOrg bs + let startTag = tagWithAttrs "div" attrs + let endTag = text "</div>" + return $ blankline $$ "#+BEGIN_HTML" $$ + nest 2 startTag $$ "#+END_HTML" $$ blankline $$ + contents $$ blankline $$ "#+BEGIN_HTML" $$ + nest 2 endTag $$ "#+END_HTML" $$ blankline blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure blockToOrg (Para [Image txt (src,'f':'i':'g':':':tit)]) = do @@ -121,7 +130,7 @@ blockToOrg (Para inlines) = do blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline -blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" = +blockToOrg (RawBlock f str) | f `elem` ["org", "latex", "tex"] = return $ text str blockToOrg (RawBlock _ _) = return empty blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline @@ -229,6 +238,8 @@ inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat -- | Convert Pandoc inline element to Org. inlineToOrg :: Inline -> State WriterState Doc +inlineToOrg (Span _ lst) = + inlineListToOrg lst inlineToOrg (Emph lst) = do contents <- inlineListToOrg lst return $ "/" <> contents <> "/" @@ -261,7 +272,7 @@ inlineToOrg (Math t str) = do else "$$" <> text str <> "$$" inlineToOrg (RawInline f str) | f == "tex" || f == "latex" = return $ text str inlineToOrg (RawInline _ _) = return empty -inlineToOrg (LineBreak) = return cr -- there's no line break in Org +inlineToOrg (LineBreak) = return (text "\\\\" <> cr) inlineToOrg Space = return space inlineToOrg (Link txt (src, _)) = do case txt of diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 606793842..31c97349b 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -38,11 +38,11 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Builder (deleteMeta) import Data.List ( isPrefixOf, intersperse, transpose ) -import Network.URI (isAbsoluteURI) +import Network.URI (isURI) import Text.Pandoc.Pretty import Control.Monad.State import Control.Applicative ( (<$>) ) -import Data.Char (isSpace) +import Data.Char (isSpace, toLower) type Refs = [([Inline], Target)] @@ -161,6 +161,11 @@ bordered contents c = blockToRST :: Block -- ^ Block element -> State WriterState Doc blockToRST Null = return empty +blockToRST (Div attr bs) = do + contents <- blockListToRST bs + let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr) + let endTag = ".. raw:: html" $+$ nest 3 "</div>" + return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline blockToRST (Plain inlines) = inlineListToRST inlines -- title beginning with fig: indicates that the image is a figure blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do @@ -175,9 +180,11 @@ blockToRST (Para inlines) | otherwise = do contents <- inlineListToRST inlines return $ contents <> blankline -blockToRST (RawBlock f str) = - return $ blankline <> ".. raw:: " <> text f $+$ - (nest 3 $ text str) $$ blankline +blockToRST (RawBlock f@(Format f') str) + | f == "rst" = return $ text str + | otherwise = return $ blankline <> ".. raw:: " <> + text (map toLower f') $+$ + (nest 3 $ text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToRST (Header level _ inlines) = do @@ -212,11 +219,15 @@ blockToRST (Table caption _ widths headers rows) = do else blankline <> text "Table: " <> caption' headers' <- mapM blockListToRST headers rawRows <- mapM (mapM blockListToRST) rows - let isSimple = all (==0) widths && all (all (\bs -> length bs <= 1)) rows + -- let isSimpleCell [Plain _] = True + -- isSimpleCell [Para _] = True + -- isSimpleCell [] = True + -- isSimpleCell _ = False + -- let isSimple = all (==0) widths && all (all isSimpleCell) rows let numChars = maximum . map offset opts <- get >>= return . stOptions let widthsInChars = - if isSimple + if all (== 0) widths then map ((+2) . numChars) $ transpose (headers' : rawRows) else map (floor . (fromIntegral (writerColumns opts) *)) widths let hpipeBlocks blocks = hcat [beg, middle, end] @@ -280,7 +291,7 @@ definitionListItemToRST (label, defs) = do label' <- inlineListToRST label contents <- liftM vcat $ mapM blockListToRST defs tabstop <- get >>= (return . writerTabStop . stOptions) - return $ label' $$ nest tabstop (contents <> cr) + return $ label' $$ nest tabstop (nestle contents <> cr) -- | Convert list of Pandoc block elements to RST. blockListToRST :: [Block] -- ^ List of block elements @@ -289,8 +300,14 @@ blockListToRST blocks = mapM blockToRST blocks >>= return . vcat -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc -inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat - where insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed +inlineListToRST lst = + mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat + where -- remove spaces after displaymath, as they screw up indentation: + removeSpaceAfterDisplayMath (Math DisplayMath x : zs) = + Math DisplayMath x : dropWhile (==Space) zs + removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs + removeSpaceAfterDisplayMath [] = [] + insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed insertBS (x:y:z:zs) | isComplex y && surroundComplex x z = x : y : RawInline "rst" "\\ " : insertBS (z:zs) @@ -338,6 +355,7 @@ inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat -- | Convert Pandoc inline element to RST. inlineToRST :: Inline -> State WriterState Doc +inlineToRST (Span _ ils) = inlineListToRST ils inlineToRST (Emph lst) = do contents <- inlineListToRST lst return $ "*" <> contents <> "*" @@ -372,13 +390,14 @@ inlineToRST (Math t str) = do then blankline $$ ".. math::" $$ blankline $$ nest 3 (text str) $$ blankline else blankline $$ (".. math:: " <> text str) $$ blankline -inlineToRST (RawInline "rst" x) = return $ text x -inlineToRST (RawInline _ _) = return empty +inlineToRST (RawInline f x) + | f == "rst" = return $ text x + | otherwise = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space -- autolink inlineToRST (Link [Str str] (src, _)) - | isAbsoluteURI src && + | isURI src && if "mailto:" `isPrefixOf` src then src == escapeURI ("mailto:" ++ str) else src == escapeURI str = do diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 0db1c52c4..e0428aaa8 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,13 +34,13 @@ import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Generic (bottomUpM) +import Text.Pandoc.Walk import Data.List ( isSuffixOf, intercalate ) import Data.Char ( ord, chr, isDigit, toLower ) import System.FilePath ( takeExtension ) import qualified Data.ByteString as B import Text.Printf ( printf ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) import qualified Control.Exception as E -- | Convert Image inlines into a raw RTF embedded image, read from a file. @@ -48,7 +48,7 @@ import qualified Control.Exception as E rtfEmbedImage :: Inline -> IO Inline rtfEmbedImage x@(Image _ (src,_)) = do let ext = map toLower (takeExtension src) - if ext `elem` [".jpg",".jpeg",".png"] && not (isAbsoluteURI src) + if ext `elem` [".jpg",".jpeg",".png"] && not (isURI src) then do let src' = unEscapeString src imgdata <- E.catch (B.readFile src') @@ -62,7 +62,7 @@ rtfEmbedImage x@(Image _ (src,_)) = do let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}" return $ if B.null imgdata then x - else RawInline "rtf" raw + else RawInline (Format "rtf") raw else return x rtfEmbedImage x = return x @@ -70,7 +70,7 @@ rtfEmbedImage x = return x -- images embedded as encoded binary data. writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String writeRTFWithEmbeddedImages options doc = - writeRTF options `fmap` bottomUpM rtfEmbedImage doc + writeRTF options `fmap` walkM rtfEmbedImage doc -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String @@ -208,6 +208,8 @@ blockToRTF :: Int -- ^ indent level -> Block -- ^ block to convert -> String blockToRTF _ _ Null = "" +blockToRTF indent alignment (Div _ bs) = + concatMap (blockToRTF indent alignment) bs blockToRTF indent alignment (Plain lst) = rtfCompact indent 0 alignment $ inlineListToRTF lst blockToRTF indent alignment (Para lst) = @@ -216,8 +218,9 @@ blockToRTF indent alignment (BlockQuote lst) = concatMap (blockToRTF (indent + indentIncrement) alignment) lst blockToRTF indent _ (CodeBlock _ str) = rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ (RawBlock "rtf" str) = str -blockToRTF _ _ (RawBlock _ _) = "" +blockToRTF _ _ (RawBlock f str) + | f == Format "rtf" = str + | otherwise = "" blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ @@ -256,7 +259,7 @@ tableRowToRTF header indent aligns sizes' cols = tableItemToRTF :: Int -> Alignment -> [Block] -> String tableItemToRTF indent alignment item = let contents = concatMap (blockToRTF indent alignment) item - in "{\\intbl " ++ contents ++ "\\cell}\n" + in "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. @@ -308,6 +311,7 @@ inlineListToRTF lst = concatMap inlineToRTF lst -- | Convert inline item to RTF. inlineToRTF :: Inline -- ^ inline to convert -> String +inlineToRTF (Span _ lst) = inlineListToRTF lst inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" @@ -320,10 +324,11 @@ inlineToRTF (Quoted DoubleQuote lst) = "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str -inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str +inlineToRTF (Math t str) = inlineListToRTF $ readTeXMath' t str inlineToRTF (Cite _ lst) = inlineListToRTF lst -inlineToRTF (RawInline "rtf" str) = str -inlineToRTF (RawInline _ _) = "" +inlineToRTF (RawInline f str) + | f == Format "rtf" = str + | otherwise = "" inlineToRTF (LineBreak) = "\\line " inlineToRTF Space = " " inlineToRTF (Link text (src, _)) = diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index c6c30d070..800e741a4 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2013 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2013-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013 John MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -32,9 +33,13 @@ module Text.Pandoc.Writers.Shared ( , getField , setField , defField + , tagWithAttrs + , fixDisplayMath ) where import Text.Pandoc.Definition +import Text.Pandoc.Pretty +import Text.Pandoc.XML (escapeStringForXML) import Control.Monad (liftM) import Text.Pandoc.Options (WriterOptions(..)) import qualified Data.HashMap.Strict as H @@ -42,6 +47,7 @@ import qualified Data.Map as M import qualified Data.Text as T import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..)) import qualified Data.Traversable as Traversable +import Data.List ( groupBy ) -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. @@ -61,8 +67,7 @@ metaToJSON opts blockWriter inlineWriter (Meta metamap) renderedMap <- Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap - return $ M.foldWithKey (\key val obj -> defField key val obj) - baseContext renderedMap + return $ M.foldWithKey defField baseContext renderedMap | otherwise = return (Object H.empty) metaValueToJSON :: Monad m @@ -74,6 +79,7 @@ metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $ Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $ Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs +metaValueToJSON _ _ (MetaBool b) = return $ toJSON b metaValueToJSON _ _ (MetaString s) = return $ toJSON s metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs @@ -119,3 +125,41 @@ defField field val (Object hashmap) = where f _newval oldval = oldval defField _ _ x = x +-- Produce an HTML tag with the given pandoc attributes. +tagWithAttrs :: String -> Attr -> Doc +tagWithAttrs tag (ident,classes,kvs) = hsep + ["<" <> text tag + ,if null ident + then empty + else "id=" <> doubleQuotes (text ident) + ,if null classes + then empty + else "class=" <> doubleQuotes (text (unwords classes)) + ,hsep (map (\(k,v) -> text k <> "=" <> + doubleQuotes (text (escapeStringForXML v))) kvs) + ] <> ">" + +isDisplayMath :: Inline -> Bool +isDisplayMath (Math DisplayMath _) = True +isDisplayMath _ = False + +stripLeadingTrailingSpace :: [Inline] -> [Inline] +stripLeadingTrailingSpace = go . reverse . go . reverse + where go (Space:xs) = xs + go xs = xs + +-- Put display math in its own block (for ODT/DOCX). +fixDisplayMath :: Block -> Block +fixDisplayMath (Plain lst) + | any isDisplayMath lst && not (all isDisplayMath lst) = + -- chop into several paragraphs so each displaymath is its own + Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ + groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || + not (isDisplayMath x || isDisplayMath y)) lst +fixDisplayMath (Para lst) + | any isDisplayMath lst && not (all isDisplayMath lst) = + -- chop into several paragraphs so each displaymath is its own + Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ + groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || + not (isDisplayMath x || isDisplayMath y)) lst +fixDisplayMath x = x diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 0f57d14b2..8ac717bab 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2008-2010 John MacFarlane and Peter Wang +Copyright (C) 2008-2014 John MacFarlane and Peter Wang This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2010 John MacFarlane and Peter Wang + Copyright : Copyright (C) 2008-2014 John MacFarlane and Peter Wang License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -39,7 +40,7 @@ import Data.Ord ( comparing ) import Data.Char ( chr, ord ) import Control.Monad.State import Text.Pandoc.Pretty -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) import System.FilePath data WriterState = @@ -123,6 +124,8 @@ blockToTexinfo :: Block -- ^ Block to convert blockToTexinfo Null = return empty +blockToTexinfo (Div _ bs) = blockListToTexinfo bs + blockToTexinfo (Plain lst) = inlineListToTexinfo lst @@ -150,10 +153,11 @@ blockToTexinfo (CodeBlock _ str) = do flush (text str) $$ text "@end verbatim" <> blankline -blockToTexinfo (RawBlock "texinfo" str) = return $ text str -blockToTexinfo (RawBlock "latex" str) = - return $ text "@tex" $$ text str $$ text "@end tex" -blockToTexinfo (RawBlock _ _) = return empty +blockToTexinfo (RawBlock f str) + | f == "texinfo" = return $ text str + | f == "latex" || f == "tex" = + return $ text "@tex" $$ text str $$ text "@end tex" + | otherwise = return empty blockToTexinfo (BulletList lst) = do items <- mapM listItemToTexinfo lst @@ -289,7 +293,7 @@ blockListToTexinfo (x:xs) = do case x of Header level _ _ -> do -- We need need to insert a menu for this node. - let (before, after) = break isHeader xs + let (before, after) = break isHeaderBlock xs before' <- blockListToTexinfo before let menu = if level < 4 then collectNodes (level + 1) after @@ -311,10 +315,6 @@ blockListToTexinfo (x:xs) = do xs' <- blockListToTexinfo xs return $ x' $$ xs' -isHeader :: Block -> Bool -isHeader (Header _ _ _) = True -isHeader _ = False - collectNodes :: Int -> [Block] -> [Block] collectNodes _ [] = [] collectNodes level (x:xs) = @@ -374,6 +374,9 @@ disallowedInNode c = c `elem` ".,:()" inlineToTexinfo :: Inline -- ^ Inline to convert -> State WriterState Doc +inlineToTexinfo (Span _ lst) = + inlineListToTexinfo lst + inlineToTexinfo (Emph lst) = inlineListToTexinfo lst >>= return . inCmd "emph" @@ -413,10 +416,11 @@ inlineToTexinfo (Cite _ lst) = inlineListToTexinfo lst inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str -inlineToTexinfo (RawInline f str) | f == "latex" || f == "tex" = - return $ text "@tex" $$ text str $$ text "@end tex" -inlineToTexinfo (RawInline "texinfo" str) = return $ text str -inlineToTexinfo (RawInline _ _) = return empty +inlineToTexinfo (RawInline f str) + | f == "latex" || f == "tex" = + return $ text "@tex" $$ text str $$ text "@end tex" + | f == "texinfo" = return $ text str + | otherwise = return empty inlineToTexinfo (LineBreak) = return $ text "@*" inlineToTexinfo Space = return $ char ' ' @@ -440,7 +444,7 @@ inlineToTexinfo (Image alternate (source, _)) = do where ext = drop 1 $ takeExtension source' base = dropExtension source' - source' = if isAbsoluteURI source + source' = if isURI source then source else unEscapeString source diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 3288ce222..3a6982a01 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -33,6 +33,7 @@ module Text.Pandoc.Writers.Textile ( writeTextile ) where import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared +import Text.Pandoc.Pretty (render) import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) @@ -50,7 +51,7 @@ data WriterState = WriterState { writeTextile :: WriterOptions -> Pandoc -> String writeTextile opts document = evalState (pandocToTextile opts document) - (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) + WriterState { stNotes = [], stListLevel = [], stUseTags = False } -- | Return Textile representation of document. pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String @@ -101,6 +102,12 @@ blockToTextile :: WriterOptions -- ^ Options blockToTextile _ Null = return "" +blockToTextile opts (Div attr bs) = do + let startTag = render Nothing $ tagWithAttrs "div" attr + let endTag = "</div>" + contents <- blockListToTextile opts bs + return $ startTag ++ "\n\n" ++ contents ++ "\n\n" ++ endTag ++ "\n" + blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines @@ -118,10 +125,9 @@ blockToTextile opts (Para inlines) = do then "<p>" ++ contents ++ "</p>" else contents ++ if null listLevel then "\n" else "" -blockToTextile _ (RawBlock f str) = - if f == "html" || f == "textile" - then return str - else return "" +blockToTextile _ (RawBlock f str) + | f == Format "html" || f == Format "textile" = return str + | otherwise = return "" blockToTextile _ HorizontalRule = return "<hr />\n" @@ -343,6 +349,9 @@ inlineListToTextile opts lst = -- | Convert Pandoc inline element to Textile. inlineToTextile :: WriterOptions -> Inline -> State WriterState String +inlineToTextile opts (Span _ lst) = + inlineListToTextile opts lst + inlineToTextile opts (Emph lst) = do contents <- inlineListToTextile opts lst return $ if '_' `elem` contents @@ -395,10 +404,9 @@ inlineToTextile _ (Str str) = return $ escapeStringForTextile str inlineToTextile _ (Math _ str) = return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>" -inlineToTextile _ (RawInline f str) = - if f == "html" || f == "textile" - then return str - else return "" +inlineToTextile _ (RawInline f str) + | f == Format "html" || f == Format "textile" = return str + | otherwise = return "" inlineToTextile _ (LineBreak) = return "\n" |