diff options
Diffstat (limited to 'src/Text/Pandoc/Writers')
-rw-r--r-- | src/Text/Pandoc/Writers/AsciiDoc.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/CommonMark.hs | 178 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ConTeXt.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Custom.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 440 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/DokuWiki.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 136 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/FB2.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 215 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ICML.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 84 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 118 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/MediaWiki.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ODT.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/OPML.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/OpenDocument.hs | 15 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 82 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RTF.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Texinfo.hs | 6 |
20 files changed, 1031 insertions, 355 deletions
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e5b8c5167..1c33b004a 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -126,7 +126,7 @@ blockToAsciiDoc :: WriterOptions -- ^ Options blockToAsciiDoc _ Null = return empty blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines - return $ contents <> cr + return $ contents <> blankline blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do blockToAsciiDoc opts (Para [Image alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do @@ -272,7 +272,7 @@ bulletListItemToAsciiDoc opts blocks = do contents <- foldM addBlock empty blocks modify $ \s -> s{ bulletListLevel = lev } let marker = text (replicate lev '*') - return $ marker <> space <> contents <> cr + return $ marker <> text " " <> contents <> cr -- | Convert ordered list item (a list of blocks) to asciidoc. orderedListItemToAsciiDoc :: WriterOptions -- ^ options @@ -292,7 +292,7 @@ orderedListItemToAsciiDoc opts marker blocks = do modify $ \s -> s{ orderedListLevel = lev + 1 } contents <- foldM addBlock empty blocks modify $ \s -> s{ orderedListLevel = lev } - return $ text marker <> space <> contents <> cr + return $ text marker <> text " " <> contents <> cr -- | Convert definition list item (label, list of blocks) to asciidoc. definitionListItemToAsciiDoc :: WriterOptions diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs new file mode 100644 index 000000000..706b27175 --- /dev/null +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -0,0 +1,178 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.CommonMark + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to CommonMark. + +CommonMark: <http://commonmark.org> +-} +module Text.Pandoc.Writers.CommonMark (writeCommonMark) where + +import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Definition +import Text.Pandoc.Shared (isTightList) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import CMark +import qualified Data.Text as T +import Control.Monad.Identity (runIdentity, Identity) +import Control.Monad.State (runState, State, modify, get) +import Text.Pandoc.Walk (walkM) + +-- | Convert Pandoc to CommonMark. 
+writeCommonMark :: WriterOptions -> Pandoc -> String +writeCommonMark opts (Pandoc meta blocks) = rendered + where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes') + (blocks', notes) = runState (walkM processNotes blocks) [] + notes' = if null notes + then [] + else [OrderedList (1, Decimal, Period) $ reverse notes] + metadata = runIdentity $ metaToJSON opts + (blocksToCommonMark opts) + (inlinesToCommonMark opts) + meta + context = defField "body" main $ metadata + rendered = if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +processNotes :: Inline -> State [[Block]] Inline +processNotes (Note bs) = do + modify (bs :) + notes <- get + return $ Str $ "[" ++ show (length notes) ++ "]" +processNotes x = return x + +node :: NodeType -> [Node] -> Node +node = Node Nothing + +blocksToCommonMark :: WriterOptions -> [Block] -> Identity String +blocksToCommonMark opts bs = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node DOCUMENT (blocksToNodes bs) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then writerColumns opts + else 0 + +inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String +inlinesToCommonMark opts ils = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node PARAGRAPH (inlinesToNodes ils) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then writerColumns opts + else 0 + +blocksToNodes :: [Block] -> [Node] +blocksToNodes = foldr blockToNodes [] + +blockToNodes :: Block -> [Node] -> [Node] +blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (CodeBlock (_,classes,_) xs) = + (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :) +blockToNodes (RawBlock fmt xs) + | fmt == Format "html" = (node (HTML (T.pack xs)) [] :) + | otherwise = id 
+blockToNodes (BlockQuote bs) = + (node BLOCK_QUOTE (blocksToNodes bs) :) +blockToNodes (BulletList items) = + (node (LIST ListAttributes{ + listType = BULLET_LIST, + listDelim = PERIOD_DELIM, + listTight = isTightList items, + listStart = 1 }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes (OrderedList (start, _sty, delim) items) = + (node (LIST ListAttributes{ + listType = ORDERED_LIST, + listDelim = case delim of + OneParen -> PAREN_DELIM + TwoParens -> PAREN_DELIM + _ -> PERIOD_DELIM, + listTight = isTightList items, + listStart = start }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes HorizontalRule = (node HRULE [] :) +blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :) +blockToNodes (Div _ bs) = (blocksToNodes bs ++) +blockToNodes (DefinitionList items) = blockToNodes (BulletList items') + where items' = map dlToBullet items + dlToBullet (term, ((Para xs : ys) : zs)) = + Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, ((Plain xs : ys) : zs)) = + Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, xs) = + Para term : concat xs +blockToNodes t@(Table _ _ _ _ _) = + (node (HTML (T.pack $! writeHtmlString def $! 
Pandoc nullMeta [t])) [] :) +blockToNodes Null = id + +inlinesToNodes :: [Inline] -> [Node] +inlinesToNodes = foldr inlineToNodes [] + +inlineToNodes :: Inline -> [Node] -> [Node] +inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) +inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) +inlineToNodes LineBreak = (node LINEBREAK [] :) +inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) +inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) +inlineToNodes (Strikeout xs) = + ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</s>")) []]) ++ ) +inlineToNodes (Superscript xs) = + ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sup>")) []]) ++ ) +inlineToNodes (Subscript xs) = + ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sub>")) []]) ++ ) +inlineToNodes (SmallCaps xs) = + ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) [] + : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</span>")) []]) ++ ) +inlineToNodes (Link ils (url,tit)) = + (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (Image ils (url,tit)) = + (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (RawInline fmt xs) + | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :) + | otherwise = id +inlineToNodes (Quoted qt ils) = + ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) + where (start, end) = case qt of + SingleQuote -> (T.pack "‘", T.pack "’") + DoubleQuote -> (T.pack "“", T.pack "”") +inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes (Math mt str) = + case mt of + InlineMath -> + (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + DisplayMath -> + (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Cite _ ils) = 
(inlinesToNodes ils ++) +inlineToNodes (Note _) = id -- should not occur +-- we remove Note elements in preprocessing diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index bbca7f858..edfb4d0ff 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -36,6 +36,7 @@ import Text.Pandoc.Options import Text.Pandoc.Walk (query) import Text.Printf ( printf ) import Data.List ( intercalate ) +import Data.Char ( ord ) import Control.Monad.State import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate' ) @@ -114,6 +115,13 @@ escapeCharForConTeXt opts ch = stringToConTeXt :: WriterOptions -> String -> String stringToConTeXt opts = concatMap (escapeCharForConTeXt opts) +-- | Sanitize labels +toLabel :: String -> String +toLabel z = concatMap go z + where go x + | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) + | otherwise = [x] + -- | Convert Elements to ConTeXt elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc elementToConTeXt _ (Blk block) = blockToConTeXt block @@ -286,15 +294,16 @@ inlineToConTeXt Space = return space -- Handle HTML-like internal document references to sections inlineToConTeXt (Link txt (('#' : ref), _)) = do opts <- gets stOptions - label <- inlineListToConTeXt txt + contents <- inlineListToConTeXt txt + let ref' = toLabel $ stringToConTeXt opts ref return $ text "\\in" <> braces (if writerNumberSections opts - then label <+> text "(\\S" - else label) -- prefix + then contents <+> text "(\\S" + else contents) -- prefix <> braces (if writerNumberSections opts then text ")" else empty) -- suffix - <> brackets (text ref) + <> brackets (text ref') inlineToConTeXt (Link txt (src, _)) = do let isAutolink = txt == [Str (unEscapeString src)] @@ -302,13 +311,13 @@ inlineToConTeXt (Link txt (src, _)) = do let next = stNextRef st put $ st {stNextRef = next + 1} let ref = "url" ++ show next - label <- inlineListToConTeXt txt + contents <- 
inlineListToConTeXt txt return $ "\\useURL" <> brackets (text ref) <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) <> (if isAutolink then empty - else brackets empty <> brackets label) + else brackets empty <> brackets contents) <> "\\from" <> brackets (text ref) inlineToConTeXt (Image _ (src, _)) = do @@ -337,6 +346,7 @@ sectionHeader (ident,classes,_) hdrLevel lst = do st <- get let opts = stOptions st let level' = if writerChapters opts then hdrLevel - 1 else hdrLevel + let ident' = toLabel ident let (section, chapter) = if "unnumbered" `elem` classes then (text "subject", text "title") else (text "section", text "chapter") @@ -344,7 +354,7 @@ sectionHeader (ident,classes,_) hdrLevel lst = do then char '\\' <> text (concat (replicate (level' - 1) "sub")) <> section - <> (if (not . null) ident then brackets (text ident) else empty) + <> (if (not . null) ident' then brackets (text ident') else empty) <> braces contents <> blankline else if level' == 0 diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 914d61850..6fc3b9b3c 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,5 +1,5 @@ {-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings, - ScopedTypeVariables #-} + ScopedTypeVariables, DeriveDataTypeable #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -35,6 +35,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Data.List ( intersperse ) import Data.Char ( toLower ) +import Data.Typeable import Scripting.Lua (LuaState, StackValue, callfunc) import Text.Pandoc.Writers.Shared import qualified Scripting.Lua as Lua @@ -42,6 +43,8 @@ import Text.Pandoc.UTF8 (fromString, toString) import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as C8 import Data.Monoid +import Control.Monad (when) +import Control.Exception import qualified Data.Map as M import 
Text.Pandoc.Templates @@ -145,13 +148,22 @@ instance StackValue Citation where peek = undefined valuetype _ = Lua.TTABLE +data PandocLuaException = PandocLuaException String + deriving (Show, Typeable) + +instance Exception PandocLuaException + -- | Convert Pandoc to custom markup. writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String writeCustom luaFile opts doc@(Pandoc meta _) = do luaScript <- C8.unpack `fmap` C8.readFile luaFile lua <- Lua.newstate Lua.openlibs lua - Lua.loadstring lua luaScript "custom" + status <- Lua.loadstring lua luaScript luaFile + -- check for error in lua script (later we'll change the return type + -- to handle this more gracefully): + when (status /= 0) $ + Lua.tostring lua 1 >>= throw . PandocLuaException Lua.call lua 0 0 -- TODO - call hierarchicalize, so we have that info rendered <- docToCustom lua opts doc diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index b10317506..19f8f2f11 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -114,7 +114,8 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = n | n == 0 -> "chapter" | n >= 1 && n <= 5 -> "sect" ++ show n | otherwise -> "simplesect" - in inTags True tag [("id", writerIdentifierPrefix opts ++ id')] $ + in inTags True tag [("id", writerIdentifierPrefix opts ++ id') | + not (null id')] $ inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts (lvl + 1)) elements') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 09321d1cc..4809d2a14 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -41,7 +41,7 @@ import Data.Time.Clock.POSIX import Data.Time.Clock import Data.Time.Format import 
System.Environment -import System.Locale +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.ImageSize @@ -52,8 +52,10 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () -import Text.XML.Light +import Text.XML.Light as XML import Text.TeXMath +import Text.Pandoc.Readers.Docx.StyleMap +import Text.Pandoc.Readers.Docx.Util (elemName) import Control.Monad.State import Text.Highlighting.Kate import Data.Unique (hashUnique, newUnique) @@ -62,8 +64,9 @@ import Text.Printf (printf) import qualified Control.Exception as E import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) -import Control.Applicative ((<$>), (<|>)) -import Data.Maybe (fromMaybe, mapMaybe) +import Control.Applicative ((<$>), (<|>), (<*>)) +import Data.Maybe (fromMaybe, mapMaybe, maybeToList) +import Data.Char (ord) data ListMarker = NoMarker | BulletMarker @@ -104,13 +107,17 @@ data WriterState = WriterState{ , stInDel :: Bool , stChangesAuthor :: String , stChangesDate :: String + , stPrintWidth :: Integer + , stStyleMaps :: StyleMaps + , stFirstPara :: Bool + , stTocTitle :: [Inline] } defaultWriterState :: WriterState defaultWriterState = WriterState{ stTextProperties = [] , stParaProperties = [] - , stFootnotes = [] + , stFootnotes = defaultFootnotes , stSectionIds = [] , stExternalLinks = M.empty , stImages = M.empty @@ -122,6 +129,10 @@ defaultWriterState = WriterState{ , stInDel = False , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" + , stPrintWidth = 1 + , stStyleMaps = defaultStyleMaps + , stFirstPara = False + , stTocTitle = normalizeInlines [Str "Table of Contents"] } type WS a = StateT WriterState IO a @@ -143,24 +154,106 @@ renderXml :: Element -> BL.ByteString renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> UTF8.fromStringLazy 
(showElement elt) +renumIdMap :: Int -> [Element] -> M.Map String String +renumIdMap _ [] = M.empty +renumIdMap n (e:es) + | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = + M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es) + | otherwise = renumIdMap n es + +replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] +replaceAttr _ _ [] = [] +replaceAttr f val (a:as) | f (attrKey a) = + (XML.Attr (attrKey a) val) : (replaceAttr f val as) + | otherwise = a : (replaceAttr f val as) + +renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element +renumId f renumMap e + | Just oldId <- findAttrBy f e + , Just newId <- M.lookup oldId renumMap = + let attrs' = replaceAttr f newId (elAttribs e) + in + e { elAttribs = attrs' } + | otherwise = e + +renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] +renumIds f renumMap = map (renumId f renumMap) + +-- | Certain characters are invalid in XML even if escaped. +-- See #1992 +stripInvalidChars :: Pandoc -> Pandoc +stripInvalidChars = bottomUp (filter isValidChar) + +-- | See XML reference +isValidChar :: Char -> Bool +isValidChar (ord -> c) + | c == 0x9 = True + | c == 0xA = True + | c == 0xD = True + | 0x20 <= c && c <= 0xD7FF = True + | 0xE000 <= c && c <= 0xFFFD = True + | 0x10000 <= c && c <= 0x10FFFF = True + | otherwise = False + +metaValueToInlines :: MetaValue -> [Inline] +metaValueToInlines (MetaString s) = normalizeInlines [Str s] +metaValueToInlines (MetaInlines ils) = ils +metaValueToInlines (MetaBlocks bs) = query return bs +metaValueToInlines (MetaBool b) = [Str $ show b] +metaValueToInlines _ = [] + -- | Produce an Docx file from a Pandoc document. writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = walk fixDisplayMath doc + let doc' = stripInvalidChars . 
walk fixDisplayMath $ doc username <- lookup "USERNAME" <$> getEnvironment utctime <- getCurrentTime refArchive <- liftM (toArchive . toLazy) $ case writerReferenceDocx opts of Just f -> B.readFile f Nothing -> readDataFile datadir "reference.docx" - distArchive <- liftM (toArchive . toLazy) $ readDataFile Nothing "reference.docx" + distArchive <- liftM (toArchive . toLazy) $ readDataFile datadir "reference.docx" + + parsedDoc <- parseXml refArchive distArchive "word/document.xml" + let wname f qn = qPrefix qn == Just "w" && f (qName qn) + let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + + -- Gets the template size + let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz"))) + let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName)) + + let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar"))) + let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName)) + let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . 
qName)) + + -- Get the avaible area (converting the size and the margins to int and + -- doing the difference + let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer) + <*> ( + (+) <$> (read <$> mbAttrMarRight ::Maybe Integer) + <*> (read <$> mbAttrMarLeft ::Maybe Integer) + ) + + -- styles + let stylepath = "word/styles.xml" + styledoc <- parseXml refArchive distArchive stylepath + + -- parse styledoc for heading styles + let styleMaps = getStyleMaps styledoc + + let tocTitle = fromMaybe (stTocTitle defaultWriterState) $ + metaValueToInlines <$> lookupMeta "toc-title" meta ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username - , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime} + , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime + , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) + , stStyleMaps = styleMaps + , stTocTitle = tocTitle + } let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -168,13 +261,6 @@ writeDocx opts doc@(Pandoc meta _) = do let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs - -- adjust contents to add sectPr from reference.docx - parsedDoc <- parseXml refArchive distArchive "word/document.xml" - let wname f qn = qPrefix qn == Just "w" && f (qName qn) - let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc - - let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr - let stdAttributes = [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") @@ -186,9 +272,6 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - let contents' 
= contents ++ [sectpr] - let docContents = mknode "w:document" stdAttributes - $ mknode "w:body" [] contents' parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels" let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" @@ -255,7 +338,7 @@ writeDocx opts doc@(Pandoc meta _) = do [("Type",url') ,("Id",id') ,("Target",target')] () - let baserels = map toBaseRel + let baserels' = map toBaseRel [("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering", "rId1", "numbering.xml") @@ -277,8 +360,12 @@ writeDocx opts doc@(Pandoc meta _) = do ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", "footnotes.xml") - ] ++ - headers ++ footers + ] + + let idMap = renumIdMap (length baserels' + 1) (headers ++ footers) + let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers + let renumFooters = renumIds (\q -> qName q == "Id") idMap footers + let baserels = baserels' ++ renumHeaders ++ renumFooters let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () @@ -288,6 +375,23 @@ writeDocx opts doc@(Pandoc meta _) = do $ renderXml reldoc + -- adjust contents to add sectPr from reference.docx + let sectpr = case mbsectpr of + Just sectpr' -> let cs = renumIds + (\q -> qName q == "id" && qPrefix q == Just "r") + idMap + (elChildren sectpr') + in + add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs + Nothing -> (mknode "w:sectPr" [] ()) + + -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' + let contents' = contents ++ [sectpr] + let docContents 
= mknode "w:document" stdAttributes + $ mknode "w:body" [] contents' + + + -- word/document.xml let contentEntry = toEntry "word/document.xml" epochtime $ renderXml docContents @@ -302,11 +406,18 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml $ writerHighlightStyle opts - let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive distArchive stylepath - let styledoc' = styledoc{ elContent = elContent styledoc ++ - [Elem x | x <- newstyles, writerHighlight opts] } + let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts + let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } + where + modifyContent + | writerHighlight opts = (++ map Elem newstyles) + | otherwise = filter notTokStyle + notTokStyle (Elem el) = notStyle el || notTokId el + notTokStyle _ = True + notStyle = (/= elemName' "style") . elName + notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") + tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) + elemName' = elemName (sNameSpaces styleMaps) "w" let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -351,6 +462,17 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels + -- we use dist archive for settings.xml, because Word sometimes + -- adds references to footnotes or endnotes we don't have... 
+ -- we do, however, copy some settings over from reference + let settingsPath = "word/settings.xml" + settingsList = [ "w:autoHyphenation" + , "w:consecutiveHyphenLimit" + , "w:hyphenationZone" + , "w:doNotHyphenateCap" + ] + settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList + let entryFromArchive arch path = maybe (fail $ path ++ " corrupt or missing in reference docx") return @@ -358,9 +480,6 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" - -- we use dist archive for settings.xml, because Word sometimes - -- adds references to footnotes or endnotes we don't have... - settingsEntry <- entryFromArchive distArchive "word/settings.xml" webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" headerFooterEntries <- mapM (entryFromArchive refArchive) $ mapMaybe (fmap ("word/" ++) . extractTarget) @@ -383,10 +502,13 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: Style -> [Element] -styleToOpenXml style = parStyle : map toStyle alltoktypes +styleToOpenXml :: StyleMaps -> Style -> [Element] +styleToOpenXml sm style = + maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype = mknode "w:style" [("w:type","character"), + toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] [ mknode "w:name" [("w:val",show toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () @@ -407,17 +529,35 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes tokBg toktype = maybe "auto" (drop 1 . 
fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle = mknode "w:style" [("w:type","paragraph"), + parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] [ mknode "w:name" [("w:val","Source Code")] () , mknode "w:basedOn" [("w:val","Normal")] () , mknode "w:link" [("w:val","VerbatimChar")] () , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () + : mknode "w:noProof" [] () : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) $ backgroundColor style ) ] +copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry +copyChildren refArchive distArchive path timestamp elNames = do + ref <- parseXml refArchive distArchive path + dist <- parseXml distArchive distArchive path + return $ toEntry path timestamp $ renderXml dist{ + elContent = elContent dist ++ copyContent ref + } + where + strName QName{qName=name, qPrefix=prefix} + | Just p <- prefix = p++":"++name + | otherwise = name + shouldCopy = (`elem` elNames) . strName + cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} + copyContent = map cleanElem . filterChildrenName shouldCopy + -- this is the lowest number used for a list numId baseListId :: Int baseListId = 1000 @@ -495,6 +635,34 @@ mkLvl marker lvl = getNumId :: WS Int getNumId = (((baseListId - 1) +) . 
length) `fmap` gets stLists +makeTOC :: WriterOptions -> WS [Element] +makeTOC opts | writerTableOfContents opts = do + let depth = "1-"++(show (writerTOCDepth opts)) + let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" + tocTitle <- gets stTocTitle + title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) + return $ + [mknode "w:sdt" [] ([ + mknode "w:sdtPr" [] ( + mknode "w:docPartObj" [] ( + [mknode "w:docPartGallery" [("w:val","Table of Contents")] (), + mknode "w:docPartUnique" [] ()] + ) -- w:docPartObj + ), -- w:sdtPr + mknode "w:sdtContent" [] (title++[ + mknode "w:p" [] ( + mknode "w:r" [] ([ + mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (), + mknode "w:instrText" [("xml:space","preserve")] tocCmd, + mknode "w:fldChar" [("w:fldCharType","separate")] (), + mknode "w:fldChar" [("w:fldCharType","end")] () + ]) -- w:r + ) -- w:p + ]) + ])] -- w:sdt +makeTOC _ = return [] + + -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). 
writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) @@ -513,32 +681,45 @@ writeOpenXML opts (Pandoc meta blocks) = do Just (MetaBlocks [Para xs]) -> xs Just (MetaInlines xs) -> xs _ -> [] - title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] + subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ map Para auths - date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' + else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs let blocks' = bottomUp convertSpace blocks - doc' <- blocksToOpenXML opts blocks' + doc' <- (setFirstPara >> blocksToOpenXML opts blocks') notes' <- reverse `fmap` gets stFootnotes - let meta' = title ++ subtitle ++ authors ++ date ++ abstract + toc <- makeTOC opts + let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. 
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyle :: String -> Element -pStyle sty = mknode "w:pStyle" [("w:val",sty)] () +pCustomStyle :: String -> Element +pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () + +pStyleM :: String -> WS XML.Element +pStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sParaStyleMap styleMaps + return $ mknode "w:pStyle" [("w:val",sty')] () + +rCustomStyle :: String -> Element +rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () -rStyle :: String -> Element -rStyle sty = mknode "w:rStyle" [("w:val",sty)] () +rStyleM :: String -> WS XML.Element +rStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sCharStyleMap styleMaps + return $ mknode "w:rStyle" [("w:val",sty')] () getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -552,12 +733,13 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do let (hs, bs') = span isHeaderBlock bs header <- blocksToOpenXML opts hs -- We put the Bibliography style on paragraphs after the header - rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs' + rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do - paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $ - getParaProps False + setFirstPara + paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ + getParaProps False contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds let bookmarkName = if null ident @@ -569,40 +751,60 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return [mknode "w:p" [] (paraProps ++ [bookmarkStart, 
bookmarkEnd] ++ contents)] -blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") +blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do + setFirstPara + pushParaProp $ pCustomStyle $ + if null alt + then "Figure" + else "FigureWithCaption" paraProps <- getParaProps False + popParaProp contents <- inlinesToOpenXML opts [Image alt (src,tit)] - captionNode <- withParaProp (pStyle "ImageCaption") + captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -- fixDisplayMath sometimes produces a Para [] as artifact blockToOpenXML _ (Para []) = return [] blockToOpenXML opts (Para lst) = do - paraProps <- getParaProps $ case lst of - [Math DisplayMath _] -> True - _ -> False - contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps ++ contents)] + isFirstPara <- gets stFirstPara + paraProps <- getParaProps $ case lst of + [Math DisplayMath _] -> True + _ -> False + bodyTextStyle <- pStyleM "Body Text" + let paraProps' = case paraProps of + [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] + ps -> ps + modify $ \s -> s { stFirstPara = False } + contents <- inlinesToOpenXML opts lst + return [mknode "w:p" [] (paraProps' ++ contents)] blockToOpenXML _ (RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] -blockToOpenXML opts (BlockQuote blocks) = - withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks -blockToOpenXML opts (CodeBlock attrs str) = - withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str] -blockToOpenXML _ HorizontalRule = return [ - mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] 
+blockToOpenXML opts (BlockQuote blocks) = do + p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks + setFirstPara + return p +blockToOpenXML opts (CodeBlock attrs str) = do + p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str]) + setFirstPara + return p +blockToOpenXML _ HorizontalRule = do + setFirstPara + return [ + mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] $ mknode "v:rect" [("style","width:0;height:1.5pt"), ("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] blockToOpenXML opts (Table caption aligns widths headers rows) = do + setFirstPara let captionStr = stringify caption caption' <- if null caption then return [] - else withParaProp (pStyle "TableCaption") + else withParaProp (pCustomStyle "TableCaption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) @@ -613,51 +815,62 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] - let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] - [mknode "w:pStyle" [("w:val","Compact")] ()]]] + let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents then emptyCell else contents - let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells + let mkrow border cells = mknode "w:tr" [] $ + [mknode "w:trPr" [] [ + mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] + ++ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" [("w:w", show (floor (textwidth * w) :: Integer))] () + let hasHeader = not (all null headers) return $ - mknode "w:tbl" [] + caption' ++ 
+ [mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","TableNormal")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths then [] else map mkgridcol widths) - : [ mkrow True headers' | not (all null headers) ] ++ + : [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' - ) : caption' + )] blockToOpenXML opts (BulletList lst) = do let marker = BulletMarker addList marker numid <- getNumId - asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + setFirstPara + return l blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do let marker = NumberMarker numstyle numdelim start addList marker numid <- getNumId - asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst -blockToOpenXML opts (DefinitionList items) = - concat `fmap` mapM (definitionListItemToOpenXML opts) items + l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst + setFirstPara + return l +blockToOpenXML opts (DefinitionList items) = do + l <- concat `fmap` mapM (definitionListItemToOpenXML opts) items + setFirstPara + return l definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaProp (pStyle "DefinitionTerm") + term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) - defs' <- withParaProp (pStyle "Definition") + defs' <- withParaProp (pCustomStyle "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' @@ -721,6 +934,9 @@ withTextProp d p = do popTextProp return res +withTextPropM :: WS Element -> WS a -> WS a +withTextPropM = (. flip withTextProp) . 
(>>=) + getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do props <- gets stParaProperties @@ -749,6 +965,9 @@ withParaProp d p = do popParaProp return res +withParaPropM :: WS Element -> WS a -> WS a +withParaPropM = (. flip withParaProp) . (>>=) + formattedString :: String -> WS [Element] formattedString str = do props <- getTextProps @@ -758,6 +977,9 @@ formattedString str = do [ mknode (if inDel then "w:delText" else "w:t") [("xml:space","preserve")] str ] ] +setFirstPara :: WS () +setFirstPara = modify $ \s -> s { stFirstPara = True } + -- | Convert an inline element to OpenXML. inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str @@ -828,25 +1050,26 @@ inlineToOpenXML opts (Math mathType str) = do Right r -> return [r] Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML opts (Code attrs str) = - withTextProp (rStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted - where unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) - formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) - toHlTok (toktype,tok) = mknode "w:r" [] - [ mknode "w:rPr" [] - [ rStyle $ show toktype ] - , mknode "w:t" [("xml:space","preserve")] tok ] +inlineToOpenXML opts (Code attrs str) = do + let unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . 
map (map toHlTok) + toHlTok (toktype,tok) = mknode "w:r" [] + [ mknode "w:rPr" [] + [ rCustomStyle (show toktype) ] + , mknode "w:t" [("xml:space","preserve")] tok ] + withTextProp (rCustomStyle "VerbatimChar") + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId + footnoteStyle <- rStyleM "Footnote Reference" let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -856,22 +1079,22 @@ inlineToOpenXML opts (Note bs) = do oldParaProperties <- gets stParaProperties oldTextProperties <- gets stTextProperties modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] } - contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts + contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts $ insertNoteRef bs modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties, stTextProperties = oldTextProperties } let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + 
contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i @@ -883,6 +1106,7 @@ inlineToOpenXML opts (Link txt (src,_)) = do return [ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML opts (Image alt (src, tit)) = do -- first, check to see if we've already done this image + pageWidth <- gets stPrintWidth imgs <- gets stImages case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] @@ -899,7 +1123,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do let size = imageSize img let (xpt,ypt) = maybe (120,120) sizeInPoints size -- 12700 emu = 1 pt - let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) + let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) let cNvPicPr = mknode "pic:cNvPicPr" [] $ mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () let nvPicPr = mknode "pic:nvPicPr" [] @@ -957,6 +1181,22 @@ inlineToOpenXML opts (Image alt (src, tit)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] +-- Word will insert these footnotes into the settings.xml file +-- (whether or not they're visible in the document). If they're in the +-- file, but not in the footnotes.xml file, it will produce +-- problems. So we want to make sure we insert them into our document. 
+defaultFootnotes :: [Element] +defaultFootnotes = [ mknode "w:footnote" + [("w:type", "separator"), ("w:id", "-1")] $ + [ mknode "w:p" [] $ + [mknode "w:r" [] $ + [ mknode "w:separator" [] ()]]] + , mknode "w:footnote" + [("w:type", "continuationSeparator"), ("w:id", "0")] $ + [ mknode "w:p" [] $ + [ mknode "w:r" [] $ + [ mknode "w:continuationSeparator" [] ()]]]] + parseXml :: Archive -> Archive -> String -> IO Element parseXml refArchive distArchive relpath = case ((findEntryByPath relpath refArchive `mplus` @@ -966,9 +1206,11 @@ parseXml refArchive distArchive relpath = Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" -- | Scales the image to fit the page -fitToPage :: (Integer, Integer) -> (Integer, Integer) -fitToPage (x, y) - --5440680 is the emu width size of a letter page in portrait, minus the margins - | x > 5440680 = - (5440680, round $ (5440680 / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y)) +-- sizes are passed in emu +fitToPage :: (Integer, Integer) -> Integer -> (Integer, Integer) +fitToPage (x, y) pageWidth + -- Fixes width to the page width and scales the height + | x > pageWidth = + (pageWidth, round $ + ((fromIntegral pageWidth) / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y)) | otherwise = (x, y) diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 8c1d360aa..17ff8a279 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -134,7 +134,9 @@ blockToDokuWiki opts (Para [Image txt (src,'f':'i':'g':':':tit)]) = do let opt = if null txt then "" else "|" ++ if null tit then capt else tit ++ capt - return $ "{{:" ++ src ++ opt ++ "}}\n" + -- Relative links fail isURI and receive a colon + prefix = if isURI src then "" else ":" + return $ "{{" ++ prefix ++ src ++ opt ++ "}}\n" blockToDokuWiki opts (Para inlines) = do indent <- stIndent <$> ask @@ -178,7 +180,7 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do 
blockToDokuWiki opts (BlockQuote blocks) = do contents <- blockListToDokuWiki opts blocks if isSimpleBlockQuote blocks - then return $ "> " ++ contents + then return $ unlines $ map ("> " ++) $ lines contents else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>" blockToDokuWiki opts (Table capt aligns _ headers rows) = do @@ -352,9 +354,7 @@ isPlainOrPara (Para _) = True isPlainOrPara _ = False isSimpleBlockQuote :: [Block] -> Bool -isSimpleBlockQuote [BlockQuote bs] = isSimpleBlockQuote bs -isSimpleBlockQuote [b] = isPlainOrPara b -isSimpleBlockQuote _ = False +isSimpleBlockQuote bs = all isPlainOrPara bs -- | Concatenates strings with line breaks between them. vcat :: [String] -> String @@ -459,7 +459,7 @@ inlineToDokuWiki _ (RawInline f str) | f == Format "html" = return $ "<html>" ++ str ++ "</html>" | otherwise = return "" -inlineToDokuWiki _ (LineBreak) = return "\\\\ " +inlineToDokuWiki _ (LineBreak) = return "\\\\\n" inlineToDokuWiki _ Space = return " " @@ -480,7 +480,9 @@ inlineToDokuWiki opts (Image alt (source, tit)) = do ("", []) -> "" ("", _ ) -> "|" ++ alt' (_ , _ ) -> "|" ++ tit - return $ "{{:" ++ source ++ txt ++ "}}" + -- Relative links fail isURI and receive a colon + prefix = if isURI source then "" else ":" + return $ "{{" ++ prefix ++ source ++ txt ++ "}}" inlineToDokuWiki opts (Note contents) = do contents' <- blockListToDokuWiki opts contents diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index e4f2d1335..29ea44e02 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-} +{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-} {- Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> @@ -35,16 +35,17 @@ import Data.Maybe ( fromMaybe ) import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) import 
Text.Printf (printf) -import System.FilePath ( (</>), takeExtension, takeFileName ) +import System.FilePath ( takeExtension, takeFileName ) +import System.FilePath.Glob ( namesMatching ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.SelfContained ( makeSelfContained ) import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) -import Control.Applicative ((<$>)) +import Control.Applicative ((<$>), (<$)) import Data.Time.Clock.POSIX ( getPOSIXTime ) import Data.Time (getCurrentTime,UTCTime, formatTime) -import System.Locale ( defaultTimeLocale ) +import Text.Pandoc.Compat.Locale ( defaultTimeLocale ) import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim , normalizeDate, readDataFile, stringify, warn , hierarchicalize, fetchItem' ) @@ -57,14 +58,13 @@ import Text.Pandoc.Options ( WriterOptions(..) import Text.Pandoc.Definition import Text.Pandoc.Walk (walk, walkM) import Control.Monad.State (modify, get, execState, State, put, evalState) -import Control.Monad (foldM, when, mplus, liftM) +import Control.Monad (foldM, mplus, liftM, when) import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs , strContent, lookupAttr, Node(..), QName(..), parseXML , onlyElems, node, ppElement) import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) -import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (MimeType, getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) @@ -344,7 +344,6 @@ writeEPUB opts doc@(Pandoc meta _) = do , writerStandalone = True , writerSectionDivs = True , writerHtml5 = epub3 - , writerTableOfContents = False -- we always have one in epub , writerVariables = vars , writerHTMLMathMethod = if epub3 @@ -359,8 +358,9 @@ 
writeEPUB opts doc@(Pandoc meta _) = do Nothing -> return ([],[]) Just img -> do let coverImage = "media/" ++ takeFileName img - let cpContent = renderHtml $ writeHtml opts' - (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) + let cpContent = renderHtml $ writeHtml + opts'{ writerVariables = ("coverpage","true"):vars } + (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"]) imgContent <- B.readFile img return ( [mkEntry "cover.xhtml" cpContent] , [mkEntry coverImage imgContent] ) @@ -388,8 +388,14 @@ writeEPUB opts doc@(Pandoc meta _) = do picEntries <- foldM readPicEntry [] pics -- handle fonts + let matchingGlob f = do + xs <- namesMatching f + when (null xs) $ + warn $ f ++ " did not match any font files." + return xs let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f - fontEntries <- mapM mkFontEntry $ writerEpubFonts opts' + fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') + fontEntries <- mapM mkFontEntry fontFiles -- set page progression direction attribution let progressionDirection = case epubPageDirection metadata of @@ -489,6 +495,9 @@ writeEPUB opts doc@(Pandoc meta _) = do [] -> "UNTITLED" (x:_) -> titleText x x -> stringify x + + let tocTitle = fromMaybe plainTitle $ + metaValueToString <$> lookupMeta "toc-title" meta let uuid = case epubIdentifier metadata of (x:_) -> identifierText x -- use first identifier as UUID [] -> error "epubIdentifier is null" -- shouldn't happen @@ -528,14 +537,12 @@ writeEPUB opts doc@(Pandoc meta _) = do case lookupMeta "title" meta of Just _ -> "yes" Nothing -> "no")] $ ()) : - (unode "itemref" ! [("idref", "nav") - ,("linear", if writerTableOfContents opts - then "yes" - else "no")] $ ()) : + [unode "itemref" ! 
[("idref", "nav")] $ () + | writerTableOfContents opts ] ++ map chapterRefNode chapterEntries) , unode "guide" $ [ unode "reference" ! - [("type","toc"),("title",plainTitle), + [("type","toc"),("title", tocTitle), ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! @@ -572,8 +579,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let navMapFormatter :: Int -> String -> String -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! - [("id", "navPoint-" ++ show n) - ,("playOrder", show n)] $ + [("id", "navPoint-" ++ show n)] $ [ unode "navLabel" $ unode "text" tit , unode "content" ! [("src", src)] $ () ] ++ subs @@ -598,7 +604,7 @@ writeEPUB opts doc@(Pandoc meta _) = do Nothing -> [] Just img -> [unode "meta" ! [("name","cover"), ("content", toId img)] $ ()] - , unode "docTitle'" $ unode "text" $ plainTitle + , unode "docTitle" $ unode "text" $ plainTitle , unode "navMap" $ tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1 ] @@ -614,17 +620,35 @@ writeEPUB opts doc@(Pandoc meta _) = do (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if epub3 then "nav" else "div" - let navData = UTF8.fromStringLazy $ ppTopElement $ - unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml") - ,("xmlns:epub","http://www.idpf.org/2007/ops")] $ - [ unode "head" $ - [ unode "title" plainTitle - , unode "link" ! [("rel","stylesheet"),("type","text/css"),("href","stylesheet.css")] $ () ] - , unode "body" $ - unode navtag ! [("epub:type","toc") | epub3] $ - [ unode "h1" ! [("id","toc-title")] $ plainTitle - , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1] - ] + let navBlocks = [RawBlock (Format "html") $ ppElement $ + unode navtag ! ([("epub:type","toc") | epub3] ++ + [("id","toc")]) $ + [ unode "h1" ! [("id","toc-title")] $ tocTitle + , unode "ol" ! 
[("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]] + let landmarks = if epub3 + then [RawBlock (Format "html") $ ppElement $ + unode "nav" ! [("epub:type","landmarks") + ,("hidden","hidden")] $ + [ unode "ol" $ + [ unode "li" + [ unode "a" ! [("href", "cover.xhtml") + ,("epub:type", "cover")] $ + "Cover"] | + epubCoverImage metadata /= Nothing + ] ++ + [ unode "li" + [ unode "a" ! [("href", "#toc") + ,("epub:type", "toc")] $ + "Table of contents" + ] | writerTableOfContents opts + ] + ] + ] + else [] + let navData = renderHtml $ writeHtml opts' + (Pandoc (setMeta "title" + (walk removeNote $ fromList $ docTitle' meta) nullMeta) + (navBlocks ++ landmarks)) let navEntry = mkEntry "nav.xhtml" navData -- mimetype @@ -767,23 +791,20 @@ metadataElement version md currentTime = showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media +transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Tag String -> IO (Tag String) -transformTag opts mediaRef tag@(TagOpen name attr) +transformTag mediaRef tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - let oldsrc = maybe src (</> src) $ writerSourceURL opts - let oldposter = maybe poster (</> poster) $ writerSourceURL opts - newsrc <- modifyMediaRef mediaRef oldsrc - newposter <- modifyMediaRef mediaRef oldposter + newsrc <- modifyMediaRef mediaRef src + newposter <- modifyMediaRef mediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ [("src", newsrc) | not (null newsrc)] ++ [("poster", newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ _ tag = return tag +transformTag _ tag = return tag modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath modifyMediaRef _ "" = return "" @@ 
-793,7 +814,7 @@ modifyMediaRef mediaRef oldsrc = do Just n -> return n Nothing -> do let new = "media/file" ++ show (length media) ++ - takeExtension oldsrc + takeExtension (takeWhile (/='?') oldsrc) -- remove query modifyIORef mediaRef ( (oldsrc, new): ) return new @@ -801,10 +822,10 @@ transformBlock :: WriterOptions -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Block -> IO Block -transformBlock opts mediaRef (RawBlock fmt raw) +transformBlock _ mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM (transformTag mediaRef) tags return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b @@ -812,19 +833,17 @@ transformInline :: WriterOptions -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline opts mediaRef (Image lab (src,tit)) = do - let src' = unEscapeString src - let oldsrc = maybe src' (</> src) $ writerSourceURL opts - newsrc <- modifyMediaRef mediaRef oldsrc +transformInline _ mediaRef (Image lab (src,tit)) = do + newsrc <- modifyMediaRef mediaRef src return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained opts $ writeHtmlInline opts x return $ RawInline (Format "html") raw -transformInline opts mediaRef (RawInline fmt raw) +transformInline _ mediaRef (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM (transformTag mediaRef) tags return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x @@ -885,20 +904,27 @@ addIdentifiers bs = evalState (mapM go bs) [] -- was "header-1" might turn into "ch006.xhtml#header". 
correlateRefs :: Int -> [Block] -> [(String,String)] correlateRefs chapterHeaderLevel bs = - identTable $ execState (mapM_ go bs) + identTable $ execState (walkM goBlock bs >>= walkM goInline) IdentState{ chapterNumber = 0 , identTable = [] } - where go :: Block -> State IdentState () - go (Header n (ident,_,_) _) = do - when (n <= chapterHeaderLevel) $ - modify $ \s -> s{ chapterNumber = chapterNumber s + 1 } + where goBlock :: Block -> State IdentState Block + goBlock x@(Header n (ident,_,_) _) = x <$ addIdentifier (Just n) ident + goBlock x@(Div (ident,_,_) _) = x <$ addIdentifier Nothing ident + goBlock x = return x + goInline :: Inline -> State IdentState Inline + goInline x@(Span (ident,_,_) _) = x <$ addIdentifier Nothing ident + goInline x = return x + addIdentifier mbHeaderLevel ident = do + case mbHeaderLevel of + Just n | n <= chapterHeaderLevel -> + modify $ \s -> s{ chapterNumber = chapterNumber s + 1 } + _ -> return () st <- get let chapterid = showChapter (chapterNumber st) ++ - if n <= chapterHeaderLevel - then "" - else '#' : ident + case mbHeaderLevel of + Just n | n <= chapterHeaderLevel -> "" + _ -> '#' : ident modify $ \s -> s{ identTable = (ident, chapterid) : identTable st } - go _ = return () -- Replace internal link references using the table produced -- by correlateRefs. 
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 233b8b32b..31fa4bee8 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -85,7 +85,7 @@ writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ xml_head ++ (showContent fb2_xml) + return $ xml_head ++ (showContent fb2_xml) ++ "\n" where xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" fb2_attrs = diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 9ead604d7..53dc931cc 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings, CPP #-} +{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-deprecations #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -49,7 +49,10 @@ import Data.String ( fromString ) import Data.Maybe ( catMaybes, fromMaybe ) import Control.Monad.State import Text.Blaze.Html hiding(contents) +#if MIN_VERSION_blaze_markup(0,6,3) +#else import Text.Blaze.Internal(preEscapedString) +#endif #if MIN_VERSION_blaze_html(0,5,1) import qualified Text.Blaze.XHtml5 as H5 #else @@ -60,6 +63,8 @@ import qualified Text.Blaze.XHtml1.Transitional.Attributes as A import Text.Blaze.Renderer.String (renderHtml) import Text.TeXMath import Text.XML.Light.Output +import Text.XML.Light (unode, elChildren, unqual) +import qualified Text.XML.Light as XML import System.FilePath (takeExtension) import Data.Monoid import Data.Aeson (Value) @@ -71,11 +76,13 @@ data WriterState = WriterState , stQuotes :: Bool -- ^ <q> tag is used , stHighlighting :: Bool -- ^ Syntax highlighting is used , stSecNum :: [Int] -- ^ Number of current section + , stElement :: Bool -- ^ Processing an 
Element } defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, - stHighlighting = False, stSecNum = []} + stHighlighting = False, stSecNum = [], + stElement = False} -- Helpers to render HTML with the appropriate function. @@ -155,6 +162,10 @@ pandocToHtml opts (Pandoc meta blocks) = do H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ mempty + KaTeX js css -> + (H.script ! A.src (toValue js) $ mempty) <> + (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> + (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) _ -> case lookup "mathml-script" (writerVariables opts) of Just s | not (writerHtml5 opts) -> H.script ! A.type_ "text/javascript" @@ -274,7 +285,13 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen let titleSlide = slide && level < slideLevel header' <- if title' == [Str "\0"] -- marker for hrule then return mempty - else blockToHtml opts (Header level' (id',classes,keyvals) title') + else do + modify (\st -> st{ stElement = True}) + res <- blockToHtml opts + (Header level' (id',classes,keyvals) title') + modify (\st -> st{ stElement = False}) + return res + let isSec (Sec _ _ _ _ _) = True isSec (Blk _) = False let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] @@ -342,10 +359,10 @@ parseMailto s = do _ -> fail "not a mailto: URL" -- | Obfuscate a "mailto:" link. -obfuscateLink :: WriterOptions -> String -> String -> Html +obfuscateLink :: WriterOptions -> Html -> String -> Html obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation = - H.a ! A.href (toValue s) $ toHtml txt -obfuscateLink opts txt s = + H.a ! 
A.href (toValue s) $ txt +obfuscateLink opts (renderHtml -> txt) s = let meth = writerEmailObfuscation opts s' = map toLower (take 7 s) ++ drop 7 s in case parseMailto s' of @@ -424,24 +441,30 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do then H5.figure $ mconcat [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat - [nl opts, img, capt, nl opts] + [nl opts, img, nl opts, capt, nl opts] blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents blockToHtml opts (Div attr@(_,classes,_) bs) = do - contents <- blockListToHtml opts bs + let speakerNotes = "notes" `elem` classes + -- we don't want incremental output inside speaker notes, see #1394 + let opts' = if speakerNotes then opts{ writerIncremental = False } else opts + contents <- blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts return $ - if "notes" `elem` classes - then let opts' = opts{ writerIncremental = False } in - -- we don't want incremental output inside speaker notes - case writerSlideVariant opts of + if speakerNotes + then case writerSlideVariant opts of RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts attr $ H.div $ contents' -blockToHtml _ (RawBlock f str) +blockToHtml opts (RawBlock f str) | f == Format "html" = return $ preEscapedString str + | f == Format "latex" = + case writerHTMLMathMethod opts of + MathJax _ -> do modify (\st -> st{ stMath = True }) + return $ toHtml str + _ -> return mempty | otherwise = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do @@ -485,7 +508,7 @@ blockToHtml opts (BlockQuote blocks) = else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (_,classes,_) lst) = do +blockToHtml opts 
(Header level attr@(_,classes,_) lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts && not (null secnum) @@ -493,7 +516,9 @@ blockToHtml opts (Header level (_,classes,_) lst) = do then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >> strToHtml " " >> contents else contents - return $ case level of + inElement <- gets stElement + return $ (if inElement then id else addAttrs opts attr) + $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' 3 -> H.h3 contents' @@ -506,7 +531,9 @@ blockToHtml opts (BulletList lst) = do return $ unordList opts contents blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst - let numstyle' = camelCaseToHyphenated $ show numstyle + let numstyle' = case numstyle of + Example -> "decimal" + _ -> camelCaseToHyphenated $ show numstyle let attribs = (if startnum /= 1 then [A.start $ toValue startnum] else []) ++ @@ -615,13 +642,28 @@ inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . mconcat +-- | Annotates a MathML expression with the tex source +annotateMML :: XML.Element -> String -> XML.Element +annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)]) + where + cs = case elChildren e of + [] -> unode "mrow" () + [x] -> x + xs -> unode "mrow" xs + math childs = XML.Element q as [XML.Elem childs] l + where + (XML.Element q as _ l) = e + annotAttrs = [XML.Attr (unqual "encoding") "application/x-tex"] + + -- | Convert Pandoc inline element to HTML. 
inlineToHtml :: WriterOptions -> Inline -> State WriterState Html inlineToHtml opts inline = case inline of (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " - (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br + (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br) + <> strToHtml "\n" (Span (id',classes,kvs) ils) -> inlineListToHtml opts ils >>= return . addAttrs opts attr' . H.span @@ -669,69 +711,78 @@ inlineToHtml opts inline = H.q `fmap` inlineListToHtml opts lst else (\x -> leftQuote >> x >> rightQuote) `fmap` inlineListToHtml opts lst - (Math t str) -> modify (\st -> st {stMath = True}) >> - (case writerHTMLMathMethod opts of - LaTeXMathML _ -> - -- putting LaTeXMathML in container with class "LaTeX" prevents - -- non-math elements on the page from being treated as math by - -- the javascript - return $ H.span ! A.class_ "LaTeX" $ - case t of - InlineMath -> toHtml ("$" ++ str ++ "$") - DisplayMath -> toHtml ("$$" ++ str ++ "$$") - JsMath _ -> do - let m = preEscapedString str - return $ case t of - InlineMath -> H.span ! A.class_ "math" $ m - DisplayMath -> H.div ! A.class_ "math" $ m - WebTeX url -> do - let imtag = if writerHtml5 opts then H5.img else H.img - let m = imtag ! A.style "vertical-align:middle" - ! A.src (toValue $ url ++ urlEncode str) - ! A.alt (toValue str) - ! 
A.title (toValue str) - let brtag = if writerHtml5 opts then H5.br else H.br - return $ case t of - InlineMath -> m - DisplayMath -> brtag >> m >> brtag - GladTeX -> - return $ case t of - InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" - DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" - MathML _ -> do - let dt = if t == InlineMath - then DisplayInline - else DisplayBlock - let conf = useShortEmptyTags (const False) - defaultConfigPP - case writeMathML dt <$> readTeX str of - Right r -> return $ preEscapedString $ - ppcElement conf r - Left _ -> inlineListToHtml opts - (texMathToInlines t str) >>= - return . (H.span ! A.class_ "math") - MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ - case t of - InlineMath -> "\\(" ++ str ++ "\\)" - DisplayMath -> "\\[" ++ str ++ "\\]" - PlainMath -> do - x <- inlineListToHtml opts (texMathToInlines t str) - let m = H.span ! A.class_ "math" $ x - let brtag = if writerHtml5 opts then H5.br else H.br - return $ case t of - InlineMath -> m - DisplayMath -> brtag >> m >> brtag ) + (Math t str) -> do + modify (\st -> st {stMath = True}) + let mathClass = toValue $ ("math " :: String) ++ + if t == InlineMath then "inline" else "display" + case writerHTMLMathMethod opts of + LaTeXMathML _ -> + -- putting LaTeXMathML in container with class "LaTeX" prevents + -- non-math elements on the page from being treated as math by + -- the javascript + return $ H.span ! A.class_ "LaTeX" $ + case t of + InlineMath -> toHtml ("$" ++ str ++ "$") + DisplayMath -> toHtml ("$$" ++ str ++ "$$") + JsMath _ -> do + let m = preEscapedString str + return $ case t of + InlineMath -> H.span ! A.class_ mathClass $ m + DisplayMath -> H.div ! A.class_ mathClass $ m + WebTeX url -> do + let imtag = if writerHtml5 opts then H5.img else H.img + let m = imtag ! A.style "vertical-align:middle" + ! A.src (toValue $ url ++ urlEncode str) + ! A.alt (toValue str) + ! 
A.title (toValue str) + let brtag = if writerHtml5 opts then H5.br else H.br + return $ case t of + InlineMath -> m + DisplayMath -> brtag >> m >> brtag + GladTeX -> + return $ case t of + InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" + DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" + MathML _ -> do + let dt = if t == InlineMath + then DisplayInline + else DisplayBlock + let conf = useShortEmptyTags (const False) + defaultConfigPP + case writeMathML dt <$> readTeX str of + Right r -> return $ preEscapedString $ + ppcElement conf (annotateMML r str) + Left _ -> inlineListToHtml opts + (texMathToInlines t str) >>= + return . (H.span ! A.class_ mathClass) + MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $ + case t of + InlineMath -> "\\(" ++ str ++ "\\)" + DisplayMath -> "\\[" ++ str ++ "\\]" + KaTeX _ _ -> return $ H.span ! A.class_ mathClass $ + toHtml (case t of + InlineMath -> str + DisplayMath -> "\\displaystyle " ++ str) + PlainMath -> do + x <- inlineListToHtml opts (texMathToInlines t str) + let m = H.span ! A.class_ mathClass $ x + let brtag = if writerHtml5 opts then H5.br else H.br + return $ case t of + InlineMath -> m + DisplayMath -> brtag >> m >> brtag (RawInline f str) | f == Format "latex" -> case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ toHtml str + MathJax _ -> do modify (\st -> st {stMath = True}) + return $ toHtml str _ -> return mempty | f == Format "html" -> return $ preEscapedString str | otherwise -> return mempty (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts (renderHtml linkText) s + return $ obfuscateLink opts linkText s (Link txt (s,tit)) -> do linkText <- inlineListToHtml opts txt let s' = case s of @@ -746,22 +797,15 @@ inlineToHtml opts inline = then link' else link' ! 
A.title (toValue tit) (Image txt (s,tit)) | treatAsImage s -> do - let alternate' = stringify txt let attributes = [A.src $ toValue s] ++ - (if null tit - then [] - else [A.title $ toValue tit]) ++ - if null txt - then [] - else [A.alt $ toValue alternate'] + [A.title $ toValue tit | not $ null tit] ++ + [A.alt $ toValue $ stringify txt] let tag = if writerHtml5 opts then H5.img else H.img return $ foldl (!) tag attributes -- note: null title included, as in Markdown.pl (Image _ (s,tit)) -> do let attributes = [A.src $ toValue s] ++ - (if null tit - then [] - else [A.title $ toValue tit]) + [A.title $ toValue tit | not $ null tit] return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl (Note contents) @@ -815,3 +859,14 @@ blockListToNote opts ref blocks = Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote" _ -> noteItem return $ nl opts >> noteItem' + +-- Javascript snippet to render all KaTeX elements +renderKaTeX :: String +renderKaTeX = unlines [ + "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");" + , "for (var i=0; i < mathElements.length; i++)" + , "{" + , " var texText = mathElements[i].firstChild" + , " katex.render(texText.data, mathElements[i])" + , "}}" + ] diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index ae20efd4b..6af4b7aa3 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, FlexibleContexts #-} {- | Module : Text.Pandoc.Writers.ICML @@ -70,7 +70,6 @@ linkName = "Link" -- block element names (appear in InDesign's paragraph styles pane) paragraphName :: String codeBlockName :: String -rawBlockName :: String blockQuoteName :: String orderedListName :: String bulletListName :: String @@ -93,7 +92,6 @@ subListParName :: String footnoteName :: String paragraphName = "Paragraph" codeBlockName = "CodeBlock" -rawBlockName = 
"Rawblock" blockQuoteName = "Blockquote" orderedListName = "NumList" bulletListName = "BulList" @@ -278,7 +276,9 @@ blockToICML :: WriterOptions -> Style -> Block -> WS Doc blockToICML opts style (Plain lst) = parStyle opts style lst blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] -blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str] +blockToICML _ _ (RawBlock f str) + | f == Format "icml" = return $ text str + | otherwise = return empty blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst @@ -399,12 +399,14 @@ inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:styl inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"] inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"] -inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst] +inlineToICML opts style (Cite _ lst) = inlinesToICML opts style lst inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str inlineToICML _ style Space = charStyle style space inlineToICML _ style LineBreak = charStyle style $ text lineSeparator inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math -inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str +inlineToICML _ _ (RawInline f str) + | f == Format "icml" = return $ text str + | otherwise = return empty inlineToICML 
opts style (Link lst (url, title)) = do content <- inlinesToICML opts (linkName:style) lst state $ \st -> diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 0fa1e4857..49bc27b58 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -42,6 +42,7 @@ import Data.List ( (\\), isSuffixOf, isInfixOf, stripPrefix, isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) import Data.Maybe ( fromMaybe ) +import Data.Aeson.Types ( (.:), parseMaybe, withObject ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty @@ -54,6 +55,7 @@ data WriterState = WriterState { stInNote :: Bool -- true if we're in a note , stInQuote :: Bool -- true if in a blockquote , stInMinipage :: Bool -- true if in minipage + , stInHeading :: Bool -- true if in a section heading , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter @@ -76,9 +78,9 @@ writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = evalState (pandocToLaTeX options document) $ WriterState { stInNote = False, stInQuote = False, - stInMinipage = False, stNotes = [], - stOLLevel = 1, stOptions = options, - stVerbInNote = False, + stInMinipage = False, stInHeading = False, + stNotes = [], stOLLevel = 1, + stOptions = options, stVerbInNote = False, stTable = False, stStrikeout = False, stUrl = False, stGraphics = False, stLHS = False, stBook = writerChapters options, @@ -101,8 +103,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let template = writerTemplate options -- set stBook depending on documentclass + let colwidth = if writerWrapText options + then Just $ writerColumns options + else Nothing + metadata <- metaToJSON options + (fmap (render colwidth) . 
blockListToLaTeX) + (fmap (render colwidth) . inlineListToLaTeX) + meta let bookClasses = ["memoir","book","report","scrreprt","scrbook"] - case lookup "documentclass" (writerVariables options) of + case lookup "documentclass" (writerVariables options) `mplus` + parseMaybe (withObject "object" (.: "documentclass")) metadata of Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True} | otherwise -> return () Nothing | any (\x -> "\\documentclass" `isPrefixOf` x && @@ -113,13 +123,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do -- \enquote{...} for smart quotes: when ("{csquotes}" `isInfixOf` template) $ modify $ \s -> s{stCsquotes = True} - let colwidth = if writerWrapText options - then Just $ writerColumns options - else Nothing - metadata <- metaToJSON options - (fmap (render colwidth) . blockListToLaTeX) - (fmap (render colwidth) . inlineListToLaTeX) - meta let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then (blocks', []) else case last blocks' of @@ -179,7 +182,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc elementToLaTeX _ (Blk block) = blockToLaTeX block elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do + modify $ \s -> s{stInHeading = True} header' <- sectionHeader ("unnumbered" `elem` classes) id' level title' + modify $ \s -> s{stInHeading = False} innerContents <- mapM (elementToLaTeX opts) elements return $ vsep (header' : innerContents) @@ -203,7 +208,7 @@ stringToLaTeX ctx (x:xs) = do '€' -> "\\euro{}" ++ rest '{' -> "\\{" ++ rest '}' -> "\\}" ++ rest - '$' -> "\\$" ++ rest + '$' | not isUrl -> "\\$" ++ rest '%' -> "\\%" ++ rest '&' -> "\\&" ++ rest '_' | not isUrl -> "\\_" ++ rest @@ -237,7 +242,7 @@ toLabel z = go `fmap` stringToLaTeX URLString z where go [] = "" go (x:xs) | (isLetter x || isDigit x) && isAscii x = x:go xs - | elem x "-+=:;." = x:go xs + | elem x ("-+=:;." 
:: String) = x:go xs | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs -- | Puts contents into LaTeX command. @@ -466,8 +471,11 @@ blockToLaTeX (DefinitionList lst) = do "\\end{description}" blockToLaTeX HorizontalRule = return $ "\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}" -blockToLaTeX (Header level (id',classes,_) lst) = - sectionHeader ("unnumbered" `elem` classes) id' level lst +blockToLaTeX (Header level (id',classes,_) lst) = do + modify $ \s -> s{stInHeading = True} + hdr <- sectionHeader ("unnumbered" `elem` classes) id' level lst + modify $ \s -> s{stInHeading = False} + return hdr blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty @@ -539,10 +547,16 @@ fixLineBreaks' ils = case splitBy (== LineBreak) ils of where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++ [RawInline "tex" "}"] +-- We also change display math to inline math, since display +-- math breaks in simple tables. +displayMathToInline :: Inline -> Inline +displayMathToInline (Math DisplayMath x) = Math InlineMath x +displayMathToInline x = x + tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) -> State WriterState Doc tableCellToLaTeX _ (0, _, blocks) = - blockListToLaTeX $ walk fixLineBreaks blocks + blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks tableCellToLaTeX header (width, align, blocks) = do modify $ \st -> st{ stInMinipage = True, stNotes = [] } cellContents <- blockListToLaTeX blocks @@ -607,6 +621,7 @@ sectionHeader :: Bool -- True for unnumbered sectionHeader unnumbered ref level lst = do txt <- inlineListToLaTeX lst lab <- text `fmap` toLabel ref + plain <- stringToLaTeX TextString $ foldl (++) "" $ map stringify lst let noNote (Note _) = Str "" noNote x = x let lstNoNotes = walk noNote lst @@ -619,7 +634,12 @@ sectionHeader unnumbered ref level lst = do then return empty else do return $ brackets txtNoNotes - let stuffing = star <> optional <> braces txt + let 
contents = if render Nothing txt == plain + then braces txt + else braces (text "\\texorpdfstring" + <> braces txt + <> braces (text plain)) + let stuffing = star <> optional <> contents book <- gets stBook opts <- gets stOptions let level' = if book || writerChapters opts then level - 1 else level @@ -663,7 +683,7 @@ sectionHeader unnumbered ref level lst = do inlineListToLaTeX :: [Inline] -- ^ Inlines to convert -> State WriterState Doc inlineListToLaTeX lst = - mapM inlineToLaTeX (fixLineInitialSpaces lst) + mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst) >>= return . hcat -- nonbreaking spaces (~) in LaTeX don't work after line breaks, -- so we turn nbsps after hard breaks to \hspace commands. @@ -675,6 +695,14 @@ inlineListToLaTeX lst = fixNbsps s = let (ys,zs) = span (=='\160') s in replicate (length ys) hspace ++ [Str zs] hspace = RawInline "latex" "\\hspace*{0.333em}" + -- linebreaks after blank lines cause problems: + fixBreaks [] = [] + fixBreaks ys@(LineBreak : LineBreak : _) = + case span (== LineBreak) ys of + (lbs, rest) -> RawInline "latex" + ("\\\\[" ++ show (length lbs) ++ + "\\baselineskip]") : fixBreaks rest + fixBreaks (y:ys) = y : fixBreaks ys isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True @@ -724,22 +752,27 @@ inlineToLaTeX (Cite cits lst) = do inlineToLaTeX (Code (_,classes,_) str) = do opts <- gets stOptions + inHeading <- gets stInHeading case () of - _ | writerListings opts -> listingsCode + _ | writerListings opts && not inHeading -> listingsCode | writerHighlight opts && not (null classes) -> highlightCode - | otherwise -> rawCode + | otherwise -> rawCode where listingsCode = do inNote <- gets stInNote when inNote $ modify $ \s -> s{ stVerbInNote = True } - let chr = ((enumFromTo '!' '~') \\ str) !! 0 + let chr = case "!\"&'()*,-./:;?@_" \\ str of + (c:_) -> c + [] -> '!' 
return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] highlightCode = do case highlight formatLaTeXInline ("",classes,[]) str of Nothing -> rawCode Just h -> modify (\st -> st{ stHighlighting = True }) >> return (text h) - rawCode = liftM (text . (\s -> "\\texttt{" ++ s ++ "}")) + rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) $ stringToLaTeX CodeString str + where + escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c]) inlineToLaTeX (Quoted qt lst) = do contents <- inlineListToLaTeX lst csquotes <- liftM stCsquotes get @@ -772,7 +805,7 @@ inlineToLaTeX (RawInline f str) | f == Format "latex" || f == Format "tex" = return $ text str | otherwise = return empty -inlineToLaTeX (LineBreak) = return "\\\\" +inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt @@ -801,7 +834,10 @@ inlineToLaTeX (Image _ (source, _)) = do then source else unEscapeString source source'' <- stringToLaTeX URLString source' - return $ "\\includegraphics" <> braces (text source'') + inHeading <- gets stInHeading + return $ + (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") + <> braces (text source'') inlineToLaTeX (Note contents) = do inMinipage <- gets stInMinipage modify (\s -> s{stInNote = True}) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index f06f1d6cc..dee4d56a4 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -57,12 +57,15 @@ import qualified Data.Text as T type Notes = [[Block]] type Refs = [([Inline], Target)] -data WriterState = WriterState { stNotes :: Notes - , stRefs :: Refs - , stIds :: [String] - , stPlain :: Bool } +data WriterState = WriterState { stNotes :: Notes + , stRefs :: Refs + , stRefShortcutable :: Bool + , stInList :: Bool + , stIds :: [String] + , stPlain :: Bool } instance Default WriterState - where def = 
WriterState{ stNotes = [], stRefs = [], stIds = [], stPlain = False } + where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True, + stInList = False, stIds = [], stPlain = False } -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String @@ -323,9 +326,9 @@ blockToMarkdown opts (Plain inlines) = do then Just $ writerColumns opts else Nothing let rendered = render colwidth contents - let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs - | otherwise = x : escapeDelimiter xs - escapeDelimiter [] = [] + let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs + | otherwise = x : escapeDelimiter xs + escapeDelimiter [] = [] let contents' = if isEnabled Ext_all_symbols_escapable opts && not (stPlain st) && beginsWithOrderedListMarker rendered then text $ escapeDelimiter rendered @@ -453,7 +456,7 @@ blockToMarkdown opts t@(Table caption aligns widths headers rows) = do $ Pandoc nullMeta [t] return $ nst $ tbl $$ blankline $$ caption'' $$ blankline blockToMarkdown opts (BulletList items) = do - contents <- mapM (bulletListItemToMarkdown opts) items + contents <- inList $ mapM (bulletListItemToMarkdown opts) items return $ cat contents <> blankline blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let start' = if isEnabled Ext_startnum opts then start else 1 @@ -464,13 +467,22 @@ blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' else m) markers - contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + contents <- inList $ + mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ zip markers' items return $ cat contents <> blankline blockToMarkdown opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMarkdown opts) items + contents <- inList $ mapM (definitionListItemToMarkdown opts) items return $ cat contents <> blankline +inList :: State WriterState a -> 
State WriterState a +inList p = do + oldInList <- gets stInList + modify $ \st -> st{ stInList = True } + res <- p + modify $ \st -> st{ stInList = oldInList } + return res + addMarkdownAttribute :: String -> String addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of @@ -497,7 +509,12 @@ pipeTable headless aligns rawHeaders rawRows = do AlignCenter -> ':':replicate w '-' ++ ":" AlignRight -> replicate (w + 1) '-' ++ ":" AlignDefault -> replicate (w + 2) '-' - let header = if headless then empty else torow rawHeaders + -- note: pipe tables can't completely lack a + -- header; for a headerless table, we need a header of empty cells. + -- see jgm/pandoc#1996. + let header = if headless + then torow (replicate (length aligns) empty) + else torow rawHeaders let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ map toborder $ zip aligns widths) <> text "|" let body = vcat $ map torow rawRows @@ -677,12 +694,53 @@ getReference label (src, tit) = do -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . 
cat - where avoidBadWraps [] = [] - avoidBadWraps (Space:Str (c:cs):xs) - | c `elem` "-*+>" = Str (' ':c:cs) : avoidBadWraps xs - avoidBadWraps (x:xs) = x : avoidBadWraps xs +inlineListToMarkdown opts lst = do + inlist <- gets stInList + go (if inlist then avoidBadWrapsInList lst else lst) + where go [] = return empty + go (i:is) = case i of + (Link _ _) -> case is of + -- If a link is followed by another link or '[' we don't shortcut + (Link _ _):_ -> unshortcutable + Space:(Link _ _):_ -> unshortcutable + Space:(Str('[':_)):_ -> unshortcutable + Space:(RawInline _ ('[':_)):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str ('[':_):_ -> unshortcutable + (RawInline _ ('[':_)):_ -> unshortcutable + (RawInline _ (' ':'[':_)):_ -> unshortcutable + _ -> shortcutable + _ -> shortcutable + where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) + unshortcutable = do + iMark <- withState (\s -> s { stRefShortcutable = False }) + (inlineToMarkdown opts i) + modify (\s -> s {stRefShortcutable = True }) + fmap (iMark <>) (go is) + +avoidBadWrapsInList :: [Inline] -> [Inline] +avoidBadWrapsInList [] = [] +avoidBadWrapsInList (Space:Str ('>':cs):xs) = + Str (' ':'>':cs) : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str [c]:[]) + | c `elem` ['-','*','+'] = Str [' ', c] : [] +avoidBadWrapsInList (Space:Str [c]:Space:xs) + | c `elem` ['-','*','+'] = Str [' ', c] : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:Space:xs) + | isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:[]) + | isOrderedListMarker cs = Str (' ':cs) : [] +avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs + +isOrderedListMarker :: String -> Bool +isOrderedListMarker xs = (last xs `elem` ['.',')']) && + isRight (runParserT (anyOrderedListMarker >> eof) + defaultParserState "" xs) + +isRight :: Either a b -> Bool +isRight (Right _) = True +isRight (Left _) = False 
escapeSpaces :: Inline -> Inline escapeSpaces (Str s) = Str $ substitute " " "\\ " s @@ -692,8 +750,10 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do + plain <- gets stPlain contents <- inlineListToMarkdown opts ils - return $ if isEnabled Ext_raw_html opts + return $ if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) then tagWithAttrs "span" attrs <> contents <> text "</span>" else contents inlineToMarkdown opts (Emph lst) = do @@ -726,13 +786,14 @@ inlineToMarkdown opts (Subscript lst) = do else "<sub>" <> contents <> "</sub>" inlineToMarkdown opts (SmallCaps lst) = do plain <- gets stPlain - if plain - then inlineListToMarkdown opts $ capitalize lst - else do + if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) + then do contents <- inlineListToMarkdown opts lst return $ tagWithAttrs "span" - ("",[],[("style","font-variant:small-caps;")]) + ("",[],[("style","font-variant:small-caps;")]) <> contents <> text "</span>" + else inlineListToMarkdown opts $ capitalize lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "‘" <> contents <> "’" @@ -821,8 +882,8 @@ inlineToMarkdown opts (Cite (c:cs) lst) sdoc <- inlineListToMarkdown opts sinlines let k' = text (modekey m ++ "@" ++ k) r = case sinlines of - Str (y:_):_ | y `elem` ",;]@" -> k' <> sdoc - _ -> k' <+> sdoc + Str (y:_):_ | y `elem` (",;]@" :: String) -> k' <> sdoc + _ -> k' <+> sdoc return $ pdoc <+> r modekey SuppressAuthor = "-" modekey _ = "" @@ -838,6 +899,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do [Str s] | escapeURI s == srcSuffix -> True _ -> False let useRefLinks = writerReferenceLinks opts && not useAuto + shortcutable <- gets stRefShortcutable + let useShortcutRefLinks = shortcutable && + isEnabled Ext_shortcut_reference_links opts ref <- if 
useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto @@ -847,7 +911,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do else if useRefLinks then let first = "[" <> linktext <> "]" second = if txt == ref - then "[]" + then if useShortcutRefLinks + then "" + else "[]" else "[" <> reftext <> "]" in first <> second else if plain diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 3f392a5d0..b49c60867 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -107,7 +107,7 @@ blockToMediaWiki (Para [Image txt (src,'f':'i':'g':':':tit)]) = do let opt = if null txt then "" else "|alt=" ++ if null tit then capt else tit ++ capt - return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" + return $ "[[File:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" blockToMediaWiki (Para inlines) = do tags <- asks useTags @@ -375,14 +375,14 @@ inlineToMediaWiki (RawInline f str) | f == Format "html" = return str | otherwise = return "" -inlineToMediaWiki (LineBreak) = return "<br />" +inlineToMediaWiki (LineBreak) = return "<br />\n" inlineToMediaWiki Space = return " " inlineToMediaWiki (Link txt (src, _)) = do label <- inlineListToMediaWiki txt case txt of - [Str s] | escapeURI s == src -> return src + [Str s] | isURI src && escapeURI s == src -> return src _ -> return $ if isURI src then "[" ++ src ++ " " ++ label ++ "]" else "[[" ++ src' ++ "|" ++ label ++ "]]" @@ -397,7 +397,7 @@ inlineToMediaWiki (Image alt (source, tit)) = do then "" else '|' : alt' else '|' : tit - return $ "[[Image:" ++ source ++ txt ++ "]]" + return $ "[[File:" ++ source ++ txt ++ "]]" inlineToMediaWiki (Note contents) = do contents' <- blockListToMediaWiki contents diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 03f8e8ba4..81bbdaf3f 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -41,7 +41,7 
@@ import Control.Applicative ((<$>)) import Text.Pandoc.Options ( WriterOptions(..) ) import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem', warn ) import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) -import Text.Pandoc.MIME ( getMimeType ) +import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared ( fixDisplayMath ) @@ -51,7 +51,7 @@ import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E import Data.Time.Clock.POSIX ( getPOSIXTime ) -import System.FilePath ( takeExtension, takeDirectory ) +import System.FilePath ( takeExtension, takeDirectory, (<.>)) -- | Produce an ODT file from a Pandoc document. writeODT :: WriterOptions -- ^ Writer options @@ -127,23 +127,27 @@ writeODT opts doc@(Pandoc meta _) = do return $ fromArchive archive'' transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPicMath opts entriesRef (Image lab (src,_)) = do +transformPicMath opts entriesRef (Image lab (src,t)) = do res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do warn $ "Could not find image `" ++ src ++ "', skipping..." return $ Emph lab - Right (img, _) -> do + Right (img, mbMimeType) -> do let size = imageSize img let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size let tit' = show w ++ "x" ++ show h entries <- readIORef entriesRef - let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src + let extension = fromMaybe (takeExtension $ takeWhile (/='?') src) + (mbMimeType >>= extensionFromMimeType) + let newsrc = "Pictures/" ++ show (length entries) <.> extension let toLazy = B.fromChunks . 
(:[]) epochtime <- floor `fmap` getPOSIXTime let entry = toEntry newsrc epochtime $ toLazy img modifyIORef entriesRef (entry:) - return $ Image lab (newsrc, tit') + let fig | "fig:" `isPrefixOf` t = "fig:" + | otherwise = "" + return $ Image lab (newsrc, fig++tit') transformPicMath _ entriesRef (Math t math) = do entries <- readIORef entriesRef let dt = if t == InlineMath then DisplayInline else DisplayBlock diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index dd359f3f5..5c8ef8c45 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -38,7 +38,7 @@ import Text.Pandoc.Writers.HTML (writeHtmlString) import Text.Pandoc.Writers.Markdown (writeMarkdown) import Text.Pandoc.Pretty import Data.Time -import System.Locale (defaultTimeLocale) +import Text.Pandoc.Compat.Locale (defaultTimeLocale) import qualified Text.Pandoc.Builder as B -- | Convert Pandoc document to string in OPML format. @@ -87,4 +87,3 @@ elementToOPML opts (Sec _ _num _ title elements) = | not (null blocks)] in inTags True "outline" attrs $ vcat (map (elementToOPML opts) rest) - diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 773d142f4..aee656413 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, OverloadedStrings #-} +{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-} {- Copyright (C) 2008-2014 Andrea Rossato <andrea.rossato@ing.unitn.it> and John MacFarlane. @@ -288,6 +288,8 @@ blockToOpenDocument o bs | Plain b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b + | Para [Image c (s,'f':'i':'g':':':t)] <- bs + = figure c s t | Para b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b @@ -334,7 +336,7 @@ blockToOpenDocument o bs mapM_ addParaStyle . 
newPara $ paraHStyles ++ paraStyles captionDoc <- if null c then return empty - else withParagraphStyle o "Caption" [Para c] + else withParagraphStyle o "TableCaption" [Para c] th <- if all null h then return empty else colHeadsToOpenDocument o name (map fst paraHStyles) h @@ -342,6 +344,12 @@ blockToOpenDocument o bs return $ inTags True "table:table" [ ("table:name" , name) , ("table:style-name", name) ] (vcat columns $$ th $$ vcat tr) $$ captionDoc + figure caption source title | null caption = + withParagraphStyle o "Figure" [Para [Image caption (source,title)]] + | otherwise = do + imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image caption (source,title)]] + captionDoc <- withParagraphStyle o "FigureCaption" [Para caption] + return $ imageDoc $$ captionDoc colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc colHeadsToOpenDocument o tn ns hs = @@ -370,7 +378,7 @@ inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc inlineToOpenDocument o ils | Space <- ils = inTextStyle space | Span _ xs <- ils = inlinesToOpenDocument o xs - | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] + | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] <> cr | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l | Strong l <- ils = withTextStyle Bold $ inlinesToOpenDocument o l @@ -553,4 +561,3 @@ textStyleAttr s ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] | otherwise = [] - diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 57ebfc360..2dd899680 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -48,11 +48,13 @@ import Data.Char (isSpace, toLower) type Refs = [([Inline], Target)] data WriterState = - WriterState { stNotes :: [[Block]] - , stLinks :: Refs - , stImages :: [([Inline], (String, 
String, Maybe String))] - , stHasMath :: Bool - , stOptions :: WriterOptions + WriterState { stNotes :: [[Block]] + , stLinks :: Refs + , stImages :: [([Inline], (String, String, Maybe String))] + , stHasMath :: Bool + , stHasRawTeX :: Bool + , stOptions :: WriterOptions + , stTopLevel :: Bool } -- | Convert Pandoc to RST. @@ -60,7 +62,8 @@ writeRST :: WriterOptions -> Pandoc -> String writeRST opts document = let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, - stOptions = opts } + stHasRawTeX = False, stOptions = opts, + stTopLevel = True} in evalState (pandocToRST document) st -- | Return RST representation of document. @@ -78,23 +81,32 @@ pandocToRST (Pandoc meta blocks) = do (fmap (render colwidth) . blockListToRST) (fmap (trimr . render colwidth) . inlineListToRST) $ deleteMeta "title" $ deleteMeta "subtitle" meta - body <- blockListToRST blocks + body <- blockListToRST' True $ normalizeHeadings 1 blocks notes <- liftM (reverse . stNotes) get >>= notesToRST -- note that the notes may contain refs, so we do them first refs <- liftM (reverse . stLinks) get >>= refsToRST pics <- liftM (reverse . 
stImages) get >>= pictRefsToRST hasMath <- liftM stHasMath get + rawTeX <- liftM stHasRawTeX get let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics] let context = defField "body" main $ defField "toc" (writerTableOfContents opts) - $ defField "toc-depth" (writerTOCDepth opts) + $ defField "toc-depth" (show $ writerTOCDepth opts) $ defField "math" hasMath $ defField "title" (render Nothing title :: String) $ defField "math" hasMath + $ defField "rawtex" rawTeX $ metadata if writerStandalone opts then return $ renderTemplate' (writerTemplate opts) context else return main + where + normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' + where (cont,bs') = break (headerLtEq l) bs + headerLtEq level (Header l' _ _) = l' <= level + headerLtEq _ _ = False + normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs + normalizeHeadings _ [] = [] -- | Return RST representation of reference key table. refsToRST :: Refs -> State WriterState Doc @@ -105,7 +117,7 @@ keyToRST :: ([Inline], (String, String)) -> State WriterState Doc keyToRST (label, (src, _)) = do label' <- inlineListToRST label - let label'' = if ':' `elem` (render Nothing label') + let label'' = if ':' `elem` ((render Nothing label') :: String) then char '`' <> label' <> char '`' else label' return $ nowrap $ ".. _" <> label'' <> ": " <> text src @@ -173,11 +185,11 @@ blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do capt <- inlineListToRST txt let fig = "figure:: " <> text src let alt = ":alt: " <> if null tit then capt else text tit - return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline + return $ hang 3 ".. 
" (fig $$ alt $+$ capt) $$ blankline blockToRST (Para inlines) | LineBreak `elem` inlines = do -- use line block if LineBreaks lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines - return $ (vcat $ map (text "| " <>) lns) <> blankline + return $ (vcat $ map (hang 2 (text "| ")) lns) <> blankline | otherwise = do contents <- inlineListToRST inlines return $ contents <> blankline @@ -188,11 +200,21 @@ blockToRST (RawBlock f@(Format f') str) (nest 3 $ text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline -blockToRST (Header level _ inlines) = do +blockToRST (Header level (name,classes,_) inlines) = do contents <- inlineListToRST inlines - let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate (offset contents) headerChar - return $ nowrap $ contents $$ border $$ blankline + isTopLevel <- gets stTopLevel + if isTopLevel + then do + let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) + let border = text $ replicate (offset contents) headerChar + return $ nowrap $ contents $$ border $$ blankline + else do + let rub = "rubric:: " <> contents + let name' | null name = empty + | otherwise = ":name: " <> text name + let cls | null classes = empty + | otherwise = ":class: " <> text (unwords classes) + return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline blockToRST (CodeBlock (_,classes,kvs) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts @@ -239,8 +261,7 @@ blockToRST (Table caption _ widths headers rows) = do middle = hcat $ intersperse sep' blocks let makeRow = hpipeBlocks . 
zipWith lblock widthsInChars let head' = makeRow headers' - rows' <- mapM (\row -> do cols <- mapM blockListToRST row - return $ makeRow cols) rows + let rows' = map makeRow rawRows let border ch = char '+' <> char ch <> (hcat $ intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> @@ -253,7 +274,7 @@ blockToRST (Table caption _ widths headers rows) = do blockToRST (BulletList items) = do contents <- mapM bulletListItemToRST items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (OrderedList (start, style', delim) items) = do let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim then take (length items) $ repeat "#." @@ -265,11 +286,11 @@ blockToRST (OrderedList (start, style', delim) items) = do contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ zip markers' items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline blockToRST (DefinitionList items) = do contents <- mapM definitionListItemToRST items -- ensure that sublists have preceding blank line - return $ blankline $$ vcat contents $$ blankline + return $ blankline $$ chomp (vcat contents) $$ blankline -- | Convert bullet list item (list of blocks) to RST. bulletListItemToRST :: [Block] -> State WriterState Doc @@ -295,9 +316,19 @@ definitionListItemToRST (label, defs) = do return $ label' $$ nest tabstop (nestle contents <> cr) -- | Convert list of Pandoc block elements to RST. 
+blockListToRST' :: Bool + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToRST' topLevel blocks = do + tl <- gets stTopLevel + modify (\s->s{stTopLevel=topLevel}) + res <- vcat `fmap` mapM blockToRST blocks + modify (\s->s{stTopLevel=tl}) + return res + blockListToRST :: [Block] -- ^ List of block elements -> State WriterState Doc -blockListToRST blocks = mapM blockToRST blocks >>= return . vcat +blockListToRST = blockListToRST' False -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc @@ -334,12 +365,12 @@ inlineListToRST lst = okAfterComplex :: Inline -> Bool okAfterComplex Space = True okAfterComplex LineBreak = True - okAfterComplex (Str (c:_)) = isSpace c || c `elem` "-.,:;!?\\/'\")]}>–—" + okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String) okAfterComplex _ = False okBeforeComplex :: Inline -> Bool okBeforeComplex Space = True okBeforeComplex LineBreak = True - okBeforeComplex (Str (c:_)) = isSpace c || c `elem` "-:/'\"<([{–—" + okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String) okBeforeComplex _ = False isComplex :: Inline -> Bool isComplex (Emph _) = True @@ -393,6 +424,9 @@ inlineToRST (Math t str) = do else blankline $$ (".. math:: " <> text str) $$ blankline inlineToRST (RawInline f x) | f == "rst" = return $ text x + | f == "latex" || f == "tex" = do + modify $ \st -> st{ stHasRawTeX = True } + return $ ":raw-latex:`" <> text x <> "`" | otherwise = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space @@ -427,7 +461,7 @@ inlineToRST (Image alternate (source, tit)) = do return $ "|" <> label <> "|" inlineToRST (Note contents) = do -- add to notes in state - notes <- get >>= return . 
stNotes + notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } let ref = show $ (length notes) + 1 return $ " [" <> text ref <> "]_" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 43405ce3c..dfad4b0e2 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -106,7 +106,9 @@ writeRTF options (Pandoc meta@(Meta metamap) blocks) = $ metadata in if writerStandalone options then renderTemplate' (writerTemplate options) context - else body + else case reverse body of + ('\n':_) -> body + _ -> body ++ "\n" -- | Construct table of contents from list of header blocks. tableOfContents :: [Block] -> String diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 8ac717bab..792718e95 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -368,7 +368,7 @@ inlineListForNode = return . text . stringToTexinfo . -- periods, commas, colons, and parentheses are disallowed in node names disallowedInNode :: Char -> Bool -disallowedInNode c = c `elem` ".,:()" +disallowedInNode c = c `elem` (".,:()" :: String) -- | Convert inline element to Texinfo inlineToTexinfo :: Inline -- ^ Inline to convert @@ -421,8 +421,8 @@ inlineToTexinfo (RawInline f str) return $ text "@tex" $$ text str $$ text "@end tex" | f == "texinfo" = return $ text str | otherwise = return empty -inlineToTexinfo (LineBreak) = return $ text "@*" -inlineToTexinfo Space = return $ char ' ' +inlineToTexinfo (LineBreak) = return $ text "@*" <> cr +inlineToTexinfo Space = return space inlineToTexinfo (Link txt (src@('#':_), _)) = do contents <- escapeCommas $ inlineListToTexinfo txt |