diff options
Diffstat (limited to 'src/Text/Pandoc/Writers')
-rw-r--r-- | src/Text/Pandoc/Writers/AsciiDoc.hs | 70 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Custom.hs | 34 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 47 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 37 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/DokuWiki.hs | 475 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 123 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/FB2.hs | 43 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 30 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Haddock.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ICML.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Man.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 234 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/MediaWiki.hs | 316 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ODT.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/OpenDocument.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Org.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RTF.hs | 72 |
19 files changed, 1100 insertions, 448 deletions
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 19112d8f5..e5b8c5167 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -43,16 +43,19 @@ import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, space) -import Data.List ( isPrefixOf, intersperse, intercalate ) +import Data.Maybe (fromMaybe) +import Data.List ( stripPrefix, intersperse, intercalate ) import Text.Pandoc.Pretty import Control.Monad.State import qualified Data.Map as M import Data.Aeson (Value(String), fromJSON, toJSON, Result(..)) import qualified Data.Text as T +import Control.Applicative ((<*), (*>)) data WriterState = WriterState { defListMarker :: String , orderedListLevel :: Int , bulletListLevel :: Int + , intraword :: Bool } -- | Convert Pandoc to AsciiDoc. @@ -62,6 +65,7 @@ writeAsciiDoc opts document = defListMarker = "::" , orderedListLevel = 1 , bulletListLevel = 1 + , intraword = False } -- | Return asciidoc representation of document. @@ -123,7 +127,7 @@ blockToAsciiDoc _ Null = return empty blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines return $ contents <> cr -blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = +blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do blockToAsciiDoc opts (Para [Image alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do contents <- inlineListToAsciiDoc opts inlines @@ -142,10 +146,10 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do let len = offset contents -- ident seem to be empty most of the time and asciidoc will generate them automatically -- so lets make them not show up when null - let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") + let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]") let setext = writerSetextHeaders opts - return $ - (if setext + return $ + (if setext then identifier $$ contents $$ (case level of @@ -155,7 +159,7 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do 4 -> text $ replicate len '+' _ -> empty) <> blankline else - identifier $$ text (replicate level '=') <> space <> contents <> blankline) + identifier $$ text (replicate level '=') <> space <> contents <> blankline) blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ flush (attrs <> dashes <> space <> attrs <> cr <> text str <> cr <> dashes) <> blankline @@ -317,17 +321,51 @@ blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks -- | Convert list of Pandoc inline elements to asciidoc. inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToAsciiDoc opts lst = - mapM (inlineToAsciiDoc opts) lst >>= return . cat +inlineListToAsciiDoc opts lst = do + oldIntraword <- gets intraword + setIntraword False + result <- go lst + setIntraword oldIntraword + return result + where go [] = return empty + go (y:x:xs) + | not (isSpacy y) = do + y' <- if isSpacy x + then inlineToAsciiDoc opts y + else withIntraword $ inlineToAsciiDoc opts y + x' <- withIntraword $ inlineToAsciiDoc opts x + xs' <- go xs + return (y' <> x' <> xs') + | x /= Space && x /= LineBreak = do + y' <- withIntraword $ inlineToAsciiDoc opts y + xs' <- go (x:xs) + return (y' <> xs') + go (x:xs) = do + x' <- inlineToAsciiDoc opts x + xs' <- go xs + return (x' <> xs') + isSpacy Space = True + isSpacy LineBreak = True + isSpacy _ = False + +setIntraword :: Bool -> State WriterState () +setIntraword b = modify $ \st -> st{ intraword = b } + +withIntraword :: State WriterState a -> State WriterState a +withIntraword p = setIntraword True *> p <* setIntraword False -- | Convert Pandoc inline element to asciidoc. inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc inlineToAsciiDoc opts (Emph lst) = do contents <- inlineListToAsciiDoc opts lst - return $ "_" <> contents <> "_" + isIntraword <- gets intraword + let marker = if isIntraword then "__" else "_" + return $ marker <> contents <> marker inlineToAsciiDoc opts (Strong lst) = do contents <- inlineListToAsciiDoc opts lst - return $ "*" <> contents <> "*" + isIntraword <- gets intraword + let marker = if isIntraword then "**" else "*" + return $ marker <> contents <> marker inlineToAsciiDoc opts (Strikeout lst) = do contents <- inlineListToAsciiDoc opts lst return $ "[line-through]*" <> contents <> "*" @@ -338,12 +376,10 @@ inlineToAsciiDoc opts (Subscript lst) = do contents <- inlineListToAsciiDoc opts lst return $ "~" <> contents <> "~" inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst -inlineToAsciiDoc opts (Quoted SingleQuote lst) = do - contents <- inlineListToAsciiDoc opts lst - return $ "`" <> contents <> "'" -inlineToAsciiDoc opts (Quoted DoubleQuote lst) = do - contents <- inlineListToAsciiDoc opts lst - return $ "``" <> contents <> "''" +inlineToAsciiDoc opts (Quoted SingleQuote lst) = + inlineListToAsciiDoc opts (Str "`" : lst ++ [Str "'"]) +inlineToAsciiDoc opts (Quoted DoubleQuote lst) = + inlineListToAsciiDoc opts (Str "``" : lst ++ [Str "''"]) inlineToAsciiDoc _ (Code _ str) = return $ text "`" <> text (escapeStringUsing (backslashEscapes "`") str) <> "`" inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str @@ -366,7 +402,7 @@ inlineToAsciiDoc opts (Link txt (src, _tit)) = do let prefix = if isRelative then text "link:" else empty - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) let useAuto = case txt of [Str s] | escapeURI s == srcSuffix -> True _ -> False diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 88f590c43..914d61850 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings #-} +{-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings, + ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -35,12 +36,14 @@ import Text.Pandoc.Options import Data.List ( intersperse ) import Data.Char ( toLower ) import Scripting.Lua (LuaState, StackValue, callfunc) +import Text.Pandoc.Writers.Shared import qualified Scripting.Lua as Lua import Text.Pandoc.UTF8 (fromString, toString) import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as C8 import Data.Monoid import qualified Data.Map as M +import Text.Pandoc.Templates attrToMap :: Attr -> M.Map ByteString ByteString attrToMap (id',classes,keyvals) = M.fromList @@ -128,9 +131,23 @@ instance StackValue MetaValue where valuetype (MetaInlines _) = Lua.TSTRING valuetype (MetaBlocks _) = Lua.TSTRING +instance StackValue Citation where + push lua cit = do + Lua.createtable lua 6 0 + let addValue ((k :: String), v) = Lua.push lua k >> Lua.push lua v >> + Lua.rawset lua (-3) + addValue ("citationId", citationId cit) + addValue ("citationPrefix", citationPrefix cit) + addValue ("citationSuffix", citationSuffix cit) + addValue ("citationMode", show (citationMode cit)) + addValue ("citationNoteNum", citationNoteNum cit) + addValue ("citationHash", citationHash cit) + peek = undefined + valuetype _ = Lua.TTABLE + -- | Convert Pandoc to custom markup. writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String -writeCustom luaFile opts doc = do +writeCustom luaFile opts doc@(Pandoc meta _) = do luaScript <- C8.unpack `fmap` C8.readFile luaFile lua <- Lua.newstate Lua.openlibs lua @@ -138,8 +155,17 @@ writeCustom luaFile opts doc = do Lua.call lua 0 0 -- TODO - call hierarchicalize, so we have that info rendered <- docToCustom lua opts doc + context <- metaToJSON opts + (fmap toString . blockListToCustom lua) + (fmap toString . inlineListToCustom lua) + meta Lua.close lua - return $ toString rendered + let body = toString rendered + if writerStandalone opts + then do + let context' = setField "body" body context + return $ renderTemplate' (writerTemplate opts) context' + else return body docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO ByteString docToCustom lua opts (Pandoc (Meta metamap) blocks) = do @@ -225,7 +251,7 @@ inlineToCustom lua (Quoted SingleQuote lst) = callfunc lua "SingleQuoted" lst inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst -inlineToCustom lua (Cite _ lst) = callfunc lua "Cite" lst +inlineToCustom lua (Cite cs lst) = callfunc lua "Cite" lst cs inlineToCustom lua (Code attr str) = callfunc lua "Code" (fromString str) (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index ba6a92a08..b10317506 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, PatternGuards #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -37,8 +37,9 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Readers.TeXMath -import Data.List ( isPrefixOf, intercalate, isSuffixOf ) +import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf ) import Data.Char ( toLower ) +import Control.Applicative ((<$>)) import Data.Monoid ( Any(..) ) import Text.Pandoc.Highlighting ( languages, languagesByExtension ) import Text.Pandoc.Pretty @@ -293,14 +294,14 @@ inlineToDocbook _ (Code _ str) = inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math t str) | isMathML (writerHTMLMathMethod opts) = - case texMathToMathML dt str of - Right r -> inTagsSimple tagtype - $ text $ Xml.ppcElement conf - $ fixNS - $ removeAttr r - Left _ -> inlinesToDocbook opts - $ readTeXMath' t str - | otherwise = inlinesToDocbook opts $ readTeXMath' t str + case writeMathML dt <$> readTeX str of + Right r -> inTagsSimple tagtype + $ text $ Xml.ppcElement conf + $ fixNS + $ removeAttr r + Left _ -> inlinesToDocbook opts + $ texMathToInlines t str + | otherwise = inlinesToDocbook opts $ texMathToInlines t str where (dt, tagtype) = case t of InlineMath -> (DisplayInline,"inlineequation") DisplayMath -> (DisplayBlock,"informalequation") @@ -312,19 +313,19 @@ inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x | otherwise = empty inlineToDocbook _ LineBreak = text "\n" inlineToDocbook _ Space = space -inlineToDocbook opts (Link txt (src, _)) = - if isPrefixOf "mailto:" src - then let src' = drop 7 src - emailLink = inTagsSimple "email" $ text $ - escapeStringForXML $ src' - in case txt of - [Str s] | escapeURI s == src' -> emailLink - _ -> inlinesToDocbook opts txt <+> - char '(' <> emailLink <> char ')' - else (if isPrefixOf "#" src - then inTags False "link" [("linkend", drop 1 src)] - else inTags False "ulink" [("url", src)]) $ - inlinesToDocbook opts txt +inlineToDocbook opts (Link txt (src, _)) + | Just email <- stripPrefix "mailto:" src = + let emailLink = inTagsSimple "email" $ text $ + escapeStringForXML $ email + in case txt of + [Str s] | escapeURI s == email -> emailLink + _ -> inlinesToDocbook opts txt <+> + char '(' <> emailLink <> char ')' + | otherwise = + (if isPrefixOf "#" src + then inTags False "link" [("linkend", drop 1 src)] + else inTags False "ulink" [("url", src)]) $ + inlinesToDocbook opts txt inlineToDocbook _ (Image _ (src, tit)) = let titleDoc = if null tit then empty diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 77ee51519..38031b7dc 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -58,7 +58,7 @@ import System.Random (randomRIO) import Text.Printf (printf) import qualified Control.Exception as E import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) -import Control.Applicative ((<|>)) +import Control.Applicative ((<|>), (<$>)) import Data.Maybe (mapMaybe) data ListMarker = NoMarker @@ -481,17 +481,30 @@ writeOpenXML opts (Pandoc meta blocks) = do _ -> [] let auths = docAuthors meta let dat = docDate meta + let abstract' = case lookupMeta "abstract" meta of + Just (MetaBlocks bs) -> bs + Just (MetaInlines ils) -> [Plain ils] + _ -> [] + let subtitle' = case lookupMeta "subtitle" meta of + Just (MetaBlocks [Plain xs]) -> xs + Just (MetaBlocks [Para xs]) -> xs + Just (MetaInlines xs) -> xs + _ -> [] title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts - [Para (intercalate [LineBreak] auths) | not (null auths)] + subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + map Para auths date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + abstract <- if null abstract' + then return [] + else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs let blocks' = bottomUp convertSpace $ blocks doc' <- blocksToOpenXML opts blocks' notes' <- reverse `fmap` gets stFootnotes - let meta' = title ++ authors ++ date + let meta' = title ++ subtitle ++ authors ++ date ++ abstract return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. @@ -514,8 +527,11 @@ blockToOpenXML :: WriterOptions -> Block -> WS [Element] blockToOpenXML _ Null = return [] blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do - contents <- withParaProp (pStyle $ "Heading" ++ show lev) $ - blockToOpenXML opts (Para lst) + + paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $ + getParaProps False + contents <- inlinesToOpenXML opts lst + usedIdents <- gets stSectionIds let bookmarkName = if null ident then uniqueIdent lst usedIdents @@ -525,7 +541,7 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () - return $ [bookmarkStart] ++ contents ++ [bookmarkEnd] + return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure @@ -751,9 +767,9 @@ inlineToOpenXML opts (Math mathType str) = do let displayType = if mathType == DisplayMath then DisplayBlock else DisplayInline - case texMathToOMML displayType str of + case writeOMML displayType <$> readTeX str of Right r -> return [r] - Left _ -> inlinesToOpenXML opts (readTeXMath' mathType str) + Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML opts (Code attrs str) = withTextProp (rStyle "VerbatimChar") @@ -814,7 +830,8 @@ inlineToOpenXML opts (Image alt (src, tit)) = do case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] Nothing -> do - res <- liftIO $ fetchItem (writerSourceURL opts) src + res <- liftIO $ + fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs new file mode 100644 index 000000000..26f9b5f62 --- /dev/null +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -0,0 +1,475 @@ +{- +Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.DokuWiki + Copyright : Copyright (C) 2008-2014 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : Clare Macrae <clare.macrae@googlemail.com> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to DokuWiki markup. + +DokuWiki: <https://www.dokuwiki.org/dokuwiki> +-} + +{- + [ ] Implement nested blockquotes (currently only ever does one level) + [ ] Implement alignment of text in tables + [ ] Implement comments + [ ] Work through the Dokuwiki spec, and check I've not missed anything out + [ ] Remove dud/duplicate code +-} + +module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Shared +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Templates (renderTemplate') +import Data.List ( intersect, intercalate, isPrefixOf ) +import Network.URI ( isURI ) +import Control.Monad.State + +data WriterState = WriterState { + stNotes :: Bool -- True if there are notes + , stIndent :: String -- Indent after the marker at the beginning of list items + , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list + } + +-- | Convert Pandoc to DokuWiki. +writeDokuWiki :: WriterOptions -> Pandoc -> String +writeDokuWiki opts document = + evalState (pandocToDokuWiki opts $ normalize document) + (WriterState { stNotes = False, stIndent = "", stUseTags = False }) + +-- | Return DokuWiki representation of document. +pandocToDokuWiki :: WriterOptions -> Pandoc -> State WriterState String +pandocToDokuWiki opts (Pandoc meta blocks) = do + metadata <- metaToJSON opts + (fmap trimr . blockListToDokuWiki opts) + (inlineListToDokuWiki opts) + meta + body <- blockListToDokuWiki opts blocks + notesExist <- get >>= return . stNotes + let notes = if notesExist + then "" -- TODO Was "\n<references />" Check whether I can really remove this: + -- if it is definitely to do with footnotes, can remove this whole bit + else "" + let main = body ++ notes + let context = defField "body" main + $ defField "toc" (writerTableOfContents opts) + $ metadata + if writerStandalone opts + then return $ renderTemplate' (writerTemplate opts) context + else return main + +-- | Escape special characters for DokuWiki. +escapeString :: String -> String +escapeString = substitute "__" "%%__%%" . + substitute "**" "%%**%%" . + substitute "//" "%%//%%" + +-- | Convert Pandoc block element to DokuWiki. +blockToDokuWiki :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState String + +blockToDokuWiki _ Null = return "" + +blockToDokuWiki opts (Div _attrs bs) = do + contents <- blockListToDokuWiki opts bs + return $ contents ++ "\n" + +blockToDokuWiki opts (Plain inlines) = + inlineListToDokuWiki opts inlines + +-- title beginning with fig: indicates that the image is a figure +-- dokuwiki doesn't support captions - so combine together alt and caption into alt +blockToDokuWiki opts (Para [Image txt (src,'f':'i':'g':':':tit)]) = do + capt <- if null txt + then return "" + else (" " ++) `fmap` inlineListToDokuWiki opts txt + let opt = if null txt + then "" + else "|" ++ if null tit then capt else tit ++ capt + return $ "{{:" ++ src ++ opt ++ "}}\n" + +blockToDokuWiki opts (Para inlines) = do + indent <- gets stIndent + useTags <- gets stUseTags + contents <- inlineListToDokuWiki opts inlines + return $ if useTags + then "<HTML><p></HTML>" ++ contents ++ "<HTML></p></HTML>" + else contents ++ if null indent then "\n" else "" + +blockToDokuWiki _ (RawBlock f str) + | f == Format "dokuwiki" = return str + -- See https://www.dokuwiki.org/wiki:syntax + -- use uppercase HTML tag for block-level content: + | f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>" + | otherwise = return "" + +blockToDokuWiki _ HorizontalRule = return "\n----\n" + +blockToDokuWiki opts (Header level _ inlines) = do + -- emphasis, links etc. not allowed in headers, apparently, + -- so we remove formatting: + contents <- inlineListToDokuWiki opts $ removeFormatting inlines + let eqs = replicate ( 7 - level ) '=' + return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" + +blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do + let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", + "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm", + "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran", + "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5", + "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", + "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", + "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", + "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", + "visualfoxpro", "winbatch", "xml", "xpp", "z80"] + let (beg, end) = if null at + then ("<code" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</code>") + else ("<source lang=\"" ++ head at ++ "\">", "</source>") + return $ beg ++ str ++ end + +blockToDokuWiki opts (BlockQuote blocks) = do + contents <- blockListToDokuWiki opts blocks + if isSimpleBlockQuote blocks + then return $ "> " ++ contents + else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>" + +blockToDokuWiki opts (Table capt aligns _ headers rows') = do + let alignStrings = map alignmentToString aligns + captionDoc <- if null capt + then return "" + else do + c <- inlineListToDokuWiki opts capt + return $ "" ++ c ++ "\n" + head' <- if all null headers + then return "" + else do + hs <- tableHeaderToDokuWiki opts alignStrings 0 headers + return $ hs ++ "\n" + body' <- zipWithM (tableRowToDokuWiki opts alignStrings) [1..] rows' + return $ captionDoc ++ head' ++ + unlines body' + +blockToDokuWiki opts x@(BulletList items) = do + oldUseTags <- get >>= return . stUseTags + indent <- get >>= return . stIndent + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + modify $ \s -> s { stUseTags = True } + contents <- mapM (listItemToDokuWiki opts) items + modify $ \s -> s { stUseTags = oldUseTags } + return $ "<HTML><ul></HTML>\n" ++ vcat contents ++ "<HTML></ul></HTML>\n" + else do + modify $ \s -> s { stIndent = stIndent s ++ " " } + contents <- mapM (listItemToDokuWiki opts) items + modify $ \s -> s { stIndent = indent } + return $ vcat contents ++ if null indent then "\n" else "" + +blockToDokuWiki opts x@(OrderedList attribs items) = do + oldUseTags <- get >>= return . stUseTags + indent <- get >>= return . stIndent + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + modify $ \s -> s { stUseTags = True } + contents <- mapM (orderedListItemToDokuWiki opts) items + modify $ \s -> s { stUseTags = oldUseTags } + return $ "<HTML><ol" ++ listAttribsToString attribs ++ "></HTML>\n" ++ vcat contents ++ "<HTML></ol></HTML>\n" + else do + modify $ \s -> s { stIndent = stIndent s ++ " " } + contents <- mapM (orderedListItemToDokuWiki opts) items + modify $ \s -> s { stIndent = indent } + return $ vcat contents ++ if null indent then "\n" else "" + +-- TODO Need to decide how to make definition lists work on dokuwiki - I don't think there +-- is a specific representation of them. +-- TODO This creates double '; ; ' if there is a bullet or ordered list inside a definition list +blockToDokuWiki opts x@(DefinitionList items) = do + oldUseTags <- get >>= return . stUseTags + indent <- get >>= return . stIndent + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + modify $ \s -> s { stUseTags = True } + contents <- mapM (definitionListItemToDokuWiki opts) items + modify $ \s -> s { stUseTags = oldUseTags } + return $ "<HTML><dl></HTML>\n" ++ vcat contents ++ "<HTML></dl></HTML>\n" + else do + modify $ \s -> s { stIndent = stIndent s ++ " " } + contents <- mapM (definitionListItemToDokuWiki opts) items + modify $ \s -> s { stIndent = indent } + return $ vcat contents ++ if null indent then "\n" else "" + +-- Auxiliary functions for lists: + +-- | Convert ordered list attributes to HTML attribute string +listAttribsToString :: ListAttributes -> String +listAttribsToString (startnum, numstyle, _) = + let numstyle' = camelCaseToHyphenated $ show numstyle + in (if startnum /= 1 + then " start=\"" ++ show startnum ++ "\"" + else "") ++ + (if numstyle /= DefaultStyle + then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + else "") + +-- | Convert bullet list item (list of blocks) to DokuWiki. +listItemToDokuWiki :: WriterOptions -> [Block] -> State WriterState String +listItemToDokuWiki opts items = do + contents <- blockListToDokuWiki opts items + useTags <- get >>= return . stUseTags + if useTags + then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" + else do + indent <- get >>= return . stIndent + return $ indent ++ "* " ++ contents + +-- | Convert ordered list item (list of blocks) to DokuWiki. +-- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki +orderedListItemToDokuWiki :: WriterOptions -> [Block] -> State WriterState String +orderedListItemToDokuWiki opts items = do + contents <- blockListToDokuWiki opts items + useTags <- get >>= return . stUseTags + if useTags + then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" + else do + indent <- get >>= return . stIndent + return $ indent ++ "- " ++ contents + +-- | Convert definition list item (label, list of blocks) to DokuWiki. +definitionListItemToDokuWiki :: WriterOptions + -> ([Inline],[[Block]]) + -> State WriterState String +definitionListItemToDokuWiki opts (label, items) = do + labelText <- inlineListToDokuWiki opts label + contents <- mapM (blockListToDokuWiki opts) items + useTags <- get >>= return . stUseTags + if useTags + then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++ + (intercalate "\n" $ map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents) + else do + indent <- get >>= return . stIndent + return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents + +-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. +isSimpleList :: Block -> Bool +isSimpleList x = + case x of + BulletList items -> all isSimpleListItem items + OrderedList (num, sty, _) items -> all isSimpleListItem items && + num == 1 && sty `elem` [DefaultStyle, Decimal] + DefinitionList items -> all isSimpleListItem $ concatMap snd items + _ -> False + +-- | True if list item can be handled with the simple wiki syntax. False if +-- HTML tags will be needed. +isSimpleListItem :: [Block] -> Bool +isSimpleListItem [] = True +isSimpleListItem [x] = + case x of + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + DefinitionList _ -> isSimpleList x + _ -> False +isSimpleListItem [x, y] | isPlainOrPara x = + case y of + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + DefinitionList _ -> isSimpleList y + _ -> False +isSimpleListItem _ = False + +isPlainOrPara :: Block -> Bool +isPlainOrPara (Plain _) = True +isPlainOrPara (Para _) = True +isPlainOrPara _ = False + +isSimpleBlockQuote :: [Block] -> Bool +isSimpleBlockQuote [BlockQuote bs] = isSimpleBlockQuote bs +isSimpleBlockQuote [b] = isPlainOrPara b +isSimpleBlockQuote _ = False + +-- | Concatenates strings with line breaks between them. +vcat :: [String] -> String +vcat = intercalate "\n" + +-- Auxiliary functions for tables: + +-- TODO Eliminate copy-and-pasted code in tableHeaderToDokuWiki and tableRowToDokuWiki +tableHeaderToDokuWiki :: WriterOptions + -> [String] + -> Int + -> [[Block]] + -> State WriterState String +tableHeaderToDokuWiki opts alignStrings rownum cols' = do + let celltype = if rownum == 0 then "" else "" + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToDokuWiki opts celltype alignment item) + alignStrings cols' + return $ "^ " ++ "" ++ joinHeaders cols'' ++ " ^" + +tableRowToDokuWiki :: WriterOptions + -> [String] + -> Int + -> [[Block]] + -> State WriterState String +tableRowToDokuWiki opts alignStrings rownum cols' = do + let celltype = if rownum == 0 then "" else "" + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToDokuWiki opts celltype alignment item) + alignStrings cols' + return $ "| " ++ "" ++ joinColumns cols'' ++ " |" + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "" + AlignRight -> "" + AlignCenter -> "" + AlignDefault -> "" + +tableItemToDokuWiki :: WriterOptions + -> String + -> String + -> [Block] + -> State WriterState String +-- TODO Fix celltype and align' defined but not used +tableItemToDokuWiki opts _celltype _align' item = do + let mkcell x = "" ++ x ++ "" + contents <- blockListToDokuWiki opts item + return $ mkcell contents + +-- | Concatenates columns together. +joinColumns :: [String] -> String +joinColumns = intercalate " | " + +-- | Concatenates headers together. +joinHeaders :: [String] -> String +joinHeaders = intercalate " ^ " + +-- | Convert list of Pandoc block elements to DokuWiki. +blockListToDokuWiki :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState String +blockListToDokuWiki opts blocks = + mapM (blockToDokuWiki opts) blocks >>= return . vcat + +-- | Convert list of Pandoc inline elements to DokuWiki. +inlineListToDokuWiki :: WriterOptions -> [Inline] -> State WriterState String +inlineListToDokuWiki opts lst = mapM (inlineToDokuWiki opts) lst >>= return . concat + +-- | Convert Pandoc inline element to DokuWiki. +inlineToDokuWiki :: WriterOptions -> Inline -> State WriterState String + +inlineToDokuWiki opts (Span _attrs ils) = do + contents <- inlineListToDokuWiki opts ils + return contents + +inlineToDokuWiki opts (Emph lst) = do + contents <- inlineListToDokuWiki opts lst + return $ "//" ++ contents ++ "//" + +inlineToDokuWiki opts (Strong lst) = do + contents <- inlineListToDokuWiki opts lst + return $ "**" ++ contents ++ "**" + +inlineToDokuWiki opts (Strikeout lst) = do + contents <- inlineListToDokuWiki opts lst + return $ "<del>" ++ contents ++ "</del>" + +inlineToDokuWiki opts (Superscript lst) = do + contents <- inlineListToDokuWiki opts lst + return $ "<sup>" ++ contents ++ "</sup>" + +inlineToDokuWiki opts (Subscript lst) = do + contents <- inlineListToDokuWiki opts lst + return $ "<sub>" ++ contents ++ "</sub>" + +inlineToDokuWiki opts (SmallCaps lst) = inlineListToDokuWiki opts lst + +inlineToDokuWiki opts (Quoted SingleQuote lst) = do + contents <- inlineListToDokuWiki opts lst + return $ "\8216" ++ contents ++ "\8217" + +inlineToDokuWiki opts (Quoted DoubleQuote lst) = do + contents <- inlineListToDokuWiki opts lst + return $ "\8220" ++ contents ++ "\8221" + +inlineToDokuWiki opts (Cite _ lst) = inlineListToDokuWiki opts lst + +inlineToDokuWiki _ (Code _ str) = + -- In dokuwiki, text surrounded by '' is really just a font statement, i.e. <tt>, + -- and so other formatting can be present inside. + -- However, in pandoc, and markdown, inlined code doesn't contain formatting. + -- So I have opted for using %% to disable all formatting inside inline code blocks. + -- This gives the best results when converting from other formats to dokuwiki, even if + -- the resultand code is a little ugly, for short strings that don't contain formatting + -- characters. + -- It does mean that if pandoc could ever read dokuwiki, and so round-trip the format, + -- any formatting inside inlined code blocks would be lost, or presented incorrectly. + return $ "''%%" ++ str ++ "%%''" + +inlineToDokuWiki _ (Str str) = return $ escapeString str + +inlineToDokuWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>" + -- note: str should NOT be escaped + +inlineToDokuWiki _ (RawInline f str) + | f == Format "dokuwiki" = return str + | f == Format "html" = return $ "<html>" ++ str ++ "</html>" + | otherwise = return "" + +inlineToDokuWiki _ (LineBreak) = return "\\\\ " + +inlineToDokuWiki _ Space = return " " + +inlineToDokuWiki opts (Link txt (src, _)) = do + label <- inlineListToDokuWiki opts txt + case txt of + [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">" + | escapeURI s == src -> return src + _ -> if isURI src + then return $ "[[" ++ src ++ "|" ++ label ++ "]]" + else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" + where src' = case src of + '/':xs -> xs -- with leading / it's a + _ -> src -- link to a help page +inlineToDokuWiki opts (Image alt (source, tit)) = do + alt' <- inlineListToDokuWiki opts alt + let txt = if (null tit) + then if null alt + then "" + else "|" ++ alt' + else "|" ++ tit + return $ "{{:" ++ source ++ txt ++ "}}" + +inlineToDokuWiki opts (Note contents) = do + contents' <- blockListToDokuWiki opts contents + modify (\s -> s { stNotes = True }) + return $ "((" ++ contents' ++ "))" + -- note - may not work for notes with multiple blocks diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index b6687c330..2aab7701f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-} +{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-} {- Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu> @@ -29,10 +29,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to EPUB. -} module Text.Pandoc.Writers.EPUB ( writeEPUB ) where -import Data.IORef +import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) import qualified Data.Map as M import Data.Maybe ( fromMaybe ) -import Data.List ( isInfixOf, intercalate ) +import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) import System.FilePath ( (</>), takeExtension, takeFileName ) @@ -40,28 +40,35 @@ import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.SelfContained ( makeSelfContained ) -import Codec.Archive.Zip +import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive) import Control.Applicative ((<$>)) -import Data.Time.Clock.POSIX -import Data.Time -import System.Locale -import Text.Pandoc.Shared hiding ( Element ) -import qualified Text.Pandoc.Shared as Shared +import Data.Time.Clock.POSIX ( getPOSIXTime ) +import Data.Time (getCurrentTime,UTCTime, formatTime) +import System.Locale ( defaultTimeLocale ) +import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim + , normalizeDate, readDataFile, stringify, warn + , hierarchicalize, fetchItem' ) +import qualified Text.Pandoc.Shared as S (Element(..)) import Text.Pandoc.Builder (fromList, setMeta) -import Text.Pandoc.Options +import Text.Pandoc.Options ( WriterOptions(..) + , HTMLMathMethod(..) + , EPUBVersion(..) + , ObfuscationMethod(NoObfuscation) ) import Text.Pandoc.Definition -import Text.Pandoc.Walk -import Control.Monad.State -import Text.XML.Light hiding (ppTopElement) -import Text.Pandoc.UUID -import Text.Pandoc.Writers.HTML -import Text.Pandoc.Writers.Markdown ( writePlain ) +import Text.Pandoc.Walk (walk, walkM) +import Control.Monad.State (modify, get, execState, State, put, evalState) +import Control.Monad (foldM, when, mplus, liftM) +import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs + , strContent, lookupAttr, Node(..), QName(..), parseXML + , onlyElems, node, ppElement) +import Text.Pandoc.UUID (getRandomUUID) +import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -import Text.HTML.TagSoup +import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -87,6 +94,7 @@ data EPUBMetadata = EPUBMetadata{ , epubRights :: Maybe String , epubCoverImage :: Maybe String , epubStylesheet :: Maybe Stylesheet + , epubPageDirection :: ProgressionDirection } deriving Show data Stylesheet = StylesheetPath FilePath @@ -115,6 +123,8 @@ data Title = Title{ , titleType :: Maybe String } deriving Show +data ProgressionDirection = LTR | RTL | Default deriving Show + dcName :: String -> QName dcName n = QName n Nothing (Just "dc") @@ -124,20 +134,15 @@ dcNode = node . dcName opfName :: String -> QName opfName n = QName n Nothing (Just "opf") -plainify :: [Inline] -> String -plainify t = - trimr $ writePlain def{ writerStandalone = False } - $ Pandoc nullMeta [Plain $ walk removeNote t] - -removeNote :: Inline -> Inline -removeNote (Note _) = Str "" -removeNote x = x - toId :: FilePath -> String toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' then x else '_') . takeFileName +removeNote :: Inline -> Inline +removeNote (Note _) = Str "" +removeNote x = x + getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta @@ -172,7 +177,7 @@ getEPUBMetadata opts meta = do if any (\c -> creatorRole c == Just "aut") $ epubCreator m then return m else do - let authors' = map plainify $ docAuthors meta + let authors' = map stringify $ docAuthors meta let toAuthor name = Creator{ creatorText = name , creatorRole = Just "aut" , creatorFileAs = Nothing } @@ -221,8 +226,8 @@ addMetadataFromXML _ md = md metaValueToString :: MetaValue -> String metaValueToString (MetaString s) = s -metaValueToString (MetaInlines ils) = plainify ils -metaValueToString (MetaBlocks bs) = plainify $ query (:[]) bs +metaValueToString (MetaInlines ils) = stringify ils +metaValueToString (MetaBlocks bs) = stringify bs metaValueToString (MetaBool b) = show b metaValueToString _ = "" @@ -294,6 +299,7 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubRights = rights , epubCoverImage = coverImage , epubStylesheet = stylesheet + , epubPageDirection = pageDirection } where identifiers = getIdentifier meta titles = getTitle meta @@ -316,6 +322,14 @@ metadataFromMeta opts meta = EPUBMetadata{ stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` ((StylesheetPath . metaValueToString) <$> lookupMeta "stylesheet" meta) + pageDirection = maybe Default stringToPageDirection + (lookupMeta "page-progression-direction" meta) + stringToPageDirection (metaValueToString -> s) = + case s of + "ltr" -> LTR + "rtl" -> RTL + _ -> Default + -- | Produce an EPUB file from a Pandoc document. writeEPUB :: WriterOptions -- ^ Writer options @@ -339,7 +353,7 @@ writeEPUB opts doc@(Pandoc meta _) = do if epub3 then MathML Nothing else writerHTMLMathMethod opts - , writerWrapText = False } + , writerWrapText = True } metadata <- getEPUBMetadata opts' meta -- cover page @@ -366,7 +380,8 @@ writeEPUB opts doc@(Pandoc meta _) = do walkM (transformBlock opts' mediaRef) pics <- readIORef mediaRef let readPicEntry entries (oldsrc, newsrc) = do - res <- fetchItem (writerSourceURL opts') oldsrc + res <- fetchItem' (writerMediaBag opts') + (writerSourceURL opts') oldsrc case res of Left _ -> do warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." @@ -379,6 +394,12 @@ writeEPUB opts doc@(Pandoc meta _) = do let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f fontEntries <- mapM mkFontEntry $ writerEpubFonts opts' + -- set page progression direction + let progressionDirection = case epubPageDirection metadata of + LTR -> "ltr" + RTL -> "rtl" + Default -> "default" + -- body pages -- add level 1 header to beginning if none there @@ -468,7 +489,7 @@ writeEPUB opts doc@(Pandoc meta _) = do [] -> case epubTitle metadata of [] -> "UNTITLED" (x:_) -> titleText x - x -> plainify x + x -> stringify x let uuid = case epubIdentifier metadata of (x:_) -> identifierText x -- use first identifier as UUID [] -> error "epubIdentifier is null" -- shouldn't happen @@ -498,7 +519,8 @@ writeEPUB opts doc@(Pandoc meta _) = do (pictureNode x)]) ++ map pictureNode picEntries ++ map fontNode fontEntries - , unode "spine" ! [("toc","ncx")] $ + , unode "spine" ! [("toc","ncx") + ,("page-progression-direction", progressionDirection)] $ case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! @@ -529,25 +551,25 @@ writeEPUB opts doc@(Pandoc meta _) = do let tocLevel = writerTOCDepth opts let navPointNode :: (Int -> String -> String -> [Element] -> Element) - -> Shared.Element -> State Int Element - navPointNode formatter (Sec _ nums (ident,_,_) ils children) = do + -> S.Element -> State Int Element + navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do n <- get modify (+1) let showNums :: [Int] -> String showNums = intercalate "." . map show - let tit' = plainify ils + let tit' = stringify ils let tit = if writerNumberSections opts && not (null nums) then showNums nums ++ " " ++ tit' else tit' let src = case lookup ident reftable of Just x -> x Nothing -> error (ident ++ " not found in reftable") - let isSec (Sec lev _ _ _ _) = lev <= tocLevel + let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel isSec _ = False let subsecs = filter isSec children subs <- mapM (navPointNode formatter) subsecs return $ formatter n tit src subs - navPointNode _ (Blk _) = error "navPointNode encountered Blk" + navPointNode _ (S.Blk _) = error "navPointNode encountered Blk" let navMapFormatter :: Int -> String -> String -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! @@ -558,7 +580,7 @@ writeEPUB opts doc@(Pandoc meta _) = do ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (plainify $ docTitle meta) + [ unode "navLabel" $ unode "text" (stringify $ docTitle meta) , unode "content" ! [("src","title_page.xhtml")] $ () ] let tocData = UTF8.fromStringLazy $ ppTopElement $ @@ -587,7 +609,7 @@ writeEPUB opts doc@(Pandoc meta _) = do navXhtmlFormatter n tit src subs = unode "li" ! [("id", "toc-li-" ++ show n)] $ (unode "a" ! [("href",src)] - $ (unode "span" tit)) + $ tit) : case subs of [] -> [] (_:_) -> [unode "ol" ! [("class","toc")] $ subs] @@ -751,7 +773,7 @@ transformTag :: WriterOptions -> Tag String -> IO (Tag String) transformTag opts mediaRef tag@(TagOpen name attr) - | name == "video" || name == "source" || name == "img" = do + | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag let oldsrc = maybe src (</> src) $ writerSourceURL opts @@ -784,7 +806,7 @@ transformBlock opts mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw tags' <- mapM (transformTag opts mediaRef) tags - return $ RawBlock fmt (renderTags tags') + return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b transformInline :: WriterOptions @@ -798,8 +820,13 @@ transformInline opts mediaRef (Image lab (src,tit)) = do return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do - raw <- makeSelfContained Nothing $ writeHtmlInline opts x + raw <- makeSelfContained opts $ writeHtmlInline opts x return $ RawInline (Format "html") raw +transformInline opts mediaRef (RawInline fmt raw) + | fmt == Format "html" = do + let tags = parseTags raw + tags' <- mapM (transformTag opts mediaRef) tags + return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x writeHtmlInline :: WriterOptions -> Inline -> String @@ -825,11 +852,11 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . unEntity (x:xs) = x : unEntity xs mediaTypeOf :: FilePath -> Maybe String -mediaTypeOf x = case getMimeType x of - Just y@('i':'m':'a':'g':'e':_) -> Just y - Just y@('v':'i':'d':'e':'o':_) -> Just y - Just y@('a':'u':'d':'i':'o':_) -> Just y - _ -> Nothing +mediaTypeOf x = + let mediaPrefixes = ["image", "video", "audio"] in + case getMimeType x of + Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y + _ -> Nothing data IdentState = IdentState{ chapterNumber :: Int, diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 803617f95..233b8b32b 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE PatternGuards #-} + {- Copyright (c) 2011-2012, Sergey Astanin All rights reserved. @@ -28,8 +30,8 @@ module Text.Pandoc.Writers.FB2 (writeFB2) where import Control.Monad.State (StateT, evalStateT, get, modify) import Control.Monad.State (liftM, liftM2, liftIO) import Data.ByteString.Base64 (encode) -import Data.Char (toUpper, toLower, isSpace, isAscii, isControl) -import Data.List (intersperse, intercalate, isPrefixOf) +import Data.Char (toLower, isSpace, isAscii, isControl) +import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix) import Data.Either (lefts, rights) import Network.Browser (browse, request, setAllowRedirects, setOutHandler) import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody) @@ -44,8 +46,7 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock) -import Text.Pandoc.Walk +import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -253,22 +254,21 @@ readDataURI :: String -- ^ URI -> Maybe (String,String,Bool,String) -- ^ Maybe (mime,charset,isBase64,data) readDataURI uri = - let prefix = "data:" - in if not (prefix `isPrefixOf` uri) - then Nothing - else - let rest = drop (length prefix) uri - meta = takeWhile (/= ',') rest -- without trailing ',' - uridata = drop (length meta + 1) rest - parts = split (== ';') meta - (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts - in Just (mime,cs,enc,uridata) + case stripPrefix "data:" uri of + Nothing -> Nothing + Just rest -> + let meta = takeWhile (/= ',') rest -- without trailing ',' + uridata = drop (length meta + 1) rest + parts = split (== ';') meta + (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts + in Just (mime,cs,enc,uridata) + where upd str m@(mime,cs,enc) - | isMimeType str = (str,cs,enc) - | "charset=" `isPrefixOf` str = (mime,drop (length "charset=") str,enc) - | str == "base64" = (mime,cs,True) - | otherwise = m + | isMimeType str = (str,cs,enc) + | Just str' <- stripPrefix "charset=" str = (mime,str',enc) + | str == "base64" = (mime,cs,True) + | otherwise = m -- Without parameters like ;charset=...; see RFC 2045, 5.1 isMimeType :: String -> Bool @@ -296,7 +296,6 @@ fetchURL url = do let content_type = lookupHeader HdrContentType (getHeaders r) content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r return $ liftM2 (,) content_type content - where toBS :: String -> B.ByteString toBS = B.pack . map (toEnum . fromEnum) @@ -421,10 +420,6 @@ indent = indentBlock indentLines ins = let lns = split isLineBreak ins :: [[Inline]] in intercalate [LineBreak] $ map ((Str spacer):) lns -capitalize :: Inline -> Inline -capitalize (Str xs) = Str $ map toUpper xs -capitalize x = x - -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: Inline -> FBM [Content] toXml (Str s) = return [txt s] @@ -434,7 +429,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss toXml (Superscript ss) = list `liftM` wrap "sup" ss toXml (Subscript ss) = list `liftM` wrap "sub" ss -toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss +toXml (SmallCaps ss) = cMapM toXml $ capitalize ss toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific inner <- cMapM toXml ss return $ [txt "‘"] ++ inner ++ [txt "’"] diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 9a26cf2ac..36ce2ba21 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -63,6 +63,7 @@ import Text.XML.Light.Output import System.FilePath (takeExtension) import Data.Monoid import Data.Aeson (Value) +import Control.Applicative ((<$>)) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes @@ -235,6 +236,9 @@ showSecNum = concat . intersperse "." . map show -- | Converts an Element to a list item for a table of contents, -- retrieving the appropriate identifier from state. elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html) +-- Don't include the empty headers created in slide shows +-- shows when an hrule is used to separate slides without a new title: +elementToListItem _ (Sec _ _ _ [Str "\0"] _) = return Nothing elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs) | lev <= writerTOCDepth opts = do let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0) @@ -429,9 +433,11 @@ blockToHtml opts (Div attr@(_,classes,_) bs) = do let contents' = nl opts >> contents >> nl opts return $ if "notes" `elem` classes - then case writerSlideVariant opts of - RevealJsSlides -> addAttrs opts attr $ H5.aside $ contents' - NoSlides -> addAttrs opts attr $ H.div $ contents' + then let opts' = opts{ writerIncremental = False } in + -- we don't want incremental output inside speaker notes + case writerSlideVariant opts of + RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' + NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts attr $ H.div $ contents' blockToHtml _ (RawBlock f str) @@ -698,18 +704,18 @@ inlineToHtml opts inline = else DisplayBlock let conf = useShortEmptyTags (const False) defaultConfigPP - case texMathToMathML dt str of - Right r -> return $ preEscapedString $ - ppcElement conf r - Left _ -> inlineListToHtml opts - (readTeXMath' t str) >>= return . - (H.span ! A.class_ "math") + case writeMathML dt <$> readTeX str of + Right r -> return $ preEscapedString $ + ppcElement conf r + Left _ -> inlineListToHtml opts + (texMathToInlines t str) >>= + return . (H.span ! A.class_ "math") MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" PlainMath -> do - x <- inlineListToHtml opts (readTeXMath' t str) + x <- inlineListToHtml opts (texMathToInlines t str) let m = H.span ! A.class_ "math" $ x let brtag = if writerHtml5 opts then H5.br else H.br return $ case t of @@ -723,10 +729,6 @@ inlineToHtml opts inline = _ -> return mempty | f == Format "html" -> return $ preEscapedString str | otherwise -> return mempty - (Link [Str str] (s,_)) | "mailto:" `isPrefixOf` s && - s == escapeURI ("mailto" ++ str) -> - -- autolink - return $ obfuscateLink opts str s (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt return $ obfuscateLink opts (renderHtml linkText) s diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 1c82839d0..14f398da9 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Options import Data.List ( intersperse, transpose ) import Text.Pandoc.Pretty import Control.Monad.State -import Text.Pandoc.Readers.TeXMath (readTeXMath') +import Text.Pandoc.Readers.TeXMath (texMathToInlines) import Network.URI (isURI) import Data.Default @@ -319,7 +319,7 @@ inlineToHaddock opts (Math mt str) = do let adjust x = case mt of DisplayMath -> cr <> x <> cr InlineMath -> x - adjust `fmap` (inlineListToHaddock opts $ readTeXMath' mt str) + adjust `fmap` (inlineListToHaddock opts $ texMathToInlines mt str) inlineToHaddock _ (RawInline f str) | f == "haddock" = return $ text str | otherwise = return empty diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 19d486b25..ae20efd4b 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -42,7 +42,7 @@ type WS a = State WriterState a defaultWriterState :: WriterState defaultWriterState = WriterState{ - blockStyles = Set.empty + blockStyles = Set.empty , inlineStyles = Set.empty , links = [] , listDepth = 1 @@ -267,7 +267,7 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs $ inTags False "BorderColor" [("type","enumeration")] (text "Black") $$ (inTags False "Destination" [("type","object")] $ text $ "HyperlinkURLDestination/"++(escapeStringForXML url)) - + -- | Convert a list of Pandoc blocks to ICML. blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc @@ -352,7 +352,7 @@ listItemsToICML opts listType style attribs (first:rest) = do -- | Convert a list of blocks to ICML list items. listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc listItemToICML opts style isFirst attribs item = - let makeNumbStart (Just (beginsWith, numbStl, _)) = + let makeNumbStart (Just (beginsWith, numbStl, _)) = let doN DefaultStyle = [] doN LowerRoman = [lowerRomanName] doN UpperRoman = [upperRomanName] @@ -467,7 +467,7 @@ parStyle opts style lst = -- | Wrap a Doc in an ICML Character Style. charStyle :: Style -> Doc -> WS Doc -charStyle style content = +charStyle style content = let (stlStr, attrs) = styleToStrAttr style doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content in do diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f2f7438c4..d140932a7 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, + PatternGuards #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -37,7 +38,7 @@ import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Printf ( printf ) import Network.URI ( isURI, unEscapeString ) -import Data.List ( (\\), isSuffixOf, isInfixOf, +import Data.List ( (\\), isSuffixOf, isInfixOf, stripPrefix, isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) import Data.Maybe ( fromMaybe ) @@ -220,6 +221,7 @@ stringToLaTeX ctx (x:xs) = do '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as ']' -> "{]}" ++ rest -- optional arguments + '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest '\160' -> "~" ++ rest '\x2026' -> "\\ldots{}" ++ rest '\x2018' | ligatures -> "`" ++ rest @@ -470,19 +472,18 @@ blockToLaTeX (Table caption aligns widths heads rows) = do captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\addlinespace" - $$ text "\\caption" <> braces captionText + else text "\\caption" <> braces captionText <> "\\\\" rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns modify $ \s -> s{ stTable = True } return $ "\\begin{longtable}[c]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end + $$ capt $$ "\\toprule\\addlinespace" $$ headers $$ vcat rows' $$ "\\bottomrule" - $$ capt $$ "\\end{longtable}" toColDescriptor :: Alignment -> String @@ -742,7 +743,7 @@ inlineToLaTeX (Quoted qt lst) = do else char '\x2018' <> inner <> char '\x2019' inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str inlineToLaTeX (Math InlineMath str) = - return $ char '$' <> text str <> char '$' + return $ "\\(" <> text str <> "\\)" inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]" inlineToLaTeX (RawInline f str) @@ -757,10 +758,17 @@ inlineToLaTeX (Link txt ('#':ident, _)) = do return $ text "\\hyperref" <> brackets (text lab) <> braces contents inlineToLaTeX (Link txt (src, _)) = case txt of - [Str x] | x == src -> -- autolink + [Str x] | escapeURI x == src -> -- autolink do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString x + src' <- stringToLaTeX URLString src return $ text $ "\\url{" ++ src' ++ "}" + [Str x] | Just rest <- stripPrefix "mailto:" src, + escapeURI x == rest -> -- email autolink + do modify $ \s -> s{ stUrl = True } + src' <- stringToLaTeX URLString src + contents <- inlineListToLaTeX txt + return $ "\\href" <> braces (text src') <> + braces ("\\nolinkurl" <> braces contents) _ -> do contents <- inlineListToLaTeX txt src' <- stringToLaTeX URLString src return $ text ("\\href{" ++ src' ++ "}{") <> diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 41eb3e5be..6b2c4c200 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -36,7 +36,8 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Printf ( printf ) -import Data.List ( isPrefixOf, intersperse, intercalate ) +import Data.List ( stripPrefix, intersperse, intercalate ) +import Data.Maybe (fromMaybe) import Text.Pandoc.Pretty import Text.Pandoc.Builder (deleteMeta) import Control.Monad.State @@ -331,9 +332,9 @@ inlineToMan _ (Code _ str) = return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" inlineToMan _ (Str str) = return $ text $ escapeString str inlineToMan opts (Math InlineMath str) = - inlineListToMan opts $ readTeXMath' InlineMath str + inlineListToMan opts $ texMathToInlines InlineMath str inlineToMan opts (Math DisplayMath str) = do - contents <- inlineListToMan opts $ readTeXMath' DisplayMath str + contents <- inlineListToMan opts $ texMathToInlines DisplayMath str return $ cr <> text ".RS" $$ contents $$ text ".RE" inlineToMan _ (RawInline f str) | f == Format "man" = return $ text str @@ -343,7 +344,7 @@ inlineToMan _ (LineBreak) = return $ inlineToMan _ Space = return space inlineToMan opts (Link txt (src, _)) = do linktext <- inlineListToMan opts txt - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) return $ case txt of [Str s] | escapeURI s == srcSuffix -> diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index a67271a5d..95d4db29b 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -37,16 +37,17 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (blankline, char, space) -import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy ) +import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) +import Data.Maybe (fromMaybe) +import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy ) import Data.Char ( isSpace, isPunctuation ) import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.State import qualified Data.Set as Set import Text.Pandoc.Writers.HTML (writeHtmlString) -import Text.Pandoc.Readers.TeXMath (readTeXMath') -import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) +import Text.Pandoc.Readers.TeXMath (texMathToInlines) +import Text.HTML.TagSoup (parseTags, isTagText, Tag(..)) import Network.URI (isURI) import Data.Default import Data.Yaml (Value(Object,String,Array,Bool,Number)) @@ -77,26 +78,15 @@ writePlain :: WriterOptions -> Pandoc -> String writePlain opts document = evalState (pandocToMarkdown opts{ writerExtensions = Set.delete Ext_escaped_line_breaks $ + Set.delete Ext_pipe_tables $ + Set.delete Ext_raw_html $ + Set.delete Ext_markdown_in_html_blocks $ + Set.delete Ext_raw_tex $ + Set.delete Ext_footnotes $ + Set.delete Ext_tex_math_dollars $ + Set.delete Ext_citations $ writerExtensions opts } - document') def{ stPlain = True } - where document' = plainify document - -plainify :: Pandoc -> Pandoc -plainify = walk go - where go :: Inline -> Inline - go (Emph xs) = SmallCaps xs - go (Strong xs) = SmallCaps xs - go (Strikeout xs) = SmallCaps xs - go (Superscript xs) = SmallCaps xs - go (Subscript xs) = SmallCaps xs - go (SmallCaps xs) = SmallCaps xs - go (Code _ s) = Str s - go (Math _ s) = Str s - go (RawInline _ _) = Str "" - go (Link xs _) = SmallCaps xs - go (Image xs _) = SmallCaps $ [Str "["] ++ xs ++ [Str "]"] - go (Cite _ cits) = SmallCaps cits - go x = x + document) def{ stPlain = True } pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc pandocTitleBlock tit auths dat = @@ -187,7 +177,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do then tableOfContents opts headerBlocks else empty -- Strip off final 'references' header if markdown citations enabled - let blocks' = if not isPlain && isEnabled Ext_citations opts + let blocks' = if isEnabled Ext_citations opts then case reverse blocks of (Div (_,["references"],_) _):xs -> reverse xs _ -> blocks @@ -308,12 +298,12 @@ blockToMarkdown :: WriterOptions -- ^ Options -> State WriterState Doc blockToMarkdown _ Null = return empty blockToMarkdown opts (Div attrs ils) = do - isPlain <- gets stPlain contents <- blockListToMarkdown opts ils - return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts) - then contents <> blankline - else tagWithAttrs "div" attrs <> blankline <> + return $ if isEnabled Ext_raw_html opts && + isEnabled Ext_markdown_in_html_blocks opts + then tagWithAttrs "div" attrs <> blankline <> contents <> blankline <> "</div>" <> blankline + else contents <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines -- escape if para starts with ordered list marker @@ -337,21 +327,22 @@ blockToMarkdown opts (Para inlines) = (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) blockToMarkdown opts (RawBlock f str) | f == "html" = do - st <- get - if stPlain st - then return empty - else return $ if isEnabled Ext_markdown_attribute opts + plain <- gets stPlain + return $ if plain + then empty + else if isEnabled Ext_markdown_attribute opts then text (addMarkdownAttribute str) <> text "\n" else text str <> text "\n" | f `elem` ["latex", "tex", "markdown"] = do - st <- get - if stPlain st - then return empty - else return $ text str <> text "\n" + plain <- gets stPlain + return $ if plain + then empty + else text str <> text "\n" blockToMarkdown _ (RawBlock _ _) = return empty -blockToMarkdown _ HorizontalRule = - return $ blankline <> text "* * * * *" <> blankline +blockToMarkdown opts HorizontalRule = do + return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline blockToMarkdown opts (Header level attr inlines) = do + plain <- gets stPlain -- we calculate the id that would be used by auto_identifiers -- so we know whether to print an explicit identifier ids <- gets stIds @@ -366,19 +357,23 @@ blockToMarkdown opts (Header level attr inlines) = do _ | isEnabled Ext_header_attributes opts -> space <> attrsToMarkdown attr | otherwise -> empty - contents <- inlineListToMarkdown opts inlines - st <- get + contents <- inlineListToMarkdown opts $ + if level == 1 && plain + then capitalize inlines + else inlines let setext = writerSetextHeaders opts return $ nowrap $ case level of - 1 | setext -> + 1 | plain -> blanklines 3 <> contents <> blanklines 2 + | setext -> contents <> attr' <> cr <> text (replicate (offset contents) '=') <> blankline - 2 | setext -> + 2 | plain -> blanklines 2 <> contents <> blankline + | setext -> contents <> attr' <> cr <> text (replicate (offset contents) '-') <> blankline -- ghc interprets '#' characters in column 1 as linenum specifiers. - _ | stPlain st || isEnabled Ext_literate_haskell opts -> + _ | plain || isEnabled Ext_literate_haskell opts -> contents <> blankline _ -> text (replicate level '#') <> space <> contents <> attr' <> blankline blockToMarkdown opts (CodeBlock (_,classes,_) str) @@ -405,17 +400,15 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ attrs = if isEnabled Ext_fenced_code_attributes opts then nowrap $ " " <> attrsToMarkdown attribs else case attribs of - (_,[cls],_) -> " " <> text cls - _ -> empty + (_,(cls:_),_) -> " " <> text cls + _ -> empty blockToMarkdown opts (BlockQuote blocks) = do - st <- get + plain <- gets stPlain -- if we're writing literate haskell, put a space before the bird tracks -- so they won't be interpreted as lhs... let leader = if isEnabled Ext_literate_haskell opts then " > " - else if stPlain st - then " " - else "> " + else if plain then " " else "> " contents <- blockListToMarkdown opts blocks return $ (prefixed leader contents) <> blankline blockToMarkdown opts t@(Table caption aligns widths headers rows) = do @@ -471,7 +464,7 @@ addMarkdownAttribute :: String -> String addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of (xs,(TagOpen t attrs:rest)) -> - renderTags $ reverse rest ++ (TagOpen t attrs' : reverse xs) + renderTags' $ reverse rest ++ (TagOpen t attrs' : reverse xs) where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs, x /= "markdown"] _ -> s @@ -609,8 +602,19 @@ definitionListItemToMarkdown opts (label, defs) = do let sps = case writerTabStop opts - 3 of n | n > 0 -> text $ replicate n ' ' _ -> text " " - let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' - return $ nowrap labelText <> cr <> contents <> cr + if isEnabled Ext_compact_definition_lists opts + then do + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) + $ vcat d <> cr) defs' + return $ nowrap labelText <> cr <> contents <> cr + else do + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) + $ vcat d <> cr) defs' + let isTight = case defs of + ((Plain _ : _): _) -> True + _ -> False + return $ blankline <> nowrap labelText <> + (if isTight then cr else blankline) <> contents <> blankline else do return $ nowrap labelText <> text " " <> cr <> vsep (map vsep defs') <> blankline @@ -625,15 +629,21 @@ blockListToMarkdown opts blocks = -- code block will be treated as a list continuation paragraph where fixBlocks (b : CodeBlock attr x : rest) | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) - && isListBlock b = - b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x : - fixBlocks rest + && isListBlock b = b : commentSep : CodeBlock attr x : + fixBlocks rest + fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) fixBlocks (x : xs) = x : fixBlocks xs fixBlocks [] = [] isListBlock (BulletList _) = True isListBlock (OrderedList _ _) = True isListBlock (DefinitionList _) = True isListBlock _ = False + commentSep = RawBlock "html" "<!-- -->\n" -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. @@ -657,7 +667,11 @@ getReference label (src, tit) = do -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) lst >>= return . cat + mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat + where avoidBadWraps [] = [] + avoidBadWraps (Space:Str (c:cs):xs) + | c `elem` "-*+>" = Str (' ':c:cs) : avoidBadWraps xs + avoidBadWraps (x:xs) = x : avoidBadWraps xs escapeSpaces :: Inline -> Inline escapeSpaces (Str s) = Str $ substitute " " "\\ " s @@ -667,52 +681,67 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do - st <- get contents <- inlineListToMarkdown opts ils - return $ if stPlain st - then contents - else tagWithAttrs "span" attrs <> contents <> text "</span>" + return $ if isEnabled Ext_raw_html opts + then tagWithAttrs "span" attrs <> contents <> text "</span>" + else contents inlineToMarkdown opts (Emph lst) = do + plain <- gets stPlain contents <- inlineListToMarkdown opts lst - return $ "*" <> contents <> "*" + return $ if plain + then "_" <> contents <> "_" + else "*" <> contents <> "*" inlineToMarkdown opts (Strong lst) = do - contents <- inlineListToMarkdown opts lst - return $ "**" <> contents <> "**" + plain <- gets stPlain + if plain + then inlineListToMarkdown opts $ capitalize lst + else do + contents <- inlineListToMarkdown opts lst + return $ "**" <> contents <> "**" inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst return $ if isEnabled Ext_strikeout opts then "~~" <> contents <> "~~" else "<s>" <> contents <> "</s>" inlineToMarkdown opts (Superscript lst) = do - let lst' = walk escapeSpaces lst - contents <- inlineListToMarkdown opts lst' + contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_superscript opts then "^" <> contents <> "^" else "<sup>" <> contents <> "</sup>" inlineToMarkdown opts (Subscript lst) = do - let lst' = walk escapeSpaces lst - contents <- inlineListToMarkdown opts lst' + contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_subscript opts then "~" <> contents <> "~" else "<sub>" <> contents <> "</sub>" -inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst +inlineToMarkdown opts (SmallCaps lst) = do + plain <- gets stPlain + if plain + then inlineListToMarkdown opts $ capitalize lst + else do + contents <- inlineListToMarkdown opts lst + return $ tagWithAttrs "span" + ("",[],[("style","font-variant:small-caps;")]) + <> contents <> text "</span>" inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "‘" <> contents <> "’" inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "“" <> contents <> "”" -inlineToMarkdown opts (Code attr str) = +inlineToMarkdown opts (Code attr str) = do let tickGroups = filter (\s -> '`' `elem` s) $ group str - longest = if null tickGroups + let longest = if null tickGroups then 0 else maximum $ map length tickGroups - marker = replicate (longest + 1) '`' - spacer = if (longest == 0) then "" else " " - attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr + let marker = replicate (longest + 1) '`' + let spacer = if (longest == 0) then "" else " " + let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr then attrsToMarkdown attr else empty - in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs + plain <- gets stPlain + if plain + then return $ text str + else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs inlineToMarkdown _ (Str str) = do st <- get if stPlain st @@ -725,7 +754,11 @@ inlineToMarkdown opts (Math InlineMath str) return $ "\\(" <> text str <> "\\)" | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\(" <> text str <> "\\\\)" - | otherwise = inlineListToMarkdown opts $ readTeXMath' InlineMath str + | otherwise = do + plain <- gets stPlain + inlineListToMarkdown opts $ + (if plain then makeMathPlainer else id) $ + texMathToInlines InlineMath str inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_dollars opts = return $ "$$" <> text str <> "$$" @@ -734,16 +767,23 @@ inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\[" <> text str <> "\\\\]" | otherwise = (\x -> cr <> x <> cr) `fmap` - inlineListToMarkdown opts (readTeXMath' DisplayMath str) -inlineToMarkdown opts (RawInline f str) - | f == "html" || f == "markdown" || - (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) = - return $ text str -inlineToMarkdown _ (RawInline _ _) = return empty -inlineToMarkdown opts (LineBreak) - | isEnabled Ext_hard_line_breaks opts = return cr - | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr - | otherwise = return $ " " <> cr + inlineListToMarkdown opts (texMathToInlines DisplayMath str) +inlineToMarkdown opts (RawInline f str) = do + plain <- gets stPlain + if not plain && + ( f == "markdown" || + (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) || + (isEnabled Ext_raw_html opts && f == "html") ) + then return $ text str + else return empty +inlineToMarkdown opts (LineBreak) = do + plain <- gets stPlain + if plain || isEnabled Ext_hard_line_breaks opts + then return cr + else return $ + if isEnabled Ext_escaped_line_breaks opts + then "\\" <> cr + else " " <> cr inlineToMarkdown _ Space = return space inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Cite (c:cs) lst) @@ -776,11 +816,12 @@ inlineToMarkdown opts (Cite (c:cs) lst) modekey SuppressAuthor = "-" modekey _ = "" inlineToMarkdown opts (Link txt (src, tit)) = do + plain <- gets stPlain linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True @@ -789,22 +830,29 @@ inlineToMarkdown opts (Link txt (src, tit)) = do ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto - then "<" <> text srcSuffix <> ">" + then if plain + then text srcSuffix + else "<" <> text srcSuffix <> ">" else if useRefLinks then let first = "[" <> linktext <> "]" second = if txt == ref then "[]" else "[" <> reftext <> "]" in first <> second - else "[" <> linktext <> "](" <> - text src <> linktitle <> ")" + else if plain + then linktext + else "[" <> linktext <> "](" <> + text src <> linktitle <> ")" inlineToMarkdown opts (Image alternate (source, tit)) = do + plain <- gets stPlain let txt = if null alternate || alternate == [Str source] -- to prevent autolinks then [Str ""] else alternate linkPart <- inlineToMarkdown opts (Link txt (source, tit)) - return $ "!" <> linkPart + return $ if plain + then "[" <> linkPart <> "]" + else "!" <> linkPart inlineToMarkdown opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get @@ -812,3 +860,9 @@ inlineToMarkdown opts (Note contents) = do if isEnabled Ext_footnotes opts then return $ "[^" <> ref <> "]" else return $ "[" <> ref <> "]" + +makeMathPlainer :: [Inline] -> [Inline] +makeMathPlainer = walk go + where + go (Emph xs) = Span nullAttr xs + go x = x diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 3b987ba2b..3f392a5d0 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -37,92 +37,99 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Pretty (render) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intersect, intercalate, intersperse ) +import Data.List ( intersect, intercalate ) import Network.URI ( isURI ) +import Control.Monad.Reader import Control.Monad.State data WriterState = WriterState { stNotes :: Bool -- True if there are notes - , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" - , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list } +data WriterReader = WriterReader { + options :: WriterOptions -- Writer options + , listLevel :: String -- String at beginning of list items, e.g. "**" + , useTags :: Bool -- True if we should use HTML tags because we're in a complex list + } + +type MediaWikiWriter = ReaderT WriterReader (State WriterState) + -- | Convert Pandoc to MediaWiki. writeMediaWiki :: WriterOptions -> Pandoc -> String writeMediaWiki opts document = - evalState (pandocToMediaWiki opts document) - WriterState { stNotes = False, stListLevel = [], stUseTags = False } + let initialState = WriterState { stNotes = False } + env = WriterReader { options = opts, listLevel = [], useTags = False } + in evalState (runReaderT (pandocToMediaWiki document) env) initialState -- | Return MediaWiki representation of document. -pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String -pandocToMediaWiki opts (Pandoc meta blocks) = do +pandocToMediaWiki :: Pandoc -> MediaWikiWriter String +pandocToMediaWiki (Pandoc meta blocks) = do + opts <- asks options metadata <- metaToJSON opts - (fmap trimr . blockListToMediaWiki opts) - (inlineListToMediaWiki opts) + (fmap trimr . blockListToMediaWiki) + inlineListToMediaWiki meta - body <- blockListToMediaWiki opts blocks - notesExist <- get >>= return . stNotes + body <- blockListToMediaWiki blocks + notesExist <- gets stNotes let notes = if notesExist then "\n<references />" else "" let main = body ++ notes let context = defField "body" main - $ defField "toc" (writerTableOfContents opts) - $ metadata - if writerStandalone opts - then return $ renderTemplate' (writerTemplate opts) context - else return main + $ defField "toc" (writerTableOfContents opts) metadata + return $ if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main -- | Escape special characters for MediaWiki. escapeString :: String -> String escapeString = escapeStringForXML -- | Convert Pandoc block element to MediaWiki. -blockToMediaWiki :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState String +blockToMediaWiki :: Block -- ^ Block element + -> MediaWikiWriter String -blockToMediaWiki _ Null = return "" +blockToMediaWiki Null = return "" -blockToMediaWiki opts (Div attrs bs) = do - contents <- blockListToMediaWiki opts bs +blockToMediaWiki (Div attrs bs) = do + contents <- blockListToMediaWiki bs return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++ contents ++ "\n\n" ++ "</div>" -blockToMediaWiki opts (Plain inlines) = - inlineListToMediaWiki opts inlines +blockToMediaWiki (Plain inlines) = + inlineListToMediaWiki inlines -- title beginning with fig: indicates that the image is a figure -blockToMediaWiki opts (Para [Image txt (src,'f':'i':'g':':':tit)]) = do +blockToMediaWiki (Para [Image txt (src,'f':'i':'g':':':tit)]) = do capt <- if null txt then return "" - else ("|caption " ++) `fmap` inlineListToMediaWiki opts txt + else ("|caption " ++) `fmap` inlineListToMediaWiki txt let opt = if null txt then "" else "|alt=" ++ if null tit then capt else tit ++ capt return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" -blockToMediaWiki opts (Para inlines) = do - useTags <- get >>= return . stUseTags - listLevel <- get >>= return . stListLevel - contents <- inlineListToMediaWiki opts inlines - return $ if useTags +blockToMediaWiki (Para inlines) = do + tags <- asks useTags + lev <- asks listLevel + contents <- inlineListToMediaWiki inlines + return $ if tags then "<p>" ++ contents ++ "</p>" - else contents ++ if null listLevel then "\n" else "" + else contents ++ if null lev then "\n" else "" -blockToMediaWiki _ (RawBlock f str) +blockToMediaWiki (RawBlock f str) | f == Format "mediawiki" = return str | f == Format "html" = return str | otherwise = return "" -blockToMediaWiki _ HorizontalRule = return "\n-----\n" +blockToMediaWiki HorizontalRule = return "\n-----\n" -blockToMediaWiki opts (Header level _ inlines) = do - contents <- inlineListToMediaWiki opts inlines +blockToMediaWiki (Header level _ inlines) = do + contents <- inlineListToMediaWiki inlines let eqs = replicate level '=' return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" -blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do +blockToMediaWiki (CodeBlock (_,classes,_) str) = do let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm", "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran", @@ -132,75 +139,64 @@ blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", "visualfoxpro", "winbatch", "xml", "xpp", "z80"] - let (beg, end) = if null at - then ("<pre" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</pre>") - else ("<source lang=\"" ++ head at ++ "\">", "</source>") - return $ beg ++ escapeString str ++ end - -blockToMediaWiki opts (BlockQuote blocks) = do - contents <- blockListToMediaWiki opts blocks + return $ + if null at + then "<pre" ++ (if null classes + then ">" + else " class=\"" ++ unwords classes ++ "\">") ++ + escapeString str ++ "</pre>" + else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>" + -- note: no escape! + +blockToMediaWiki (BlockQuote blocks) = do + contents <- blockListToMediaWiki blocks return $ "<blockquote>" ++ contents ++ "</blockquote>" -blockToMediaWiki opts (Table capt aligns widths headers rows') = do +blockToMediaWiki (Table capt aligns widths headers rows') = do caption <- if null capt then return "" else do - c <- inlineListToMediaWiki opts capt + c <- inlineListToMediaWiki capt return $ "|+ " ++ trimr c ++ "\n" let headless = all null headers let allrows = if headless then rows' else headers:rows' - tableBody <- (concat . intersperse "|-\n") `fmap` - mapM (tableRowToMediaWiki opts headless aligns widths) + tableBody <- intercalate "|-\n" `fmap` + mapM (tableRowToMediaWiki headless aligns widths) (zip [1..] allrows) return $ "{|\n" ++ caption ++ tableBody ++ "|}\n" -blockToMediaWiki opts x@(BulletList items) = do - oldUseTags <- get >>= return . stUseTags - listLevel <- get >>= return . stListLevel - let useTags = oldUseTags || not (isSimpleList x) - if useTags +blockToMediaWiki x@(BulletList items) = do + tags <- fmap (|| not (isSimpleList x)) $ asks useTags + if tags then do - modify $ \s -> s { stUseTags = True } - contents <- mapM (listItemToMediaWiki opts) items - modify $ \s -> s { stUseTags = oldUseTags } + contents <- local (\ s -> s { useTags = True }) $ mapM listItemToMediaWiki items return $ "<ul>\n" ++ vcat contents ++ "</ul>\n" else do - modify $ \s -> s { stListLevel = stListLevel s ++ "*" } - contents <- mapM (listItemToMediaWiki opts) items - modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents ++ if null listLevel then "\n" else "" - -blockToMediaWiki opts x@(OrderedList attribs items) = do - oldUseTags <- get >>= return . stUseTags - listLevel <- get >>= return . stListLevel - let useTags = oldUseTags || not (isSimpleList x) - if useTags + lev <- asks listLevel + contents <- local (\s -> s { listLevel = listLevel s ++ "*" }) $ mapM listItemToMediaWiki items + return $ vcat contents ++ if null lev then "\n" else "" + +blockToMediaWiki x@(OrderedList attribs items) = do + tags <- fmap (|| not (isSimpleList x)) $ asks useTags + if tags then do - modify $ \s -> s { stUseTags = True } - contents <- mapM (listItemToMediaWiki opts) items - modify $ \s -> s { stUseTags = oldUseTags } + contents <- local (\s -> s { useTags = True }) $ mapM listItemToMediaWiki items return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ "</ol>\n" else do - modify $ \s -> s { stListLevel = stListLevel s ++ "#" } - contents <- mapM (listItemToMediaWiki opts) items - modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents ++ if null listLevel then "\n" else "" - -blockToMediaWiki opts x@(DefinitionList items) = do - oldUseTags <- get >>= return . stUseTags - listLevel <- get >>= return . stListLevel - let useTags = oldUseTags || not (isSimpleList x) - if useTags + lev <- asks listLevel + contents <- local (\s -> s { listLevel = listLevel s ++ "#" }) $ mapM listItemToMediaWiki items + return $ vcat contents ++ if null lev then "\n" else "" + +blockToMediaWiki x@(DefinitionList items) = do + tags <- fmap (|| not (isSimpleList x)) $ asks useTags + if tags then do - modify $ \s -> s { stUseTags = True } - contents <- mapM (definitionListItemToMediaWiki opts) items - modify $ \s -> s { stUseTags = oldUseTags } + contents <- local (\s -> s { useTags = True }) $ mapM definitionListItemToMediaWiki items return $ "<dl>\n" ++ vcat contents ++ "</dl>\n" else do - modify $ \s -> s { stListLevel = stListLevel s ++ ";" } - contents <- mapM (definitionListItemToMediaWiki opts) items - modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents ++ if null listLevel then "\n" else "" + lev <- asks listLevel + contents <- local (\s -> s { listLevel = listLevel s ++ ";" }) $ mapM definitionListItemToMediaWiki items + return $ vcat contents ++ if null lev then "\n" else "" -- Auxiliary functions for lists: @@ -216,31 +212,30 @@ listAttribsToString (startnum, numstyle, _) = else "") -- | Convert bullet or ordered list item (list of blocks) to MediaWiki. -listItemToMediaWiki :: WriterOptions -> [Block] -> State WriterState String -listItemToMediaWiki opts items = do - contents <- blockListToMediaWiki opts items - useTags <- get >>= return . stUseTags - if useTags +listItemToMediaWiki :: [Block] -> MediaWikiWriter String +listItemToMediaWiki items = do + contents <- blockListToMediaWiki items + tags <- asks useTags + if tags then return $ "<li>" ++ contents ++ "</li>" else do - marker <- get >>= return . stListLevel + marker <- asks listLevel return $ marker ++ " " ++ contents -- | Convert definition list item (label, list of blocks) to MediaWiki. -definitionListItemToMediaWiki :: WriterOptions - -> ([Inline],[[Block]]) - -> State WriterState String -definitionListItemToMediaWiki opts (label, items) = do - labelText <- inlineListToMediaWiki opts label - contents <- mapM (blockListToMediaWiki opts) items - useTags <- get >>= return . stUseTags - if useTags +definitionListItemToMediaWiki :: ([Inline],[[Block]]) + -> MediaWikiWriter String +definitionListItemToMediaWiki (label, items) = do + labelText <- inlineListToMediaWiki label + contents <- mapM blockListToMediaWiki items + tags <- asks useTags + if tags then return $ "<dt>" ++ labelText ++ "</dt>\n" ++ - (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents) + intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents) else do - marker <- get >>= return . stListLevel + marker <- asks listLevel return $ marker ++ " " ++ labelText ++ "\n" ++ - (intercalate "\n" $ map (\d -> init marker ++ ": " ++ d) contents) + intercalate "\n" (map (\d -> init marker ++ ": " ++ d) contents) -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. isSimpleList :: Block -> Bool @@ -283,25 +278,22 @@ vcat = intercalate "\n" -- Auxiliary functions for tables: -tableRowToMediaWiki :: WriterOptions - -> Bool +tableRowToMediaWiki :: Bool -> [Alignment] -> [Double] -> (Int, [[Block]]) - -> State WriterState String -tableRowToMediaWiki opts headless alignments widths (rownum, cells) = do - cells' <- mapM (\cellData -> - tableCellToMediaWiki opts headless rownum cellData) + -> MediaWikiWriter String +tableRowToMediaWiki headless alignments widths (rownum, cells) = do + cells' <- mapM (tableCellToMediaWiki headless rownum) $ zip3 alignments widths cells return $ unlines cells' -tableCellToMediaWiki :: WriterOptions - -> Bool +tableCellToMediaWiki :: Bool -> Int -> (Alignment, Double, [Block]) - -> State WriterState String -tableCellToMediaWiki opts headless rownum (alignment, width, bs) = do - contents <- blockListToMediaWiki opts bs + -> MediaWikiWriter String +tableCellToMediaWiki headless rownum (alignment, width, bs) = do + contents <- blockListToMediaWiki bs let marker = if rownum == 1 && not headless then "!" else "|" let percent w = show (truncate (100*w) :: Integer) ++ "%" let attrs = ["align=" ++ show (alignmentToString alignment) | @@ -313,7 +305,7 @@ tableCellToMediaWiki opts headless rownum (alignment, width, bs) = do else unwords attrs ++ "|" return $ marker ++ attr ++ trimr contents -alignmentToString :: Alignment -> [Char] +alignmentToString :: Alignment -> String alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" @@ -321,94 +313,94 @@ alignmentToString alignment = case alignment of AlignDefault -> "left" -- | Convert list of Pandoc block elements to MediaWiki. -blockListToMediaWiki :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState String -blockListToMediaWiki opts blocks = - mapM (blockToMediaWiki opts) blocks >>= return . vcat +blockListToMediaWiki :: [Block] -- ^ List of block elements + -> MediaWikiWriter String +blockListToMediaWiki blocks = + fmap vcat $ mapM blockToMediaWiki blocks -- | Convert list of Pandoc inline elements to MediaWiki. -inlineListToMediaWiki :: WriterOptions -> [Inline] -> State WriterState String -inlineListToMediaWiki opts lst = - mapM (inlineToMediaWiki opts) lst >>= return . concat +inlineListToMediaWiki :: [Inline] -> MediaWikiWriter String +inlineListToMediaWiki lst = + fmap concat $ mapM inlineToMediaWiki lst -- | Convert Pandoc inline element to MediaWiki. -inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String +inlineToMediaWiki :: Inline -> MediaWikiWriter String -inlineToMediaWiki opts (Span attrs ils) = do - contents <- inlineListToMediaWiki opts ils +inlineToMediaWiki (Span attrs ils) = do + contents <- inlineListToMediaWiki ils return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "</span>" -inlineToMediaWiki opts (Emph lst) = do - contents <- inlineListToMediaWiki opts lst +inlineToMediaWiki (Emph lst) = do + contents <- inlineListToMediaWiki lst return $ "''" ++ contents ++ "''" -inlineToMediaWiki opts (Strong lst) = do - contents <- inlineListToMediaWiki opts lst +inlineToMediaWiki (Strong lst) = do + contents <- inlineListToMediaWiki lst return $ "'''" ++ contents ++ "'''" -inlineToMediaWiki opts (Strikeout lst) = do - contents <- inlineListToMediaWiki opts lst +inlineToMediaWiki (Strikeout lst) = do + contents <- inlineListToMediaWiki lst return $ "<s>" ++ contents ++ "</s>" -inlineToMediaWiki opts (Superscript lst) = do - contents <- inlineListToMediaWiki opts lst +inlineToMediaWiki (Superscript lst) = do + contents <- inlineListToMediaWiki lst return $ "<sup>" ++ contents ++ "</sup>" -inlineToMediaWiki opts (Subscript lst) = do - contents <- inlineListToMediaWiki opts lst +inlineToMediaWiki (Subscript lst) = do + contents <- inlineListToMediaWiki lst return $ "<sub>" ++ contents ++ "</sub>" -inlineToMediaWiki opts (SmallCaps lst) = inlineListToMediaWiki opts lst +inlineToMediaWiki (SmallCaps lst) = inlineListToMediaWiki lst -inlineToMediaWiki opts (Quoted SingleQuote lst) = do - contents <- inlineListToMediaWiki opts lst +inlineToMediaWiki (Quoted SingleQuote lst) = do + contents <- inlineListToMediaWiki lst return $ "\8216" ++ contents ++ "\8217" -inlineToMediaWiki opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMediaWiki opts lst +inlineToMediaWiki (Quoted DoubleQuote lst) = do + contents <- inlineListToMediaWiki lst return $ "\8220" ++ contents ++ "\8221" -inlineToMediaWiki opts (Cite _ lst) = inlineListToMediaWiki opts lst +inlineToMediaWiki (Cite _ lst) = inlineListToMediaWiki lst -inlineToMediaWiki _ (Code _ str) = - return $ "<code>" ++ (escapeString str) ++ "</code>" +inlineToMediaWiki (Code _ str) = + return $ "<code>" ++ escapeString str ++ "</code>" -inlineToMediaWiki _ (Str str) = return $ escapeString str +inlineToMediaWiki (Str str) = return $ escapeString str -inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>" - -- note: str should NOT be escaped +inlineToMediaWiki (Math _ str) = return $ "<math>" ++ str ++ "</math>" + -- note: str should NOT be escaped -inlineToMediaWiki _ (RawInline f str) +inlineToMediaWiki (RawInline f str) | f == Format "mediawiki" = return str | f == Format "html" = return str | otherwise = return "" -inlineToMediaWiki _ (LineBreak) = return "<br />" +inlineToMediaWiki (LineBreak) = return "<br />" -inlineToMediaWiki _ Space = return " " +inlineToMediaWiki Space = return " " -inlineToMediaWiki opts (Link txt (src, _)) = do - label <- inlineListToMediaWiki opts txt +inlineToMediaWiki (Link txt (src, _)) = do + label <- inlineListToMediaWiki txt case txt of [Str s] | escapeURI s == src -> return src - _ -> if isURI src - then return $ "[" ++ src ++ " " ++ label ++ "]" - else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" + _ -> return $ if isURI src + then "[" ++ src ++ " " ++ label ++ "]" + else "[[" ++ src' ++ "|" ++ label ++ "]]" where src' = case src of '/':xs -> xs -- with leading / it's a _ -> src -- link to a help page -inlineToMediaWiki opts (Image alt (source, tit)) = do - alt' <- inlineListToMediaWiki opts alt - let txt = if (null tit) + +inlineToMediaWiki (Image alt (source, tit)) = do + alt' <- inlineListToMediaWiki alt + let txt = if null tit then if null alt then "" - else "|" ++ alt' - else "|" ++ tit + else '|' : alt' + else '|' : tit return $ "[[Image:" ++ source ++ txt ++ "]]" -inlineToMediaWiki opts (Note contents) = do - contents' <- blockListToMediaWiki opts contents +inlineToMediaWiki (Note contents) = do + contents' <- blockListToMediaWiki contents modify (\s -> s { stNotes = True }) return $ "<ref>" ++ contents' ++ "</ref>" -- note - may not work for notes with multiple blocks diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 15f7c8be8..feaa0167c 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -37,8 +37,9 @@ import Text.TeXMath import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip +import Control.Applicative ((<$>)) import Text.Pandoc.Options ( WriterOptions(..) ) -import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem, warn ) +import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem', warn ) import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition @@ -131,7 +132,7 @@ writeODT opts doc@(Pandoc meta _) = do transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline transformPicMath opts entriesRef (Image lab (src,_)) = do - res <- fetchItem (writerSourceURL opts) src + res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do warn $ "Could not find image `" ++ src ++ "', skipping..." @@ -150,7 +151,7 @@ transformPicMath opts entriesRef (Image lab (src,_)) = do transformPicMath _ entriesRef (Math t math) = do entries <- readIORef entriesRef let dt = if t == InlineMath then DisplayInline else DisplayBlock - case texMathToMathML dt math of + case writeMathML dt <$> readTeX math of Left _ -> return $ Math t math Right r -> do let conf = useShortEmptyTags (const False) defaultConfigPP diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index b6da2694c..773d142f4 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -380,7 +380,7 @@ inlineToOpenDocument o ils | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l | Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s - | Math t s <- ils = inlinesToOpenDocument o (readTeXMath' t s) + | Math t s <- ils = inlinesToOpenDocument o (texMathToInlines t s) | Cite _ l <- ils = inlinesToOpenDocument o l | RawInline f s <- ils = if f == Format "opendocument" then return $ text s @@ -504,7 +504,7 @@ paraStyle parent attrs = do tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] - indent = if (i /= 0 || b) + indent = if (i /= 0 || b) then [ ("fo:margin-left" , indentVal) , ("fo:margin-right" , "0in" ) , ("fo:text-indent" , "0in" ) @@ -534,7 +534,7 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre +data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 87046537c..414883b29 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -238,6 +238,8 @@ inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat -- | Convert Pandoc inline element to Org. inlineToOrg :: Inline -> State WriterState Doc +inlineToOrg (Span (uid, [], []) []) = + return $ "<<" <> text uid <> ">>" inlineToOrg (Span _ lst) = inlineListToOrg lst inlineToOrg (Emph lst) = do diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 31c97349b..57ebfc360 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -37,7 +37,8 @@ import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Builder (deleteMeta) -import Data.List ( isPrefixOf, intersperse, transpose ) +import Data.Maybe (fromMaybe) +import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose ) import Network.URI (isURI) import Text.Pandoc.Pretty import Control.Monad.State @@ -174,7 +175,7 @@ blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do let alt = ":alt: " <> if null tit then capt else text tit return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline blockToRST (Para inlines) - | LineBreak `elem` inlines = do -- use line block if LineBreaks + | LineBreak `elem` inlines = do -- use line block if LineBreaks lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines return $ (vcat $ map (text "| " <>) lns) <> blankline | otherwise = do @@ -401,7 +402,7 @@ inlineToRST (Link [Str str] (src, _)) if "mailto:" `isPrefixOf` src then src == escapeURI ("mailto:" ++ str) else src == escapeURI str = do - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) return $ text srcSuffix inlineToRST (Link [Image alt (imgsrc,imgtit)] (src, _tit)) = do label <- registerImage alt (imgsrc,imgtit) (Just src) diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index e0428aaa8..43405ce3c 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -36,50 +36,64 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk import Data.List ( isSuffixOf, intercalate ) -import Data.Char ( ord, chr, isDigit, toLower ) -import System.FilePath ( takeExtension ) +import Data.Char ( ord, chr, isDigit ) import qualified Data.ByteString as B +import qualified Data.Map as M import Text.Printf ( printf ) -import Network.URI ( isURI, unEscapeString ) -import qualified Control.Exception as E +import Text.Pandoc.ImageSize --- | Convert Image inlines into a raw RTF embedded image, read from a file. +-- | Convert Image inlines into a raw RTF embedded image, read from a file, +-- or a MediaBag, or the internet. -- If file not found or filetype not jpeg or png, leave the inline unchanged. -rtfEmbedImage :: Inline -> IO Inline -rtfEmbedImage x@(Image _ (src,_)) = do - let ext = map toLower (takeExtension src) - if ext `elem` [".jpg",".jpeg",".png"] && not (isURI src) - then do - let src' = unEscapeString src - imgdata <- E.catch (B.readFile src') - (\e -> let _ = (e :: E.SomeException) in return B.empty) - let bytes = map (printf "%02x") $ B.unpack imgdata - let filetype = case ext of - ".jpg" -> "\\jpegblip" - ".jpeg" -> "\\jpegblip" - ".png" -> "\\pngblip" - _ -> error "Unknown file type" - let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}" - return $ if B.null imgdata - then x - else RawInline (Format "rtf") raw - else return x -rtfEmbedImage x = return x +rtfEmbedImage :: WriterOptions -> Inline -> IO Inline +rtfEmbedImage opts x@(Image _ (src,_)) = do + result <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src + case result of + Right (imgdata, Just mime) + | mime == "image/jpeg" || mime == "image/png" -> do + let bytes = map (printf "%02x") $ B.unpack imgdata + let filetype = case mime of + "image/jpeg" -> "\\jpegblip" + "image/png" -> "\\pngblip" + _ -> error "Unknown file type" + let sizeSpec = case imageSize imgdata of + Nothing -> "" + Just sz -> "\\picw" ++ show xpx ++ + "\\pich" ++ show ypx ++ + "\\picwgoal" ++ show (xpt * 20) + ++ "\\pichgoal" ++ show (ypt * 20) + -- twip = 1/1440in = 1/20pt + where (xpx, ypx) = sizeInPixels sz + (xpt, ypt) = sizeInPoints sz + let raw = "{\\pict" ++ filetype ++ sizeSpec ++ " " ++ + concat bytes ++ "}" + return $ if B.null imgdata + then x + else RawInline (Format "rtf") raw + _ -> return x +rtfEmbedImage _ x = return x -- | Convert Pandoc to a string in rich text format, with -- images embedded as encoded binary data. writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String writeRTFWithEmbeddedImages options doc = - writeRTF options `fmap` walkM rtfEmbedImage doc + writeRTF options `fmap` walkM (rtfEmbedImage options) doc -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc meta blocks) = +writeRTF options (Pandoc meta@(Meta metamap) blocks) = let spacer = not $ all null $ docTitle meta : docDate meta : docAuthors meta + toPlain (MetaBlocks [Para ils]) = MetaInlines ils + toPlain x = x + -- adjust title, author, date so we don't get para inside para + meta' = Meta $ M.adjust toPlain "title" + . M.adjust toPlain "author" + . M.adjust toPlain "date" + $ metamap Just metadata = metaToJSON options (Just . concatMap (blockToRTF 0 AlignDefault)) (Just . inlineListToRTF) - meta + meta' body = concatMap (blockToRTF 0 AlignDefault) blocks isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options isTOCHeader _ = False @@ -324,7 +338,7 @@ inlineToRTF (Quoted DoubleQuote lst) = "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str -inlineToRTF (Math t str) = inlineListToRTF $ readTeXMath' t str +inlineToRTF (Math t str) = inlineListToRTF $ texMathToInlines t str inlineToRTF (Cite _ lst) = inlineListToRTF lst inlineToRTF (RawInline f str) | f == Format "rtf" = str |