aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/Org.hs2
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs178
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs80
3 files changed, 253 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 1dfbdd700..fc63cc11e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1093,7 +1093,7 @@ explicitOrImageLink = try $ do
char ']'
alt <- internalLink src title'
return $
- (if isImageFilename src && isImageFilename title
+ (if isImageFilename title
then B.link src "" $ B.image title mempty mempty
else fromMaybe alt (linkToInlines src title'))
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
new file mode 100644
index 000000000..706b27175
--- /dev/null
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -0,0 +1,178 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.CommonMark
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to CommonMark.
+
+CommonMark: <http://commonmark.org>
+-}
+module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
+
+import Text.Pandoc.Writers.HTML (writeHtmlString)
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (isTightList)
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import CMark
+import qualified Data.Text as T
+import Control.Monad.Identity (runIdentity, Identity)
+import Control.Monad.State (runState, State, modify, get)
+import Text.Pandoc.Walk (walkM)
+
+-- | Convert Pandoc to CommonMark.
+writeCommonMark :: WriterOptions -> Pandoc -> String
+writeCommonMark opts (Pandoc meta blocks) = rendered
+ where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes')
+ (blocks', notes) = runState (walkM processNotes blocks) []
+ notes' = if null notes
+ then []
+ else [OrderedList (1, Decimal, Period) $ reverse notes]
+ metadata = runIdentity $ metaToJSON opts
+ (blocksToCommonMark opts)
+ (inlinesToCommonMark opts)
+ meta
+ context = defField "body" main $ metadata
+ rendered = if writerStandalone opts
+ then renderTemplate' (writerTemplate opts) context
+ else main
+
+processNotes :: Inline -> State [[Block]] Inline
+processNotes (Note bs) = do
+ modify (bs :)
+ notes <- get
+ return $ Str $ "[" ++ show (length notes) ++ "]"
+processNotes x = return x
+
+node :: NodeType -> [Node] -> Node
+node = Node Nothing
+
+blocksToCommonMark :: WriterOptions -> [Block] -> Identity String
+blocksToCommonMark opts bs = return $
+ T.unpack $ nodeToCommonmark cmarkOpts colwidth
+ $ node DOCUMENT (blocksToNodes bs)
+ where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts
+ then writerColumns opts
+ else 0
+
+inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String
+inlinesToCommonMark opts ils = return $
+ T.unpack $ nodeToCommonmark cmarkOpts colwidth
+ $ node PARAGRAPH (inlinesToNodes ils)
+ where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts
+ then writerColumns opts
+ else 0
+
+blocksToNodes :: [Block] -> [Node]
+blocksToNodes = foldr blockToNodes []
+
+blockToNodes :: Block -> [Node] -> [Node]
+blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :)
+blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :)
+blockToNodes (CodeBlock (_,classes,_) xs) =
+ (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :)
+blockToNodes (RawBlock fmt xs)
+ | fmt == Format "html" = (node (HTML (T.pack xs)) [] :)
+ | otherwise = id
+blockToNodes (BlockQuote bs) =
+ (node BLOCK_QUOTE (blocksToNodes bs) :)
+blockToNodes (BulletList items) =
+ (node (LIST ListAttributes{
+ listType = BULLET_LIST,
+ listDelim = PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = 1 }) (map (node ITEM . blocksToNodes) items) :)
+blockToNodes (OrderedList (start, _sty, delim) items) =
+ (node (LIST ListAttributes{
+ listType = ORDERED_LIST,
+ listDelim = case delim of
+ OneParen -> PAREN_DELIM
+ TwoParens -> PAREN_DELIM
+ _ -> PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = start }) (map (node ITEM . blocksToNodes) items) :)
+blockToNodes HorizontalRule = (node HRULE [] :)
+blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :)
+blockToNodes (Div _ bs) = (blocksToNodes bs ++)
+blockToNodes (DefinitionList items) = blockToNodes (BulletList items')
+ where items' = map dlToBullet items
+ dlToBullet (term, ((Para xs : ys) : zs)) =
+ Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, ((Plain xs : ys) : zs)) =
+ Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, xs) =
+ Para term : concat xs
+blockToNodes t@(Table _ _ _ _ _) =
+ (node (HTML (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :)
+blockToNodes Null = id
+
+inlinesToNodes :: [Inline] -> [Node]
+inlinesToNodes = foldr inlineToNodes []
+
+inlineToNodes :: Inline -> [Node] -> [Node]
+inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :)
+inlineToNodes Space = (node (TEXT (T.pack " ")) [] :)
+inlineToNodes LineBreak = (node LINEBREAK [] :)
+inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :)
+inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :)
+inlineToNodes (Strikeout xs) =
+ ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</s>")) []]) ++ )
+inlineToNodes (Superscript xs) =
+ ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</sub>")) []]) ++ )
+inlineToNodes (Subscript xs) =
+ ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</sup>")) []]) ++ )
+inlineToNodes (SmallCaps xs) =
+ ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) []
+ : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</span>")) []]) ++ )
+inlineToNodes (Link ils (url,tit)) =
+ (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (Image ils (url,tit)) =
+ (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (RawInline fmt xs)
+ | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :)
+ | otherwise = id
+inlineToNodes (Quoted qt ils) =
+ ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++)
+ where (start, end) = case qt of
+ SingleQuote -> (T.pack "‘", T.pack "’")
+ DoubleQuote -> (T.pack "“", T.pack "”")
+inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :)
+inlineToNodes (Math mt str) =
+ case mt of
+ InlineMath ->
+ (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :)
+ DisplayMath ->
+ (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :)
+inlineToNodes (Span _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Note _) = id -- should not occur
+-- we remove Note elements in preprocessing
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 3c32434e7..4e81def60 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE ScopedTypeVariables, PatternGuards #-}
+{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-}
{-
Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
@@ -66,6 +66,7 @@ import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
extensionFromMimeType)
import Control.Applicative ((<$>), (<|>), (<*>))
import Data.Maybe (fromMaybe, mapMaybe, maybeToList)
+import Data.Char (ord)
data ListMarker = NoMarker
| BulletMarker
@@ -176,13 +177,29 @@ renumId f renumMap e
renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element]
renumIds f renumMap = map (renumId f renumMap)
+-- | Certain characters are invalid in XML even if escaped.
+-- See #1992
+stripInvalidChars :: Pandoc -> Pandoc
+stripInvalidChars = bottomUp (filter isValidChar)
+
+-- | See XML reference
+isValidChar :: Char -> Bool
+isValidChar (ord -> c)
+ | c == 0x9 = True
+ | c == 0xA = True
+ | c == 0xD = True
+ | 0x20 <= c && c <= 0xD7FF = True
+ | 0xE000 <= c && c <= 0xFFFD = True
+ | 0x10000 <= c && c <= 0x10FFFF = True
+ | otherwise = False
+
-- | Produce an Docx file from a Pandoc document.
writeDocx :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO BL.ByteString
writeDocx opts doc@(Pandoc meta _) = do
let datadir = writerUserDataDir opts
- let doc' = walk fixDisplayMath doc
+ let doc' = stripInvalidChars . walk fixDisplayMath $ doc
username <- lookup "USERNAME" <$> getEnvironment
utctime <- getCurrentTime
refArchive <- liftM (toArchive . toLazy) $
@@ -432,6 +449,17 @@ writeDocx opts doc@(Pandoc meta _) = do
]
let relsEntry = toEntry relsPath epochtime $ renderXml rels
+ -- we use dist archive for settings.xml, because Word sometimes
+ -- adds references to footnotes or endnotes we don't have...
+ -- we do, however, copy some settings over from reference
+ let settingsPath = "word/settings.xml"
+ settingsList = [ "w:autoHyphenation"
+ , "w:consecutiveHyphenLimit"
+ , "w:hyphenationZone"
+ , "w:doNotHyphenateCap"
+ ]
+ settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
+
let entryFromArchive arch path =
maybe (fail $ path ++ " corrupt or missing in reference docx")
return
@@ -439,9 +467,6 @@ writeDocx opts doc@(Pandoc meta _) = do
docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml"
themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml"
fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
- -- we use dist archive for settings.xml, because Word sometimes
- -- adds references to footnotes or endnotes we don't have...
- settingsEntry <- entryFromArchive distArchive "word/settings.xml"
webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
headerFooterEntries <- mapM (entryFromArchive refArchive) $
mapMaybe (fmap ("word/" ++) . extractTarget)
@@ -504,6 +529,21 @@ styleToOpenXml sm style =
$ backgroundColor style )
]
+copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry
+copyChildren refArchive distArchive path timestamp elNames = do
+ ref <- parseXml refArchive distArchive path
+ dist <- parseXml distArchive distArchive path
+ return $ toEntry path timestamp $ renderXml dist{
+ elContent = elContent dist ++ copyContent ref
+ }
+ where
+ strName QName{qName=name, qPrefix=prefix}
+ | Just p <- prefix = p++":"++name
+ | otherwise = name
+ shouldCopy = (`elem` elNames) . strName
+ cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
+ copyContent = map cleanElem . filterChildrenName shouldCopy
+
-- this is the lowest number used for a list numId
baseListId :: Int
baseListId = 1000
@@ -581,6 +621,33 @@ mkLvl marker lvl =
getNumId :: WS Int
getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
+makeTOC :: WriterOptions -> WS [Element]
+makeTOC opts | writerTableOfContents opts = do
+ let depth = "1-"++(show (writerTOCDepth opts))
+ let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
+ title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para [Str "Table of Contents"]])
+ return $
+ [mknode "w:sdt" [] ([
+ mknode "w:sdtPr" [] (
+ mknode "w:docPartObj" [] (
+ [mknode "w:docPartGallery" [("w:val","Table of Contents")] (),
+ mknode "w:docPartUnique" [] ()]
+ ) -- w:docPartObj
+ ), -- w:sdtPr
+ mknode "w:sdtContent" [] (title++[
+ mknode "w:p" [] (
+ mknode "w:r" [] ([
+ mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (),
+ mknode "w:instrText" [("xml:space","preserve")] tocCmd,
+ mknode "w:fldChar" [("w:fldCharType","separate")] (),
+ mknode "w:fldChar" [("w:fldCharType","end")] ()
+ ]) -- w:r
+ ) -- w:p
+ ])
+ ])] -- w:sdt
+makeTOC _ = return []
+
+
-- | Convert Pandoc document to two lists of
-- OpenXML elements (the main document and footnotes).
writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element])
@@ -613,7 +680,8 @@ writeOpenXML opts (Pandoc meta blocks) = do
let blocks' = bottomUp convertSpace blocks
doc' <- (setFirstPara >> blocksToOpenXML opts blocks')
notes' <- reverse `fmap` gets stFootnotes
- let meta' = title ++ subtitle ++ authors ++ date ++ abstract
+ toc <- makeTOC opts
+ let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
return (meta' ++ doc', notes')
-- | Convert a list of Pandoc blocks to OpenXML.