aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc.hs10
-rw-r--r--src/Text/Pandoc/Compat/Directory.hs21
-rw-r--r--src/Text/Pandoc/MediaBag.hs107
-rw-r--r--src/Text/Pandoc/Options.hs8
-rw-r--r--src/Text/Pandoc/PDF.hs23
-rw-r--r--src/Text/Pandoc/Parsing.hs97
-rw-r--r--src/Text/Pandoc/Pretty.hs32
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs126
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs10
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs79
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs548
-rw-r--r--src/Text/Pandoc/SelfContained.hs86
-rw-r--r--src/Text/Pandoc/Shared.hs96
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs22
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs5
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs11
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs2
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs204
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs12
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs4
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs58
21 files changed, 1220 insertions, 341 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index b303fa7d7..77eb3e82f 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -63,6 +63,7 @@ module Text.Pandoc
, writers
-- * Readers: converting /to/ Pandoc format
, Reader (..)
+ , mkStringReader
, readDocx
, readMarkdown
, readMediaWiki
@@ -76,6 +77,8 @@ module Text.Pandoc
, readHaddock
, readNative
, readJSON
+ , readTxt2Tags
+ , readTxt2TagsNoMacros
-- * Writers: converting /from/ Pandoc format
, Writer (..)
, writeNative
@@ -130,6 +133,7 @@ import Text.Pandoc.Readers.Textile
import Text.Pandoc.Readers.Native
import Text.Pandoc.Readers.Haddock
import Text.Pandoc.Readers.Docx
+import Text.Pandoc.Readers.Txt2Tags
import Text.Pandoc.Writers.Native
import Text.Pandoc.Writers.Markdown
import Text.Pandoc.Writers.RST
@@ -157,6 +161,7 @@ import Text.Pandoc.Writers.Custom
import Text.Pandoc.Templates
import Text.Pandoc.Options
import Text.Pandoc.Shared (safeRead, warn)
+import Text.Pandoc.MediaBag (MediaBag)
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.List (intercalate)
@@ -200,12 +205,12 @@ markdown o s = do
return doc
data Reader = StringReader (ReaderOptions -> String -> IO Pandoc)
- | ByteStringReader (ReaderOptions -> BL.ByteString -> IO Pandoc)
+ | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag))
mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader
mkStringReader r = StringReader (\o s -> return $ r o s)
-mkBSReader :: (ReaderOptions -> BL.ByteString -> Pandoc) -> Reader
+mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader
mkBSReader r = ByteStringReader (\o s -> return $ r o s)
-- | Association list of formats and readers.
@@ -227,6 +232,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s)
,("latex" , mkStringReader readLaTeX)
,("haddock" , mkStringReader readHaddock)
,("docx" , mkBSReader readDocx)
+ ,("t2t" , mkStringReader readTxt2TagsNoMacros)
]
data Writer = PureStringWriter (WriterOptions -> Pandoc -> String)
diff --git a/src/Text/Pandoc/Compat/Directory.hs b/src/Text/Pandoc/Compat/Directory.hs
new file mode 100644
index 000000000..61dd5c525
--- /dev/null
+++ b/src/Text/Pandoc/Compat/Directory.hs
@@ -0,0 +1,21 @@
+{-# LANGUAGE CPP #-}
+module Text.Pandoc.Compat.Directory ( getModificationTime )
+ where
+
+#if MIN_VERSION_directory(1,2,0)
+import System.Directory
+
+
+#else
+import qualified System.Directory as S
+import Data.Time.Clock (UTCTime)
+import Data.Time.Clock.POSIX
+import System.Time
+
+getModificationTime :: FilePath -> IO UTCTime
+getModificationTime fp = convert `fmap` S.getModificationTime fp
+ where
+ convert (TOD x _) = posixSecondsToUTCTime (realToFrac x)
+
+#endif
+
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
new file mode 100644
index 000000000..667089f55
--- /dev/null
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -0,0 +1,107 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-
+Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.MediaBag
+ Copyright : Copyright (C) 2014 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definition of a MediaBag object to hold binary resources, and an
+interface for interacting with it.
+-}
+module Text.Pandoc.MediaBag (
+ MediaBag,
+ lookupMedia,
+ insertMedia,
+ mediaDirectory,
+ extractMediaBag
+ ) where
+import System.FilePath
+import System.Directory (createDirectoryIfMissing)
+import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as BL
+import Data.Monoid (Monoid)
+import Control.Monad (when, MonadPlus(..))
+import Text.Pandoc.MIME (getMimeType)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Maybe (fromMaybe)
+import System.IO (stderr)
+
+-- | A container for a collection of binary resources, with names and
+-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty'
+-- can be used for an empty 'MediaBag', and '<>' can be used to append
+-- two 'MediaBag's.
+newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString))
+ deriving (Monoid)
+
+instance Show MediaBag where
+ show bag = "MediaBag " ++ show (mediaDirectory bag)
+
+-- | Insert a media item into a 'MediaBag', replacing any existing
+-- value with the same name.
+insertMedia :: FilePath -- ^ relative path and canonical name of resource
+ -> Maybe String -- ^ mime type (Nothing = determine from extension)
+ -> BL.ByteString -- ^ contents of resource
+ -> MediaBag
+ -> MediaBag
+insertMedia fp mbMime contents (MediaBag mediamap) =
+ MediaBag (M.insert fp (mime, contents) mediamap)
+ where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback)
+ fallback = case takeExtension fp of
+ ".gz" -> getMimeType $ dropExtension fp
+ _ -> getMimeType fp
+
+-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
+lookupMedia :: FilePath
+ -> MediaBag
+ -> Maybe (String, BL.ByteString)
+lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap
+
+-- | Get a list of the file paths stored in a 'MediaBag', with
+-- their corresponding mime types and the lengths in bytes of the contents.
+mediaDirectory :: MediaBag -> [(String, String, Int)]
+mediaDirectory (MediaBag mediamap) =
+ M.foldWithKey (\fp (mime,contents) ->
+ ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
+
+-- | Extract contents of MediaBag to a given directory. Print informational
+-- messages if 'verbose' is true.
+extractMediaBag :: Bool
+ -> FilePath
+ -> MediaBag
+ -> IO ()
+extractMediaBag verbose dir (MediaBag mediamap) = do
+ sequence_ $ M.foldWithKey
+ (\fp (_ ,contents) ->
+ ((writeMedia verbose dir (fp, contents)):)) [] mediamap
+
+writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
+writeMedia verbose dir (subpath, bs) = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> joinPath (splitPath subpath)
+ createDirectoryIfMissing True $ takeDirectory fullpath
+ when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath
+ BL.writeFile fullpath bs
+
+
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index ac791ac74..85a6a3096 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -49,6 +49,8 @@ import Data.Set (Set)
import qualified Data.Set as Set
import Data.Default
import Text.Pandoc.Highlighting (Style, pygments)
+import Text.Pandoc.MediaBag (MediaBag)
+import Data.Monoid
-- | Individually selectable syntax extensions.
data Extension =
@@ -200,7 +202,6 @@ strictExtensions = Set.fromList
data ReaderOptions = ReaderOptions{
readerExtensions :: Set Extension -- ^ Syntax extensions
, readerSmart :: Bool -- ^ Smart punctuation
- , readerStrict :: Bool -- ^ FOR TRANSITION ONLY
, readerStandalone :: Bool -- ^ Standalone document with header
, readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX
, readerColumns :: Int -- ^ Number of columns in terminal
@@ -220,7 +221,6 @@ instance Default ReaderOptions
where def = ReaderOptions{
readerExtensions = pandocExtensions
, readerSmart = False
- , readerStrict = False
, readerStandalone = False
, readerParseRaw = False
, readerColumns = 80
@@ -315,7 +315,8 @@ data WriterOptions = WriterOptions
, writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files)
, writerTOCDepth :: Int -- ^ Number of levels to include in TOC
, writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified
- , writerReferenceDocx :: Maybe FilePath -- ^ Ptah to reference DOCX if specified
+ , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified
+ , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader
} deriving Show
instance Default WriterOptions where
@@ -358,6 +359,7 @@ instance Default WriterOptions where
, writerTOCDepth = 3
, writerReferenceODT = Nothing
, writerReferenceDocx = Nothing
+ , writerMediaBag = mempty
}
-- | Returns True if the given extension is enabled.
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index bd55c565f..35554637a 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -30,7 +30,6 @@ Conversion of LaTeX documents to PDF.
-}
module Text.Pandoc.PDF ( makePDF ) where
-import System.IO.Temp
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as BC
@@ -46,7 +45,7 @@ import Data.Maybe (fromMaybe)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walkM)
-import Text.Pandoc.Shared (fetchItem, warn)
+import Text.Pandoc.Shared (fetchItem', warn, withTempDir)
import Text.Pandoc.Options (WriterOptions(..))
import Text.Pandoc.MIME (extensionFromMimeType)
import Text.Pandoc.Process (pipeProcess)
@@ -55,14 +54,6 @@ import qualified Data.ByteString.Lazy as BL
import Data.List (intercalate)
#endif
-withTempDir :: String -> (FilePath -> IO a) -> IO a
-withTempDir =
-#ifdef _WINDOWS
- withTempDirectory "."
-#else
- withSystemTempDirectory
-#endif
-
#ifdef _WINDOWS
changePathSeparators :: FilePath -> FilePath
changePathSeparators = intercalate "/" . splitDirectories
@@ -74,26 +65,26 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex)
-> Pandoc -- ^ document
-> IO (Either ByteString ByteString)
makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
- doc' <- handleImages (writerSourceURL opts) tmpdir doc
+ doc' <- handleImages opts tmpdir doc
let source = writer opts doc'
tex2pdf' tmpdir program source
-handleImages :: Maybe String -- ^ source base URL
+handleImages :: WriterOptions
-> FilePath -- ^ temp dir to store images
-> Pandoc -- ^ document
-> IO Pandoc
-handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir)
+handleImages opts tmpdir = walkM (handleImage' opts tmpdir)
-handleImage' :: Maybe String
+handleImage' :: WriterOptions
-> FilePath
-> Inline
-> IO Inline
-handleImage' baseURL tmpdir (Image ils (src,tit)) = do
+handleImage' opts tmpdir (Image ils (src,tit)) = do
exists <- doesFileExist src
if exists
then return $ Image ils (src,tit)
else do
- res <- fetchItem baseURL src
+ res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
case res of
Right (contents, Just mime) -> do
let ext = fromMaybe (takeExtension src) $
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index eec4a3bc9..b25fca100 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -2,6 +2,7 @@
FlexibleContexts
, GeneralizedNewtypeDeriving
, TypeSynonymInstances
+, MultiParamTypeClasses
, FlexibleInstances #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -80,6 +81,7 @@ module Text.Pandoc.Parsing ( anyLine,
HeaderType (..),
ParserContext (..),
QuoteContext (..),
+ HasQuoteContext (..),
NoteTable,
NoteTable',
KeyTable,
@@ -88,7 +90,6 @@ module Text.Pandoc.Parsing ( anyLine,
toKey,
registerHeader,
smartPunctuation,
- withQuoteContext,
singleQuoteStart,
singleQuoteEnd,
doubleQuoteStart,
@@ -106,6 +107,7 @@ module Text.Pandoc.Parsing ( anyLine,
runF,
askF,
asksF,
+ token,
-- * Re-exports from Text.Pandoc.Parsec
Stream,
runParser,
@@ -160,7 +162,6 @@ module Text.Pandoc.Parsing ( anyLine,
setSourceColumn,
setSourceLine,
newPos,
- token
)
where
@@ -170,7 +171,7 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.XML (fromEntities)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
-import Text.Parsec
+import Text.Parsec hiding (token)
import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
isHexDigit, isSpace )
@@ -407,7 +408,7 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
-- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
--- the unofficial schemes coap, doi, javascript.
+-- the unofficial schemes coap, doi, javascript, isbn, pmid
schemes :: [String]
schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
"crid","data","dav","dict","dns","file","ftp","geo","go","gopher",
@@ -429,7 +430,7 @@ schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
"rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify",
"ssh","steam","svn","teamspeak","things","udp","unreal","ut2004",
"ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
- "ymsgr"]
+ "ymsgr", "isbn", "pmid"]
uriScheme :: Stream s m Char => ParserT s st m String
uriScheme = oneOfStringsCI schemes
@@ -484,7 +485,8 @@ mathDisplayWith op cl = try $ do
string op
many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
-mathDisplay :: Stream s m Char => ParserT s ParserState m String
+mathDisplay :: (HasReaderOptions st, Stream s m Char)
+ => ParserT s st m String
mathDisplay =
(guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -492,7 +494,8 @@ mathDisplay =
<|> (guardEnabled Ext_tex_math_double_backslash >>
mathDisplayWith "\\\\[" "\\\\]")
-mathInline :: Stream s m Char => ParserT s ParserState m String
+mathInline :: (HasReaderOptions st , Stream s m Char)
+ => ParserT s st m String
mathInline =
(guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -909,6 +912,21 @@ class HasReaderOptions st where
-- default
getOption f = (f . extractReaderOptions) <$> getState
+class HasQuoteContext st m where
+ getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
+ withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
+
+instance Monad m => HasQuoteContext ParserState m where
+ getQuoteContext = stateQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
+
instance HasReaderOptions ParserState where
extractReaderOptions = stateOptions
@@ -1051,9 +1069,9 @@ registerHeader (ident,classes,kvs) header' = do
failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m ()
failUnlessSmart = getOption readerSmart >>= guard
-smartPunctuation :: Stream s m Char
- => ParserT s ParserState m Inlines
- -> ParserT s ParserState m Inlines
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
smartPunctuation inlineParser = do
failUnlessSmart
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
@@ -1061,46 +1079,33 @@ smartPunctuation inlineParser = do
apostrophe :: Stream s m Char => ParserT s st m Inlines
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
-quoted :: Stream s m Char
- => ParserT s ParserState m Inlines
- -> ParserT s ParserState m Inlines
+quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
-withQuoteContext :: Stream s m t
- => QuoteContext
- -> ParserT s ParserState m a
- -> ParserT s ParserState m a
-withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-
-singleQuoted :: Stream s m Char
- => ParserT s ParserState m Inlines
- -> ParserT s ParserState m Inlines
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
singleQuoted inlineParser = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
return . B.singleQuoted . mconcat
-doubleQuoted :: Stream s m Char
- => ParserT s ParserState m Inlines
- -> ParserT s ParserState m Inlines
+doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
doubleQuoted inlineParser = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
return . B.doubleQuoted . mconcat
-failIfInQuoteContext :: Stream s m t
+failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
=> QuoteContext
- -> ParserT s ParserState m ()
+ -> ParserT s st m ()
failIfInQuoteContext context = do
- st <- getState
- if stateQuoteContext st == context
+ context' <- getQuoteContext
+ if context' == context
then fail "already inside quotes"
else return ()
@@ -1110,8 +1115,8 @@ charOrRef cs =
guard (c `elem` cs)
return c)
-singleQuoteStart :: Stream s m Char
- => ParserT s ParserState m ()
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
-- single quote start can't be right after str
@@ -1124,8 +1129,8 @@ singleQuoteEnd = try $ do
charOrRef "'\8217\146"
notFollowedBy alphaNum
-doubleQuoteStart :: Stream s m Char
- => ParserT s ParserState m ()
+doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
try $ do charOrRef "\"\8220\147"
@@ -1179,6 +1184,14 @@ citeKey = try $ do
let key = firstChar:rest
return (suppress_author, key)
+
+token :: (Stream s m t)
+ => (t -> String)
+ -> (t -> SourcePos)
+ -> (t -> Maybe a)
+ -> ParsecT s st m a
+token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
+
--
-- Macros
--
@@ -1200,9 +1213,9 @@ macro = do
else return $ rawBlock "latex" def'
-- | Apply current macros to string.
-applyMacros' :: Stream [Char] m Char
+applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
=> String
- -> ParserT [Char] ParserState m String
+ -> ParserT [Char] st m String
applyMacros' target = do
apply <- getOption readerApplyMacros
if apply
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index d25ba725f..1e72c2040 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -35,6 +35,7 @@ module Text.Pandoc.Pretty (
, render
, cr
, blankline
+ , blanklines
, space
, text
, char
@@ -100,7 +101,7 @@ data D = Text Int String
| BreakingSpace
| CarriageReturn
| NewLine
- | BlankLine
+ | BlankLines Int -- number of blank lines
deriving (Show)
newtype Doc = Doc { unDoc :: Seq D }
@@ -113,7 +114,7 @@ isBlank :: D -> Bool
isBlank BreakingSpace = True
isBlank CarriageReturn = True
isBlank NewLine = True
-isBlank BlankLine = True
+isBlank (BlankLines _) = True
isBlank (Text _ (c:_)) = isSpace c
isBlank _ = False
@@ -190,7 +191,7 @@ vsep = foldr ($+$) empty
nestle :: Doc -> Doc
nestle (Doc d) = Doc $ go d
where go x = case viewl x of
- (BlankLine :< rest) -> go rest
+ (BlankLines _ :< rest) -> go rest
(NewLine :< rest) -> go rest
_ -> x
@@ -203,7 +204,7 @@ chomp d = Doc (fromList dl')
go (BreakingSpace : xs) = go xs
go (CarriageReturn : xs) = go xs
go (NewLine : xs) = go xs
- go (BlankLine : xs) = go xs
+ go (BlankLines _ : xs) = go xs
go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs
go xs = xs
@@ -216,9 +217,10 @@ outp off s | off < 0 = do -- offset < 0 means newline characters
let pref = reverse $ dropWhile isSpace $ reverse rawpref
modify $ \st -> st{ output = fromString pref : output st
, column = column st + realLength pref }
+ let numnewlines = length $ takeWhile (=='\n') $ reverse s
modify $ \st -> st { output = fromString s : output st
, column = 0
- , newlines = newlines st + 1 }
+ , newlines = newlines st + numnewlines }
outp off s = do -- offset >= 0 (0 might be combining char)
st' <- get
let pref = prefix st'
@@ -277,15 +279,11 @@ renderList (BeforeNonBlank d : xs) =
| otherwise -> renderDoc d >> renderList xs
[] -> renderList xs
-renderList (BlankLine : xs) = do
+renderList (BlankLines num : xs) = do
st <- get
case output st of
- _ | newlines st > 1 || null xs -> return ()
- _ | column st == 0 -> do
- outp (-1) "\n"
- _ -> do
- outp (-1) "\n"
- outp (-1) "\n"
+ _ | newlines st > num || null xs -> return ()
+ | otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n")
renderList xs
renderList (CarriageReturn : xs) = do
@@ -302,7 +300,7 @@ renderList (NewLine : xs) = do
renderList (BreakingSpace : CarriageReturn : xs) = renderList (CarriageReturn:xs)
renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs)
-renderList (BreakingSpace : BlankLine : xs) = renderList (BlankLine:xs)
+renderList (BreakingSpace : BlankLines n : xs) = renderList (BlankLines n:xs)
renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs)
renderList (BreakingSpace : xs) = do
let isText (Text _ _) = True
@@ -383,9 +381,13 @@ cr = Doc $ singleton CarriageReturn
-- | Inserts a blank line unless one exists already.
-- (@blankline <> blankline@ has the same effect as @blankline@.
--- If you want multiple blank lines, use @text "\\n\\n"@.
blankline :: Doc
-blankline = Doc $ singleton BlankLine
+blankline = Doc $ singleton (BlankLines 1)
+
+-- | Inserts a blank lines unless they exists already.
+-- (@blanklines m <> blanklines n@ has the same effect as @blankline (max m n)@.
+blanklines :: Int -> Doc
+blanklines n = Doc $ singleton (BlankLines n)
-- | Uses the specified string as a prefix for every line of
-- the inside document (except the first, if not at the beginning
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 196a3cec5..86ce62ced 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -78,19 +78,17 @@ import Codec.Archive.Zip
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Builder (text, toList)
-import Text.Pandoc.MIME (getMimeType)
-import Text.Pandoc.UTF8 (toString)
import Text.Pandoc.Walk
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Readers.Docx.TexChar
import Text.Pandoc.Shared
+import Text.Pandoc.MediaBag (insertMedia, MediaBag)
import Data.Maybe (mapMaybe, fromMaybe)
-import Data.List (delete, isPrefixOf, (\\), intercalate)
-import qualified Data.ByteString as BS
+import Data.List (delete, isPrefixOf, (\\), intercalate, intersect)
+import Data.Monoid
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Base64 (encode)
import qualified Data.Map as M
import Control.Monad.Reader
import Control.Monad.State
@@ -98,16 +96,24 @@ import Text.Printf (printf)
readDocx :: ReaderOptions
-> B.ByteString
- -> Pandoc
+ -> (Pandoc, MediaBag)
readDocx opts bytes =
case archiveToDocx (toArchive bytes) of
- Right docx -> Pandoc nullMeta (docxToBlocks opts docx)
+ Right docx -> (Pandoc meta blks, mediaBag) where
+ (meta, blks, mediaBag) = (docxToOutput opts docx)
Left _ -> error $ "couldn't parse docx file"
data DState = DState { docxAnchorMap :: M.Map String String
+ , docxMediaBag :: MediaBag
, docxInHeaderBlock :: Bool
, docxInTexSubscript :: Bool }
+defaultDState :: DState
+defaultDState = DState { docxAnchorMap = M.empty
+ , docxMediaBag = mempty
+ , docxInHeaderBlock = False
+ , docxInTexSubscript = False}
+
data DEnv = DEnv { docxOptions :: ReaderOptions
, docxDocument :: Docx}
@@ -134,6 +140,65 @@ spansToKeep = []
divsToKeep :: [String]
divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
+metaStyles :: M.Map String String
+metaStyles = M.fromList [ ("Title", "title")
+ , ("Subtitle", "subtitle")
+ , ("Author", "author")
+ , ("Date", "date")
+ , ("Abstract", "abstract")]
+
+sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
+sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp))
+
+isMetaPar :: BodyPart -> Bool
+isMetaPar (Paragraph pPr _) =
+ not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
+isMetaPar _ = False
+
+isEmptyPar :: BodyPart -> Bool
+isEmptyPar (Paragraph _ parParts) =
+ all isEmptyParPart parParts
+ where
+ isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
+ isEmptyParPart _ = False
+ isEmptyElem (TextRun s) = trim s == ""
+ isEmptyElem _ = True
+isEmptyPar _ = False
+
+bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue)
+bodyPartsToMeta' [] = return M.empty
+bodyPartsToMeta' (bp : bps)
+ | (Paragraph pPr parParts) <- bp
+ , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
+ , (Just metaField) <- M.lookup c metaStyles = do
+ inlines <- parPartsToInlines parParts
+ remaining <- bodyPartsToMeta' bps
+ let
+ f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
+ f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks)
+ f m (MetaList mv) = MetaList (m : mv)
+ f m n = MetaList [m, n]
+ return $ M.insertWith f metaField (MetaInlines inlines) remaining
+bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
+
+bodyPartsToMeta :: [BodyPart] -> DocxContext Meta
+bodyPartsToMeta bps = do
+ mp <- bodyPartsToMeta' bps
+ let mp' =
+ case M.lookup "author" mp of
+ Just mv -> M.insert "author" (fixAuthors mv) mp
+ Nothing -> mp
+ return $ Meta mp'
+
+fixAuthors :: MetaValue -> MetaValue
+fixAuthors (MetaBlocks blks) =
+ MetaList $ map g $ filter f blks
+ where f (Para _) = True
+ f _ = False
+ g (Para ils) = MetaInlines ils
+ g _ = MetaInlines []
+fixAuthors mv = mv
+
runStyleToContainers :: RunStyle -> [Container Inline]
runStyleToContainers rPr =
let spanClassToContainers :: String -> [Container Inline]
@@ -154,7 +219,8 @@ runStyleToContainers rPr =
, if isStrike rPr then (Just Strikeout) else Nothing
, if isSuperScript rPr then (Just Superscript) else Nothing
, if isSubScript rPr then (Just Subscript) else Nothing
- , rUnderline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)]))
+ , rUnderline rPr >>=
+ (\f -> if f == "single" then (Just Emph) else Nothing)
]
in
classContainers ++ formatters
@@ -259,13 +325,6 @@ runToInlines (Footnote bps) =
runToInlines (Endnote bps) =
concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
-makeDataUrl :: String -> B.ByteString -> Maybe String
-makeDataUrl fp bs =
- case getMimeType fp of
- Just mime -> Just $ "data:" ++ mime ++ ";base64," ++
- toString (encode $ BS.concat $ B.toChunks bs)
- Nothing -> Nothing
-
parPartToInlines :: ParPart -> DocxContext [Inline]
parPartToInlines (PlainRun r) = runToInlines r
parPartToInlines (Insertion _ author date runs) = do
@@ -312,11 +371,9 @@ parPartToInlines (BookMark _ anchor) =
modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}
return [Span (newAnchor, ["anchor"], []) []]
parPartToInlines (Drawing fp bs) = do
- return $ case True of -- TODO: add self-contained images
- True -> [Image [] (fp, "")]
- False -> case makeDataUrl fp bs of
- Just d -> [Image [] (d, "")]
- Nothing -> [Image [] ("", "")]
+ mediaBag <- gets docxMediaBag
+ modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
+ return [Image [] (fp, "")]
parPartToInlines (InternalHyperLink anchor runs) = do
ils <- concatMapM runToInlines runs
return [Link ils ('#' : anchor, "")]
@@ -615,24 +672,25 @@ rewriteLink l@(Link ils ('#':target, title)) = do
Nothing -> l
rewriteLink il = return il
-
-bodyToBlocks :: Body -> DocxContext [Block]
-bodyToBlocks (Body bps) = do
- blks <- concatMapM bodyPartToBlocks bps >>=
+bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
+bodyToOutput (Body bps) = do
+ let (metabps, blkbps) = sepBodyParts bps
+ meta <- bodyPartsToMeta metabps
+ blks <- concatMapM bodyPartToBlocks blkbps >>=
walkM rewriteLink
- return $
- blocksToDefinitions $
- blocksToBullets $ blks
-
-docxToBlocks :: ReaderOptions -> Docx -> [Block]
-docxToBlocks opts d@(Docx (Document _ body)) =
- let dState = DState { docxAnchorMap = M.empty
- , docxInHeaderBlock = False
- , docxInTexSubscript = False}
+ mediaBag <- gets docxMediaBag
+ return $ (meta,
+ blocksToDefinitions $ blocksToBullets $ blks,
+ mediaBag)
+
+docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
+docxToOutput opts d@(Docx (Document _ body)) =
+ let dState = defaultDState
dEnv = DEnv { docxOptions = opts
, docxDocument = d}
in
- evalDocxContext (bodyToBlocks body) dEnv dState
+ evalDocxContext (bodyToOutput body) dEnv dState
+
ilToCode :: Inline -> String
ilToCode (Str s) = s
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 8541a1a3a..71938afe0 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -779,11 +779,11 @@ expandDrawingId :: String -> D ParPart
expandDrawingId s = do
target <- asks (lookupRelationship s . envRelationships)
case target of
- Just t -> do let filepath = combine "word" t
- bytes <- asks (lookup filepath . envMedia)
- case bytes of
- Just bs -> return $ Drawing filepath bs
- Nothing -> throwError DocxError
+ Just filepath -> do
+ bytes <- asks (lookup (combine "word" filepath) . envMedia)
+ case bytes of
+ Just bs -> return $ Drawing filepath bs
+ Nothing -> throwError DocxError
Nothing -> throwError DocxError
elemToParPart :: NameSpaces -> Element -> D ParPart
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 2414dfbf7..597156a5e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -40,7 +41,7 @@ import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
+import Text.Pandoc.Builder (HasMeta (..), Blocks, Inlines, trimInlines)
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing
@@ -52,6 +53,8 @@ import Control.Applicative ( (<$>), (<$), (<*) )
import Data.Monoid
import Text.Printf (printf)
import Debug.Trace (trace)
+import Data.Default (Default (..))
+import Control.Monad.Reader (Reader, runReader, asks, local, ask)
isSpace :: Char -> Bool
isSpace ' ' = True
@@ -64,17 +67,26 @@ readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
readHtml opts inp =
- case runParser parseDoc def{ stateOptions = opts } "source" tags of
+ case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } ) "source" tags of
Left err' -> error $ "\nError at " ++ show err'
Right result -> result
where tags = canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
- meta <- stateMeta <$> getState
+ meta <- stateMeta . parserState <$> getState
return $ Pandoc meta (B.toList blocks)
-type TagParser = Parser [Tag String] ParserState
+data HTMLState =
+ HTMLState
+ { parserState :: ParserState
+ }
+
+data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext }
+
+type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
+
+type TagParser = HTMLParser [Tag String]
pBody :: TagParser Blocks
pBody = pInTags "body" block
@@ -115,7 +127,6 @@ block = do
(take 60 $ show $ B.toList res)) (return ())
return res
-
pList :: TagParser Blocks
pList = pBulletList <|> pOrderedList <|> pDefinitionList
@@ -365,8 +376,8 @@ pSelfClosing f g = do
pQ :: TagParser Inlines
pQ = do
- quoteContext <- stateQuoteContext `fmap` getState
- let quoteType = case quoteContext of
+ context <- asks quoteContext
+ let quoteType = case context of
InDoubleQuote -> SingleQuote
_ -> DoubleQuote
let innerQuoteContext = if quoteType == SingleQuote
@@ -477,7 +488,8 @@ pTagText :: TagParser Inlines
pTagText = try $ do
(TagText str) <- pSatisfy isTagText
st <- getState
- case runParser (many pTagContents) st "text" str of
+ qu <- ask
+ case flip runReader qu $ runParserT (many pTagContents) st "text" str of
Left _ -> fail $ "Could not parse `" ++ str ++ "'"
Right result -> return $ mconcat result
@@ -486,7 +498,9 @@ pBlank = try $ do
(TagText str) <- pSatisfy isTagText
guard $ all isSpace str
-pTagContents :: Parser [Char] ParserState Inlines
+type InlinesParser = HTMLParser String
+
+pTagContents :: InlinesParser Inlines
pTagContents =
B.displayMath <$> mathDisplay
<|> B.math <$> mathInline
@@ -496,12 +510,11 @@ pTagContents =
<|> pSymbol
<|> pBad
-pStr :: Parser [Char] ParserState Inlines
+pStr :: InlinesParser Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
- pos <- getPosition
- updateState $ \s -> s{ stateLastStrPos = Just pos }
+ updateLastStrPos
return $ B.str result
isSpecial :: Char -> Bool
@@ -516,13 +529,13 @@ isSpecial '\8220' = True
isSpecial '\8221' = True
isSpecial _ = False
-pSymbol :: Parser [Char] ParserState Inlines
+pSymbol :: InlinesParser Inlines
pSymbol = satisfy isSpecial >>= return . B.str . (:[])
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
-pBad :: Parser [Char] ParserState Inlines
+pBad :: InlinesParser Inlines
pBad = do
c <- satisfy isBad
let c' = case c of
@@ -556,7 +569,7 @@ pBad = do
_ -> '?'
return $ B.str [c']
-pSpace :: Parser [Char] ParserState Inlines
+pSpace :: InlinesParser Inlines
pSpace = many1 (satisfy isSpace) >> return B.space
--
@@ -672,19 +685,23 @@ _ `closes` _ = False
--- parsers for use in markdown, textile readers
-- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String
+htmlInBalanced :: (Monad m)
+ => (Tag String -> Bool)
+ -> ParserT String st m String
htmlInBalanced f = try $ do
(TagOpen t _, tag) <- htmlTag f
guard $ '/' `notElem` tag -- not a self-closing tag
let stopper = htmlTag (~== TagClose t)
- let anytag = liftM snd $ htmlTag (const True)
+ let anytag = snd <$> htmlTag (const True)
contents <- many $ notFollowedBy' stopper >>
(htmlInBalanced f <|> anytag <|> count 1 anyChar)
endtag <- liftM snd stopper
return $ tag ++ concat contents ++ endtag
-- | Matches a tag meeting a certain condition.
-htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
+htmlTag :: Monad m
+ => (Tag String -> Bool)
+ -> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
@@ -707,3 +724,29 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV)
attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+-- Instances
+
+-- This signature should be more general
+-- MonadReader HTMLLocal m => HasQuoteContext st m
+instance HasQuoteContext st (Reader HTMLLocal) where
+ getQuoteContext = asks quoteContext
+ withQuoteContext q = local (\s -> s{quoteContext = q})
+
+instance HasReaderOptions HTMLState where
+ extractReaderOptions = extractReaderOptions . parserState
+
+instance Default HTMLState where
+ def = HTMLState def
+
+instance HasMeta HTMLState where
+ setMeta s b st = st {parserState = setMeta s b $ parserState st}
+ deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
+
+instance Default HTMLLocal where
+ def = HTMLLocal NoQuote
+
+instance HasLastStrPosition HTMLState where
+ setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
+ getLastStrPos = getLastStrPos . parserState
+
+
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
new file mode 100644
index 000000000..3a51b9d84
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -0,0 +1,548 @@
+{-# LANGUAGE ViewPatterns #-}
+{-
+Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Txt2Tags
+ Copyright : Copyright (C) 2014 Matthew Pickering
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Matthew Pickering <matthewtpickering@gmail.com>
+
+Conversion of txt2tags formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags
+ , getT2TMeta
+ , T2TMeta (..)
+ , readTxt2TagsNoMacros)
+ where
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder ( Inlines, Blocks, (<>)
+ , trimInlines )
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL)
+import Text.Pandoc.Parsing hiding (space, spaces, uri, macro)
+import Control.Applicative ((<$>), (<$), (<*>), (<*), (*>))
+import Data.Char (toLower)
+import Data.List (transpose, intersperse, intercalate)
+import Data.Maybe (fromMaybe)
+import Data.Monoid (Monoid, mconcat, mempty, mappend)
+--import Network.URI (isURI) -- Not sure whether to use this function
+import Control.Monad (void, guard, when)
+import Data.Default
+import Control.Monad.Reader (Reader, runReader, asks)
+
+import Data.Time.LocalTime (getZonedTime)
+import Text.Pandoc.Compat.Directory(getModificationTime)
+import Data.Time.Format (formatTime)
+import System.Locale (defaultTimeLocale)
+import System.IO.Error (catchIOError)
+
+type T2T = ParserT String ParserState (Reader T2TMeta)
+
+-- | An object for the T2T macros meta information
+-- the contents of each field is simply substituted verbatim into the file
+data T2TMeta = T2TMeta {
+ date :: String -- ^ Current date
+ , mtime :: String -- ^ Last modification time of infile
+ , infile :: FilePath -- ^ Input file
+ , outfile :: FilePath -- ^ Output file
+ } deriving Show
+
+instance Default T2TMeta where
+ def = T2TMeta "" "" "" ""
+
+-- | Get the meta information required by Txt2Tags macros
+getT2TMeta :: [FilePath] -> FilePath -> IO T2TMeta
+getT2TMeta inps out = do
+ curDate <- formatTime defaultTimeLocale "%F" <$> getZonedTime
+ let getModTime = fmap (formatTime defaultTimeLocale "%F") .
+ getModificationTime
+ curMtime <- catchIOError
+ (maximum <$> mapM getModTime inps)
+ (const (return ""))
+ return $ T2TMeta curDate curMtime (intercalate ", " inps) out
+
+-- | Read Txt2Tags from an input string returning a Pandoc document
+readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc
+readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
+
+-- | Read Txt2Tags (ignoring all macros) from an input string returning
+-- a Pandoc document
+readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc
+readTxt2TagsNoMacros = readTxt2Tags def
+
+parseT2T :: T2T Pandoc
+parseT2T = do
+ _ <- (Nothing <$ try blankline) <|> (Just <$> (count 3 anyLine))
+ config <- manyTill setting (notFollowedBy setting)
+ -- TODO: Handle settings better
+ let settings = foldr (\(k,v) -> B.setMeta k (MetaString v)) nullMeta config
+ updateState (\s -> s {stateMeta = settings})
+ body <- mconcat <$> manyTill block eof
+ return $ Pandoc mempty (B.toList body)
+
+type Keyword = String
+type Value = String
+
+setting :: T2T (Keyword, Value)
+setting = do
+ string "%!"
+ keyword <- ignoreSpacesCap (many1 alphaNum)
+ char ':'
+ value <- ignoreSpacesCap (manyTill anyChar (newline))
+ return (keyword, value)
+
+-- Blocks
+
+parseBlocks :: T2T Blocks
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: T2T Blocks
+block = do
+ choice
+ [ mempty <$ blanklines
+ , quote
+ , hrule -- hrule must go above title
+ , title
+ , commentBlock
+ , verbatim
+ , rawBlock
+ , taggedBlock
+ , list
+ , table
+ , para
+ ]
+
+title :: T2T Blocks
+title = try $ balancedTitle '+' <|> balancedTitle '='
+
+balancedTitle :: Char -> T2T Blocks
+balancedTitle c = try $ do
+ spaces
+ level <- length <$> many1 (char c)
+ guard (level <= 5) -- Max header level 5
+ heading <- manyTill (noneOf "\n\r") (count level (char c))
+ label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-"))
+ many spaceChar *> newline
+ let attr = maybe nullAttr (\x -> (x, [], [])) label
+ return $ B.headerWith attr level (trimInlines $ B.text heading)
+
+para :: T2T Blocks
+para = try $ do
+ ils <- parseInlines
+ nl <- option False (True <$ newline)
+ option (B.plain ils) (guard nl >> notFollowedBy listStart >> return (B.para ils))
+ where
+ listStart = try bulletListStart <|> orderedListStart
+
+commentBlock :: T2T Blocks
+commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment
+
+-- Seperator and Strong line treated the same
+hrule :: T2T Blocks
+hrule = try $ do
+ spaces
+ line <- many1 (oneOf "=-_")
+ guard (length line >= 20)
+ B.horizontalRule <$ blankline
+
+quote :: T2T Blocks
+quote = try $ do
+ lookAhead tab
+ rawQuote <- many1 (tab *> optional spaces *> anyLine)
+ contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
+ return $ B.blockQuote contents
+
+commentLine :: T2T Inlines
+commentLine = comment
+
+-- List Parsing code from Org Reader
+
+list :: T2T Blocks
+list = choice [bulletList, orderedList, definitionList]
+
+bulletList :: T2T Blocks
+bulletList = B.bulletList . compactify'
+ <$> many1 (listItem bulletListStart parseBlocks)
+
+orderedList :: T2T Blocks
+orderedList = B.orderedList . compactify'
+ <$> many1 (listItem orderedListStart parseBlocks)
+
+definitionList :: T2T Blocks
+definitionList = try $ do
+ B.definitionList . compactify'DL <$>
+ many1 (listItem definitionListStart definitionListEnd)
+
+definitionListEnd :: T2T (Inlines, [Blocks])
+definitionListEnd = (,) <$> (mconcat <$> manyTill inline newline) <*> ((:[]) <$> parseBlocks)
+
+genericListStart :: T2T Char
+ -> T2T Int
+genericListStart listMarker = try $
+ (2+) <$> (length <$> many spaceChar
+ <* listMarker <* space <* notFollowedBy space)
+
+-- parses bullet list \start and returns its length (excl. following whitespace)
+bulletListStart :: T2T Int
+bulletListStart = genericListStart (char '-')
+
+orderedListStart :: T2T Int
+orderedListStart = genericListStart (char '+' )
+
+definitionListStart :: T2T Int
+definitionListStart = genericListStart (char ':')
+
+-- parse raw text for one list item, excluding start marker and continuations
+listItem :: T2T Int
+ -> T2T a
+ -> T2T a
+listItem start end = try $ do
+ markerLength <- try start
+ firstLine <- anyLineNewline
+ blank <- option "" ("\n" <$ blankline)
+ rest <- concat <$> many (listContinuation markerLength)
+ parseFromString end $ firstLine ++ blank ++ rest
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int
+ -> T2T String
+listContinuation markerLength = try $
+ notFollowedBy' (blankline >> blankline)
+ *> (mappend <$> (concat <$> many1 listLine)
+ <*> many blankline)
+ where listLine = try $ indentWith markerLength *> anyLineNewline
+
+anyLineNewline :: T2T String
+anyLineNewline = (++ "\n") <$> anyLine
+
+indentWith :: Int -> T2T String
+indentWith n = count n space
+
+-- Table
+
+table :: T2T Blocks
+table = try $ do
+ header <- fmap snd <$> option mempty (try headerRow)
+ rows <- many1 (many commentLine *> tableRow)
+ let columns = transpose rows
+ let ncolumns = length columns
+ let aligns = map (foldr1 findAlign) (map (map fst) columns)
+ let rows' = map (map snd) rows
+ let size = maximum (map length rows')
+ let rowsPadded = map (pad size) rows'
+ let headerPadded = if (not (null header)) then pad size header else mempty
+ return $ B.table mempty
+ (zip aligns (replicate ncolumns 0.0))
+ headerPadded rowsPadded
+
+pad :: (Show a, Monoid a) => Int -> [a] -> [a]
+pad n xs = xs ++ (replicate (n - length xs) mempty)
+
+
+findAlign :: Alignment -> Alignment -> Alignment
+findAlign x y
+ | x == y = x
+ | otherwise = AlignDefault
+
+headerRow :: T2T [(Alignment, Blocks)]
+headerRow = genericRow (string "||")
+
+tableRow :: T2T [(Alignment, Blocks)]
+tableRow = genericRow (char '|')
+
+genericRow :: T2T a -> T2T [(Alignment, Blocks)]
+genericRow start = try $ do
+ spaces *> start
+ manyTill tableCell newline <?> "genericRow"
+
+
+tableCell :: T2T (Alignment, Blocks)
+tableCell = try $ do
+ leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead
+ content <- (manyTill inline (try $ lookAhead (cellEnd)))
+ rightSpaces <- length <$> many space
+ let align =
+ case compare leftSpaces rightSpaces of
+ LT -> AlignLeft
+ EQ -> AlignCenter
+ GT -> AlignRight
+ endOfCell
+ return $ (align, B.plain (B.trimInlines $ mconcat content))
+ where
+ cellEnd = (void newline <|> (many1 space *> endOfCell))
+
+endOfCell :: T2T ()
+endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline)
+
+-- Raw area
+
+verbatim :: T2T Blocks
+verbatim = genericBlock anyLineNewline B.codeBlock "```"
+
+rawBlock :: T2T Blocks
+rawBlock = genericBlock anyLineNewline (B.para . B.str) "\"\"\""
+
+taggedBlock :: T2T Blocks
+taggedBlock = do
+ target <- getTarget
+ genericBlock anyLineNewline (B.rawBlock target) "'''"
+
+-- Generic
+
+genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
+genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s
+
+blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupArea p f s = try $ (do
+ string s *> blankline
+ f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline))))
+
+blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupLine p f s = try (f <$> (string s *> space *> p))
+
+-- Can be in either block or inline position
+comment :: Monoid a => T2T a
+comment = try $ do
+ atStart
+ notFollowedBy macro
+ mempty <$ (char '%' *> anyLine)
+
+-- Inline
+
+parseInlines :: T2T Inlines
+parseInlines = trimInlines . mconcat <$> many1 inline
+
+inline :: T2T Inlines
+inline = do
+ choice
+ [ endline
+ , macro
+ , commentLine
+ , whitespace
+ , url
+ , link
+ , image
+ , bold
+ , underline
+ , code
+ , raw
+ , tagged
+ , strike
+ , italic
+ , code
+ , str
+ , symbol
+ ]
+
+bold :: T2T Inlines
+bold = inlineMarkup inline B.strong '*' (B.str)
+
+underline :: T2T Inlines
+underline = inlineMarkup inline B.emph '_' (B.str)
+
+strike :: T2T Inlines
+strike = inlineMarkup inline B.strikeout '-' (B.str)
+
+italic :: T2T Inlines
+italic = inlineMarkup inline B.emph '/' (B.str)
+
+code :: T2T Inlines
+code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id
+
+raw :: T2T Inlines
+raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id
+
+tagged :: T2T Inlines
+tagged = do
+ target <- getTarget
+ inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id
+
+-- Parser for markup indicated by a double character.
+-- Inline markup is greedy and glued
+-- Greedy meaning ***a*** = Bold [Str "*a*"]
+-- Glued meaning that markup must be tight to content
+-- Markup can't pass newlines
+inlineMarkup :: Monoid a
+ => (T2T a) -- Content parser
+ -> (a -> Inlines) -- Constructor
+ -> Char -- Fence
+ -> (String -> a) -- Special Case to handle ******
+ -> T2T Inlines
+inlineMarkup p f c special = try $ do
+ start <- many1 (char c)
+ let l = length start
+ guard (l >= 2)
+ when (l == 2) (void $ notFollowedBy space)
+ -- We must make sure that there is no space before the start of the
+ -- closing tags
+ body <- optionMaybe (try $ manyTill (noneOf "\n\r") $
+ (try $ lookAhead (noneOf " " >> string [c,c] )))
+ case body of
+ Just middle -> do
+ lastChar <- anyChar
+ end <- many1 (char c)
+ let parser inp = parseFromString (mconcat <$> many p) inp
+ let start' = special (drop 2 start)
+ body' <- parser (middle ++ [lastChar])
+ let end' = special (drop 2 end)
+ return $ f (start' <> body' <> end')
+ Nothing -> do -- Either bad or case such as *****
+ guard (l >= 5)
+ let body' = (replicate (l - 4) c)
+ return $ f (special body')
+
+link :: T2T Inlines
+link = try imageLink <|> titleLink
+
+-- Link with title
+titleLink :: T2T Inlines
+titleLink = try $ do
+ char '['
+ notFollowedBy space
+ tokens <- sepBy1 (many $ noneOf " ]") space
+ guard (length tokens >= 2)
+ char ']'
+ let link' = last tokens
+ guard (length link' > 0)
+ let tit = concat (intersperse " " (init tokens))
+ return $ B.link link' "" (B.text tit)
+
+-- Link with image
+imageLink :: T2T Inlines
+imageLink = try $ do
+ char '['
+ body <- image
+ many1 space
+ l <- manyTill (noneOf "\n\r ") (char ']')
+ return (B.link l "" body)
+
+macro :: T2T Inlines
+macro = try $ do
+ name <- string "%%" *> oneOfStringsCI (map fst commands)
+ optional (try $ enclosed (char '(') (char ')') anyChar)
+ lookAhead (spaceChar <|> oneOf specialChars <|> newline)
+ maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands)
+ where
+ commands = [ ("date", date), ("mtime", mtime)
+ , ("infile", infile), ("outfile", outfile)]
+
+-- raw URLs in text are automatically linked
+url :: T2T Inlines
+url = try $ do
+ (rawUrl, escapedUrl) <- (try uri <|> emailAddress)
+ return $ B.link rawUrl "" (B.str escapedUrl)
+
+uri :: T2T (String, String)
+uri = try $ do
+ address <- t2tURI
+ return (address, escapeURI address)
+
+-- The definition of a URI in the T2T source differs from the
+-- actual definition. This is a transcription of the definition in
+-- the source of v2.6
+--isT2TURI :: String -> Bool
+--isT2TURI (parse t2tURI "" -> Right _) = True
+--isT2TURI _ = False
+
+t2tURI :: T2T String
+t2tURI = do
+ start <- try ((++) <$> proto <*> urlLogin) <|> guess
+ domain <- many1 chars
+ sep <- many (char '/')
+ form' <- option mempty ((:) <$> char '?' <*> many1 form)
+ anchor' <- option mempty ((:) <$> char '#' <*> many anchor)
+ return (start ++ domain ++ sep ++ form' ++ anchor')
+ where
+ protos = ["http", "https", "ftp", "telnet", "gopher", "wais"]
+ proto = (++) <$> oneOfStrings protos <*> string "://"
+ guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23"))
+ <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.')
+ login = alphaNum <|> oneOf "_.-"
+ pass = many (noneOf " @")
+ chars = alphaNum <|> oneOf "%._/~:,=$@&+-"
+ anchor = alphaNum <|> oneOf "%._0"
+ form = chars <|> oneOf ";*"
+ urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@')
+
+
+image :: T2T Inlines
+image = try $ do
+ -- List taken from txt2tags source
+ let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"]
+ char '['
+ path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions))
+ ext <- oneOfStrings extensions
+ char ']'
+ return $ B.image (path ++ ext) "" mempty
+
+-- Characters used in markup
+specialChars :: String
+specialChars = "%*-_/|:+"
+
+tab :: T2T Char
+tab = char '\t'
+
+space :: T2T Char
+space = char ' '
+
+spaces :: T2T String
+spaces = many space
+
+endline :: T2T Inlines
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ notFollowedBy hrule
+ notFollowedBy title
+ notFollowedBy verbatim
+ notFollowedBy rawBlock
+ notFollowedBy taggedBlock
+ notFollowedBy quote
+ notFollowedBy list
+ notFollowedBy table
+ return $ B.space
+
+str :: T2T Inlines
+str = try $ do
+ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+
+whitespace :: T2T Inlines
+whitespace = try $ B.space <$ spaceChar
+
+symbol :: T2T Inlines
+symbol = B.str . (:[]) <$> oneOf specialChars
+
+-- Utility
+
+getTarget :: T2T String
+getTarget = do
+ mv <- lookupMeta "target" . stateMeta <$> getState
+ let MetaString target = fromMaybe (MetaString "html") mv
+ return target
+
+atStart :: T2T ()
+atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
+
+ignoreSpacesCap :: T2T String -> T2T String
+ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)
+
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 2a2f56281..1a4e037cf 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -32,53 +32,54 @@ the HTML using data URIs.
-}
module Text.Pandoc.SelfContained ( makeSelfContained ) where
import Text.HTML.TagSoup
-import Network.URI (isURI, escapeURIString)
+import Network.URI (isURI, escapeURIString, URI(..), parseURI)
import Data.ByteString.Base64
import qualified Data.ByteString.Char8 as B
import Data.ByteString (ByteString)
-import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
+import System.FilePath (takeExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
+import Text.Pandoc.Shared (renderTags', err, fetchItem')
+import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.UTF8 (toString, fromString)
-import Text.Pandoc.MIME (getMimeType)
-import System.Directory (doesFileExist)
+import Text.Pandoc.Options (WriterOptions(..))
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
-convertTag :: Maybe FilePath -> Tag String -> IO (Tag String)
-convertTag userdata t@(TagOpen tagname as)
+convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
+convertTag media sourceURL t@(TagOpen tagname as)
| tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
as' <- mapM processAttribute as
return $ TagOpen tagname as'
where processAttribute (x,y) =
if x == "src" || x == "href" || x == "poster"
then do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) y
+ (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y
let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
return (x, enc)
else return (x,y)
-convertTag userdata t@(TagOpen "script" as) =
+convertTag media sourceURL t@(TagOpen "script" as) =
case fromAttrib "src" t of
[] -> return t
src -> do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
-convertTag userdata t@(TagOpen "link" as) =
+convertTag media sourceURL t@(TagOpen "link" as) =
case fromAttrib "href" t of
[] -> return t
src -> do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
-convertTag _ t = return t
+convertTag _ _ t = return t
-- NOTE: This is really crude, it doesn't respect CSS comments.
-cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString
-cssURLs userdata d orig =
+cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
+ -> IO ByteString
+cssURLs media sourceURL d orig =
case B.breakSubstring "url(" orig of
(x,y) | B.null y -> return orig
| otherwise -> do
@@ -91,33 +92,21 @@ cssURLs userdata d orig =
let url' = if isURI url
then url
else d </> url
- (raw, mime) <- getRaw userdata "" url'
- rest <- cssURLs userdata d v
+ (raw, mime) <- getRaw media sourceURL "" url'
+ rest <- cssURLs media sourceURL d v
let enc = "data:" `B.append` fromString mime `B.append`
";base64," `B.append` (encode raw)
return $ x `B.append` "url(" `B.append` enc `B.append` rest
-getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
-getItem userdata f =
- if isURI f
- then openURL f >>= either handleErr return
- else do
- -- strip off trailing query or fragment part, if relative URL.
- -- this is needed for things like cmunrm.eot?#iefix,
- -- which is used to get old versions of IE to work with web fonts.
- let f' = takeWhile (\c -> c /= '?' && c /= '#') f
- let mime = case takeExtension f' of
- ".gz" -> getMimeType $ dropExtension f'
- x -> getMimeType x
- exists <- doesFileExist f'
- cont <- if exists then B.readFile f' else readDataFile userdata f'
- return (cont, mime)
- where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
-
-getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String)
-getRaw userdata mimetype src = do
+getRaw :: MediaBag -> Maybe String -> String -> String
+ -> IO (ByteString, String)
+getRaw media sourceURL mimetype src = do
let ext = map toLower $ takeExtension src
- (raw, respMime) <- getItem userdata src
+ fetchResult <- fetchItem' media sourceURL src
+ (raw, respMime) <- case fetchResult of
+ Left msg -> err 67 $ "Could not fetch " ++ src ++
+ "\n" ++ show msg
+ Right x -> return x
let raw' = if ext == ".gz"
then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
$ [raw]
@@ -127,21 +116,22 @@ getRaw userdata mimetype src = do
$ "Could not determine mime type for `" ++ src ++ "'"
(x, Nothing) -> x
(_, Just x ) -> x
+ let cssSourceURL = case parseURI src of
+ Just u
+ | uriScheme u `elem` ["http:","https:"] ->
+ Just $ show u{ uriPath = "",
+ uriQuery = "",
+ uriFragment = "" }
+ _ -> Nothing
result <- if mime == "text/css"
- then cssURLs userdata (takeDirectory src) raw'
+ then cssURLs media cssSourceURL (takeDirectory src) raw'
else return raw'
return (result, mime)
-- | Convert HTML into self-contained HTML, incorporating images,
--- scripts, and CSS using data: URIs. Items specified using absolute
--- URLs will be downloaded; those specified using relative URLs will
--- be sought first relative to the working directory, then relative
--- to the user data directory (if the first parameter is 'Just'
--- a directory), and finally relative to pandoc's default data
--- directory.
-makeSelfContained :: Maybe FilePath -> String -> IO String
-makeSelfContained userdata inp = do
+-- scripts, and CSS using data: URIs.
+makeSelfContained :: WriterOptions -> String -> IO String
+makeSelfContained opts inp = do
let tags = parseTags inp
- out' <- mapM (convertTag userdata) tags
+ out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags
return $ renderTags' out'
-
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index bb13836f2..f0e5bbe5d 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -59,6 +59,7 @@ module Text.Pandoc.Shared (
normalizeBlocks,
removeFormatting,
stringify,
+ capitalize,
compactify,
compactify',
compactify'DL,
@@ -77,16 +78,20 @@ module Text.Pandoc.Shared (
readDataFile,
readDataFileUTF8,
fetchItem,
+ fetchItem',
openURL,
-- * Error handling
err,
warn,
-- * Safe read
- safeRead
+ safeRead,
+ -- * Temp directory
+ withTempDir
) where
import Text.Pandoc.Definition
import Text.Pandoc.Walk
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
@@ -97,11 +102,11 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,
import Data.List ( find, isPrefixOf, intercalate )
import qualified Data.Map as M
import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
- unEscapeString, parseURIReference )
+ unEscapeString, parseURIReference, isAllowedInURI )
import qualified Data.Set as Set
import System.Directory
import Text.Pandoc.MIME (getMimeType)
-import System.FilePath ( (</>), takeExtension, dropExtension )
+import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
@@ -110,6 +115,7 @@ import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
+import System.IO.Temp
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import qualified Data.ByteString as BS
@@ -117,6 +123,7 @@ import qualified Data.ByteString.Char8 as B8
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
+import qualified Data.Text as T (toUpper, pack, unpack)
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
@@ -522,6 +529,17 @@ stringify = query go . walk deNote
deNote (Note _) = Str ""
deNote x = x
+-- | Bring all regular text in a pandoc structure to uppercase.
+--
+-- This function correctly handles cases where a lowercase character doesn't
+-- match to a single uppercase character – e.g. “Straße” would be converted
+-- to “STRASSE”, not “STRAßE”.
+capitalize :: Walkable Inline a => a -> a
+capitalize = walk go
+ where go :: Inline -> Inline
+ go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
+ go x = x
+
-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
@@ -553,20 +571,22 @@ compactify' items =
_ -> items
_ -> items
--- | Like @compactify'@, but akts on items of definition lists.
+-- | Like @compactify'@, but acts on items of definition lists.
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
compactify'DL items =
let defs = concatMap snd items
- defBlocks = reverse $ concatMap B.toList defs
- in case defBlocks of
- (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
- then let (t,ds) = last items
- lastDef = B.toList $ last ds
- ds' = init ds ++
- [B.fromList $ init lastDef ++ [Plain x]]
- in init items ++ [(t, ds')]
- else items
- _ -> items
+ in case reverse (concatMap B.toList defs) of
+ (Para x:xs)
+ | not (any isPara xs) ->
+ let (t,ds) = last items
+ lastDef = B.toList $ last ds
+ ds' = init ds ++
+ if null lastDef
+ then [B.fromList lastDef]
+ else [B.fromList $ init lastDef ++ [Plain x]]
+ in init items ++ [(t, ds')]
+ | otherwise -> items
+ _ -> items
isPara :: Block -> Bool
isPara (Para _) = True
@@ -759,21 +779,31 @@ readDataFileUTF8 userDir fname =
-- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
-fetchItem sourceURL s
- | isURI s = openURL s
- | otherwise =
- case sourceURL >>= parseURIReference of
- Just u -> case parseURIReference s of
- Just s' -> openURL $ show $
- s' `nonStrictRelativeTo` u
- Nothing -> openURL $ show u ++ "/" ++ s
- Nothing -> E.try readLocalFile
+fetchItem sourceURL s =
+ case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of
+ (_, s') | isURI s' -> openURL s'
+ (Just u, s') -> -- try fetching from relative path at source
+ case parseURIReference s' of
+ Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
+ Nothing -> openURL s' -- will throw error
+ (Nothing, _) -> E.try readLocalFile -- get from local file system
where readLocalFile = do
- let mime = case takeExtension s of
- ".gz" -> getMimeType $ dropExtension s
- x -> getMimeType x
- cont <- BS.readFile $ unEscapeString s
+ cont <- BS.readFile fp
return (cont, mime)
+ dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
+ fp = unEscapeString $ dropFragmentAndQuery s
+ mime = case takeExtension fp of
+ ".gz" -> getMimeType $ dropExtension fp
+ x -> getMimeType x
+ ensureEscaped = escapeURIString isAllowedInURI
+
+-- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
+fetchItem' :: MediaBag -> Maybe String -> String
+ -> IO (Either E.SomeException (BS.ByteString, Maybe String))
+fetchItem' media sourceURL s = do
+ case lookupMedia s media of
+ Nothing -> fetchItem sourceURL s
+ Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
@@ -833,3 +863,15 @@ safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
_ -> fail $ "Could not read `" ++ s ++ "'"
+
+--
+-- Temp directory
+--
+
+withTempDir :: String -> (FilePath -> IO a) -> IO a
+withTempDir =
+#ifdef _WINDOWS
+ withTempDirectory "."
+#else
+ withSystemTempDirectory
+#endif
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index de31e462e..6be6eb1d3 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -481,17 +481,30 @@ writeOpenXML opts (Pandoc meta blocks) = do
_ -> []
let auths = docAuthors meta
let dat = docDate meta
+ let abstract' = case lookupMeta "abstract" meta of
+ Just (MetaBlocks bs) -> bs
+ Just (MetaInlines ils) -> [Plain ils]
+ _ -> []
+ let subtitle' = case lookupMeta "subtitle" meta of
+ Just (MetaBlocks [Plain xs]) -> xs
+ Just (MetaBlocks [Para xs]) -> xs
+ Just (MetaInlines xs) -> xs
+ _ -> []
title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
- authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts
- [Para (intercalate [LineBreak] auths) | not (null auths)]
+ subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
+ authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $
+ map Para auths
date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
+ abstract <- if null abstract'
+ then return []
+ else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract'
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
convertSpace xs = xs
let blocks' = bottomUp convertSpace $ blocks
doc' <- blocksToOpenXML opts blocks'
notes' <- reverse `fmap` gets stFootnotes
- let meta' = title ++ authors ++ date
+ let meta' = title ++ subtitle ++ authors ++ date ++ abstract
return (meta' ++ doc', notes')
-- | Convert a list of Pandoc blocks to OpenXML.
@@ -817,7 +830,8 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
case M.lookup src imgs of
Just (_,_,_,elt,_) -> return [elt]
Nothing -> do
- res <- liftIO $ fetchItem (writerSourceURL opts) src
+ res <- liftIO $
+ fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
case res of
Left (_ :: E.SomeException) -> do
liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index ec206086a..34a6dcb2f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -360,7 +360,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
walkM (transformBlock opts' mediaRef)
pics <- readIORef mediaRef
let readPicEntry entries (oldsrc, newsrc) = do
- res <- fetchItem (writerSourceURL opts') oldsrc
+ res <- fetchItem' (writerMediaBag opts')
+ (writerSourceURL opts') oldsrc
case res of
Left _ -> do
warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
@@ -792,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
- raw <- makeSelfContained Nothing $ writeHtmlInline opts x
+ raw <- makeSelfContained opts $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
transformInline opts mediaRef (RawInline fmt raw)
| fmt == Format "html" = do
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 803617f95..7a9bff4fe 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -28,7 +28,7 @@ module Text.Pandoc.Writers.FB2 (writeFB2) where
import Control.Monad.State (StateT, evalStateT, get, modify)
import Control.Monad.State (liftM, liftM2, liftIO)
import Data.ByteString.Base64 (encode)
-import Data.Char (toUpper, toLower, isSpace, isAscii, isControl)
+import Data.Char (toLower, isSpace, isAscii, isControl)
import Data.List (intersperse, intercalate, isPrefixOf)
import Data.Either (lefts, rights)
import Network.Browser (browse, request, setAllowRedirects, setOutHandler)
@@ -44,8 +44,7 @@ import qualified Text.XML.Light.Cursor as XC
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
-import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock)
-import Text.Pandoc.Walk
+import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize)
-- | Data to be written at the end of the document:
-- (foot)notes, URLs, references, images.
@@ -421,10 +420,6 @@ indent = indentBlock
indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
in intercalate [LineBreak] $ map ((Str spacer):) lns
-capitalize :: Inline -> Inline
-capitalize (Str xs) = Str $ map toUpper xs
-capitalize x = x
-
-- | Convert a Pandoc's Inline element to FictionBook XML representation.
toXml :: Inline -> FBM [Content]
toXml (Str s) = return [txt s]
@@ -434,7 +429,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss
toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
toXml (Superscript ss) = list `liftM` wrap "sup" ss
toXml (Subscript ss) = list `liftM` wrap "sub" ss
-toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss
+toXml (SmallCaps ss) = cMapM toXml $ capitalize ss
toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific
inner <- cMapM toXml ss
return $ [txt "‘"] ++ inner ++ [txt "’"]
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 0197d5db6..ea704c91d 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -742,7 +742,7 @@ inlineToLaTeX (Quoted qt lst) = do
else char '\x2018' <> inner <> char '\x2019'
inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str
inlineToLaTeX (Math InlineMath str) =
- return $ char '$' <> text str <> char '$'
+ return $ "\\(" <> text str <> "\\)"
inlineToLaTeX (Math DisplayMath str) =
return $ "\\[" <> text str <> "\\]"
inlineToLaTeX (RawInline f str)
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 37f148c0a..a859267cc 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -37,7 +37,7 @@ import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Shared
import Text.Pandoc.Writers.Shared
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (blankline, char, space)
+import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy )
import Data.Char ( isSpace, isPunctuation )
import Data.Ord ( comparing )
@@ -77,27 +77,13 @@ writePlain :: WriterOptions -> Pandoc -> String
writePlain opts document =
evalState (pandocToMarkdown opts{
writerExtensions = Set.delete Ext_escaped_line_breaks $
+ Set.delete Ext_pipe_tables $
+ Set.delete Ext_raw_html $
+ Set.delete Ext_footnotes $
+ Set.delete Ext_tex_math_dollars $
+ Set.delete Ext_citations $
writerExtensions opts }
- document') def{ stPlain = True }
- where document' = plainify document
-
-plainify :: Pandoc -> Pandoc
-plainify = walk go
- where go :: Inline -> Inline
- go (Emph xs) = Span ("",[],[]) xs
- go (Strong xs) = Span ("",[],[]) xs
- go (Strikeout xs) = Span ("",[],[]) xs
- go (Superscript xs) = Span ("",[],[]) xs
- go (Subscript xs) = Span ("",[],[]) xs
- go (SmallCaps xs) = Span ("",[],[]) xs
- go (Span _ xs) = Span ("",[],[]) xs
- go (Code _ s) = Str s
- go (Math _ s) = Str s
- go (RawInline _ _) = Str ""
- go (Link xs _) = Span ("",[],[]) xs
- go (Image xs _) = Span ("",[],[]) $ [Str "["] ++ xs ++ [Str "]"]
- go (Cite _ cits) = Span ("",[],[]) cits
- go x = x
+ document) def{ stPlain = True }
pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc
pandocTitleBlock tit auths dat =
@@ -188,7 +174,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
then tableOfContents opts headerBlocks
else empty
-- Strip off final 'references' header if markdown citations enabled
- let blocks' = if not isPlain && isEnabled Ext_citations opts
+ let blocks' = if isEnabled Ext_citations opts
then case reverse blocks of
(Div (_,["references"],_) _):xs -> reverse xs
_ -> blocks
@@ -309,9 +295,9 @@ blockToMarkdown :: WriterOptions -- ^ Options
-> State WriterState Doc
blockToMarkdown _ Null = return empty
blockToMarkdown opts (Div attrs ils) = do
- isPlain <- gets stPlain
+ plain <- gets stPlain
contents <- blockListToMarkdown opts ils
- return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts)
+ return $ if plain || not (isEnabled Ext_markdown_in_html_blocks opts)
then contents <> blankline
else tagWithAttrs "div" attrs <> blankline <>
contents <> blankline <> "</div>" <> blankline
@@ -338,21 +324,22 @@ blockToMarkdown opts (Para inlines) =
(<> blankline) `fmap` blockToMarkdown opts (Plain inlines)
blockToMarkdown opts (RawBlock f str)
| f == "html" = do
- st <- get
- if stPlain st
- then return empty
- else return $ if isEnabled Ext_markdown_attribute opts
+ plain <- gets stPlain
+ return $ if plain
+ then empty
+ else if isEnabled Ext_markdown_attribute opts
then text (addMarkdownAttribute str) <> text "\n"
else text str <> text "\n"
| f `elem` ["latex", "tex", "markdown"] = do
- st <- get
- if stPlain st
- then return empty
- else return $ text str <> text "\n"
+ plain <- gets stPlain
+ return $ if plain
+ then empty
+ else text str <> text "\n"
blockToMarkdown _ (RawBlock _ _) = return empty
-blockToMarkdown _ HorizontalRule =
- return $ blankline <> text "* * * * *" <> blankline
+blockToMarkdown opts HorizontalRule = do
+ return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline
blockToMarkdown opts (Header level attr inlines) = do
+ plain <- gets stPlain
-- we calculate the id that would be used by auto_identifiers
-- so we know whether to print an explicit identifier
ids <- gets stIds
@@ -368,18 +355,19 @@ blockToMarkdown opts (Header level attr inlines) = do
space <> attrsToMarkdown attr
| otherwise -> empty
contents <- inlineListToMarkdown opts inlines
- st <- get
let setext = writerSetextHeaders opts
return $ nowrap
$ case level of
- 1 | setext ->
+ 1 | plain -> blanklines 3 <> contents <> blanklines 2
+ | setext ->
contents <> attr' <> cr <> text (replicate (offset contents) '=') <>
blankline
- 2 | setext ->
+ 2 | plain -> blanklines 2 <> contents <> blankline
+ | setext ->
contents <> attr' <> cr <> text (replicate (offset contents) '-') <>
blankline
-- ghc interprets '#' characters in column 1 as linenum specifiers.
- _ | stPlain st || isEnabled Ext_literate_haskell opts ->
+ _ | plain || isEnabled Ext_literate_haskell opts ->
contents <> blankline
_ -> text (replicate level '#') <> space <> contents <> attr' <> blankline
blockToMarkdown opts (CodeBlock (_,classes,_) str)
@@ -409,14 +397,12 @@ blockToMarkdown opts (CodeBlock attribs str) = return $
(_,(cls:_),_) -> " " <> text cls
_ -> empty
blockToMarkdown opts (BlockQuote blocks) = do
- st <- get
+ plain <- gets stPlain
-- if we're writing literate haskell, put a space before the bird tracks
-- so they won't be interpreted as lhs...
let leader = if isEnabled Ext_literate_haskell opts
then " > "
- else if stPlain st
- then " "
- else "> "
+ else if plain then " " else "> "
contents <- blockListToMarkdown opts blocks
return $ (prefixed leader contents) <> blankline
blockToMarkdown opts t@(Table caption aligns widths headers rows) = do
@@ -610,8 +596,19 @@ definitionListItemToMarkdown opts (label, defs) = do
let sps = case writerTabStop opts - 3 of
n | n > 0 -> text $ replicate n ' '
_ -> text " "
- let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs'
- return $ nowrap labelText <> cr <> contents <> cr
+ if isEnabled Ext_compact_definition_lists opts
+ then do
+ let contents = vcat $ map (\d -> hang tabStop (leader <> sps)
+ $ vcat d <> cr) defs'
+ return $ nowrap labelText <> cr <> contents <> cr
+ else do
+ let contents = vcat $ map (\d -> hang tabStop (leader <> sps)
+ $ vcat d <> cr) defs'
+ let isTight = case defs of
+ ((Plain _ : _): _) -> True
+ _ -> False
+ return $ blankline <> nowrap labelText <>
+ (if isTight then cr else blankline) <> contents <> blankline
else do
return $ nowrap labelText <> text " " <> cr <>
vsep (map vsep defs') <> blankline
@@ -626,15 +623,21 @@ blockListToMarkdown opts blocks =
-- code block will be treated as a list continuation paragraph
where fixBlocks (b : CodeBlock attr x : rest)
| (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr)
- && isListBlock b =
- b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x :
- fixBlocks rest
+ && isListBlock b = b : commentSep : CodeBlock attr x :
+ fixBlocks rest
+ fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
fixBlocks (x : xs) = x : fixBlocks xs
fixBlocks [] = []
isListBlock (BulletList _) = True
isListBlock (OrderedList _ _) = True
isListBlock (DefinitionList _) = True
isListBlock _ = False
+ commentSep = RawBlock "html" "<!-- -->\n"
-- | Get reference for target; if none exists, create unique one and return.
-- Prefer label if possible; otherwise, generate a unique key.
@@ -672,59 +675,69 @@ escapeSpaces x = x
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
inlineToMarkdown opts (Span attrs ils) = do
- st <- get
+ plain <- gets stPlain
contents <- inlineListToMarkdown opts ils
- return $ if stPlain st
+ return $ if plain
then contents
else tagWithAttrs "span" attrs <> contents <> text "</span>"
inlineToMarkdown opts (Emph lst) = do
+ plain <- gets stPlain
contents <- inlineListToMarkdown opts lst
- return $ "*" <> contents <> "*"
+ return $ if plain
+ then "_" <> contents <> "_"
+ else "*" <> contents <> "*"
inlineToMarkdown opts (Strong lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ "**" <> contents <> "**"
+ plain <- gets stPlain
+ if plain
+ then inlineListToMarkdown opts $ capitalize lst
+ else do
+ contents <- inlineListToMarkdown opts lst
+ return $ "**" <> contents <> "**"
inlineToMarkdown opts (Strikeout lst) = do
contents <- inlineListToMarkdown opts lst
- return $ if isEnabled Ext_strikeout opts
+ plain <- gets stPlain
+ return $ if plain || isEnabled Ext_strikeout opts
then "~~" <> contents <> "~~"
else "<s>" <> contents <> "</s>"
inlineToMarkdown opts (Superscript lst) = do
- let lst' = walk escapeSpaces lst
- contents <- inlineListToMarkdown opts lst'
+ contents <- inlineListToMarkdown opts $ walk escapeSpaces lst
return $ if isEnabled Ext_superscript opts
then "^" <> contents <> "^"
else "<sup>" <> contents <> "</sup>"
inlineToMarkdown opts (Subscript lst) = do
- let lst' = walk escapeSpaces lst
- contents <- inlineListToMarkdown opts lst'
+ contents <- inlineListToMarkdown opts $ walk escapeSpaces lst
return $ if isEnabled Ext_subscript opts
then "~" <> contents <> "~"
else "<sub>" <> contents <> "</sub>"
inlineToMarkdown opts (SmallCaps lst) = do
- contents <- inlineListToMarkdown opts lst
- st <- get
- return $ if stPlain st
- then contents
- else tagWithAttrs "span"
- ("",[],[("style","font-variant:small-caps;")])
- <> contents <> text "</span>"
+ plain <- gets stPlain
+ if plain
+ then inlineListToMarkdown opts $ capitalize lst
+ else do
+ contents <- inlineListToMarkdown opts lst
+ return $ tagWithAttrs "span"
+ ("",[],[("style","font-variant:small-caps;")])
+ <> contents <> text "</span>"
inlineToMarkdown opts (Quoted SingleQuote lst) = do
contents <- inlineListToMarkdown opts lst
return $ "‘" <> contents <> "’"
inlineToMarkdown opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMarkdown opts lst
return $ "“" <> contents <> "”"
-inlineToMarkdown opts (Code attr str) =
+inlineToMarkdown opts (Code attr str) = do
let tickGroups = filter (\s -> '`' `elem` s) $ group str
- longest = if null tickGroups
+ let longest = if null tickGroups
then 0
else maximum $ map length tickGroups
- marker = replicate (longest + 1) '`'
- spacer = if (longest == 0) then "" else " "
- attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
+ let marker = replicate (longest + 1) '`'
+ let spacer = if (longest == 0) then "" else " "
+ let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
then attrsToMarkdown attr
else empty
- in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs
+ plain <- gets stPlain
+ if plain
+ then return $ text str
+ else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs
inlineToMarkdown _ (Str str) = do
st <- get
if stPlain st
@@ -737,7 +750,11 @@ inlineToMarkdown opts (Math InlineMath str)
return $ "\\(" <> text str <> "\\)"
| isEnabled Ext_tex_math_double_backslash opts =
return $ "\\\\(" <> text str <> "\\\\)"
- | otherwise = inlineListToMarkdown opts $ texMathToInlines InlineMath str
+ | otherwise = do
+ plain <- gets stPlain
+ inlineListToMarkdown opts $
+ (if plain then makeMathPlainer else id) $
+ texMathToInlines InlineMath str
inlineToMarkdown opts (Math DisplayMath str)
| isEnabled Ext_tex_math_dollars opts =
return $ "$$" <> text str <> "$$"
@@ -747,15 +764,20 @@ inlineToMarkdown opts (Math DisplayMath str)
return $ "\\\\[" <> text str <> "\\\\]"
| otherwise = (\x -> cr <> x <> cr) `fmap`
inlineListToMarkdown opts (texMathToInlines DisplayMath str)
-inlineToMarkdown opts (RawInline f str)
- | f == "html" || f == "markdown" ||
- (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) =
- return $ text str
-inlineToMarkdown _ (RawInline _ _) = return empty
-inlineToMarkdown opts (LineBreak)
- | isEnabled Ext_hard_line_breaks opts = return cr
- | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr
- | otherwise = return $ " " <> cr
+inlineToMarkdown opts (RawInline f str) = do
+ plain <- gets stPlain
+ if not plain && f == "html" || f == "markdown" ||
+ (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex"))
+ then return $ text str
+ else return empty
+inlineToMarkdown opts (LineBreak) = do
+ plain <- gets stPlain
+ if plain || isEnabled Ext_hard_line_breaks opts
+ then return cr
+ else return $
+ if isEnabled Ext_escaped_line_breaks opts
+ then "\\" <> cr
+ else " " <> cr
inlineToMarkdown _ Space = return space
inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst
inlineToMarkdown opts (Cite (c:cs) lst)
@@ -788,6 +810,7 @@ inlineToMarkdown opts (Cite (c:cs) lst)
modekey SuppressAuthor = "-"
modekey _ = ""
inlineToMarkdown opts (Link txt (src, tit)) = do
+ plain <- gets stPlain
linktext <- inlineListToMarkdown opts txt
let linktitle = if null tit
then empty
@@ -801,22 +824,29 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
ref <- if useRefLinks then getReference txt (src, tit) else return []
reftext <- inlineListToMarkdown opts ref
return $ if useAuto
- then "<" <> text srcSuffix <> ">"
+ then if plain
+ then text srcSuffix
+ else "<" <> text srcSuffix <> ">"
else if useRefLinks
then let first = "[" <> linktext <> "]"
second = if txt == ref
then "[]"
else "[" <> reftext <> "]"
in first <> second
- else "[" <> linktext <> "](" <>
- text src <> linktitle <> ")"
+ else if plain
+ then linktext
+ else "[" <> linktext <> "](" <>
+ text src <> linktitle <> ")"
inlineToMarkdown opts (Image alternate (source, tit)) = do
+ plain <- gets stPlain
let txt = if null alternate || alternate == [Str source]
-- to prevent autolinks
then [Str ""]
else alternate
linkPart <- inlineToMarkdown opts (Link txt (source, tit))
- return $ "!" <> linkPart
+ return $ if plain
+ then "[" <> linkPart <> "]"
+ else "!" <> linkPart
inlineToMarkdown opts (Note contents) = do
modify (\st -> st{ stNotes = contents : stNotes st })
st <- get
@@ -824,3 +854,9 @@ inlineToMarkdown opts (Note contents) = do
if isEnabled Ext_footnotes opts
then return $ "[^" <> ref <> "]"
else return $ "[" <> ref <> "]"
+
+makeMathPlainer :: [Inline] -> [Inline]
+makeMathPlainer = walk go
+ where
+ go (Emph xs) = Span nullAttr xs
+ go x = x
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index cab55be9b..3f392a5d0 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -139,10 +139,14 @@ blockToMediaWiki (CodeBlock (_,classes,_) str) = do
"python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
"smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
"visualfoxpro", "winbatch", "xml", "xpp", "z80"]
- let (beg, end) = if null at
- then ("<pre" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</pre>")
- else ("<source lang=\"" ++ head at ++ "\">", "</source>")
- return $ beg ++ escapeString str ++ end
+ return $
+ if null at
+ then "<pre" ++ (if null classes
+ then ">"
+ else " class=\"" ++ unwords classes ++ "\">") ++
+ escapeString str ++ "</pre>"
+ else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>"
+ -- note: no escape!
blockToMediaWiki (BlockQuote blocks) = do
contents <- blockListToMediaWiki blocks
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 15f7c8be8..02794f76d 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -38,7 +38,7 @@ import qualified Data.ByteString.Lazy as B
import Text.Pandoc.UTF8 ( fromStringLazy )
import Codec.Archive.Zip
import Text.Pandoc.Options ( WriterOptions(..) )
-import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem, warn )
+import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem', warn )
import Text.Pandoc.ImageSize ( imageSize, sizeInPoints )
import Text.Pandoc.MIME ( getMimeType )
import Text.Pandoc.Definition
@@ -131,7 +131,7 @@ writeODT opts doc@(Pandoc meta _) = do
transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
transformPicMath opts entriesRef (Image lab (src,_)) = do
- res <- fetchItem (writerSourceURL opts) src
+ res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
case res of
Left (_ :: E.SomeException) -> do
warn $ "Could not find image `" ++ src ++ "', skipping..."
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index fe241b8d7..43405ce3c 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -36,42 +36,48 @@ import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk
import Data.List ( isSuffixOf, intercalate )
-import Data.Char ( ord, chr, isDigit, toLower )
-import System.FilePath ( takeExtension )
+import Data.Char ( ord, chr, isDigit )
import qualified Data.ByteString as B
import qualified Data.Map as M
import Text.Printf ( printf )
-import Network.URI ( isURI, unEscapeString )
-import qualified Control.Exception as E
+import Text.Pandoc.ImageSize
--- | Convert Image inlines into a raw RTF embedded image, read from a file.
+-- | Convert Image inlines into a raw RTF embedded image, read from a file,
+-- or a MediaBag, or the internet.
-- If file not found or filetype not jpeg or png, leave the inline unchanged.
-rtfEmbedImage :: Inline -> IO Inline
-rtfEmbedImage x@(Image _ (src,_)) = do
- let ext = map toLower (takeExtension src)
- if ext `elem` [".jpg",".jpeg",".png"] && not (isURI src)
- then do
- let src' = unEscapeString src
- imgdata <- E.catch (B.readFile src')
- (\e -> let _ = (e :: E.SomeException) in return B.empty)
- let bytes = map (printf "%02x") $ B.unpack imgdata
- let filetype = case ext of
- ".jpg" -> "\\jpegblip"
- ".jpeg" -> "\\jpegblip"
- ".png" -> "\\pngblip"
- _ -> error "Unknown file type"
- let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}"
- return $ if B.null imgdata
- then x
- else RawInline (Format "rtf") raw
- else return x
-rtfEmbedImage x = return x
+rtfEmbedImage :: WriterOptions -> Inline -> IO Inline
+rtfEmbedImage opts x@(Image _ (src,_)) = do
+ result <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
+ case result of
+ Right (imgdata, Just mime)
+ | mime == "image/jpeg" || mime == "image/png" -> do
+ let bytes = map (printf "%02x") $ B.unpack imgdata
+ let filetype = case mime of
+ "image/jpeg" -> "\\jpegblip"
+ "image/png" -> "\\pngblip"
+ _ -> error "Unknown file type"
+ let sizeSpec = case imageSize imgdata of
+ Nothing -> ""
+ Just sz -> "\\picw" ++ show xpx ++
+ "\\pich" ++ show ypx ++
+ "\\picwgoal" ++ show (xpt * 20)
+ ++ "\\pichgoal" ++ show (ypt * 20)
+ -- twip = 1/1440in = 1/20pt
+ where (xpx, ypx) = sizeInPixels sz
+ (xpt, ypt) = sizeInPoints sz
+ let raw = "{\\pict" ++ filetype ++ sizeSpec ++ " " ++
+ concat bytes ++ "}"
+ return $ if B.null imgdata
+ then x
+ else RawInline (Format "rtf") raw
+ _ -> return x
+rtfEmbedImage _ x = return x
-- | Convert Pandoc to a string in rich text format, with
-- images embedded as encoded binary data.
writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String
writeRTFWithEmbeddedImages options doc =
- writeRTF options `fmap` walkM rtfEmbedImage doc
+ writeRTF options `fmap` walkM (rtfEmbedImage options) doc
-- | Convert Pandoc to a string in rich text format.
writeRTF :: WriterOptions -> Pandoc -> String