diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Compat/Directory.hs | 21 | ||||
-rw-r--r-- | src/Text/Pandoc/MediaBag.hs | 107 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/PDF.hs | 23 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 97 | ||||
-rw-r--r-- | src/Text/Pandoc/Pretty.hs | 32 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 126 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 79 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Txt2Tags.hs | 548 | ||||
-rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 86 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 96 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/FB2.hs | 11 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 204 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/MediaWiki.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ODT.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RTF.hs | 58 |
21 files changed, 1220 insertions, 341 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index b303fa7d7..77eb3e82f 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -63,6 +63,7 @@ module Text.Pandoc , writers -- * Readers: converting /to/ Pandoc format , Reader (..) + , mkStringReader , readDocx , readMarkdown , readMediaWiki @@ -76,6 +77,8 @@ module Text.Pandoc , readHaddock , readNative , readJSON + , readTxt2Tags + , readTxt2TagsNoMacros -- * Writers: converting /from/ Pandoc format , Writer (..) , writeNative @@ -130,6 +133,7 @@ import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock import Text.Pandoc.Readers.Docx +import Text.Pandoc.Readers.Txt2Tags import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST @@ -157,6 +161,7 @@ import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options import Text.Pandoc.Shared (safeRead, warn) +import Text.Pandoc.MediaBag (MediaBag) import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) @@ -200,12 +205,12 @@ markdown o s = do return doc data Reader = StringReader (ReaderOptions -> String -> IO Pandoc) - | ByteStringReader (ReaderOptions -> BL.ByteString -> IO Pandoc) + | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag)) mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader mkStringReader r = StringReader (\o s -> return $ r o s) -mkBSReader :: (ReaderOptions -> BL.ByteString -> Pandoc) -> Reader +mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader mkBSReader r = ByteStringReader (\o s -> return $ r o s) -- | Association list of formats and readers. @@ -227,6 +232,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) ,("docx" , mkBSReader readDocx) + ,("t2t" , mkStringReader readTxt2TagsNoMacros) ] data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) diff --git a/src/Text/Pandoc/Compat/Directory.hs b/src/Text/Pandoc/Compat/Directory.hs new file mode 100644 index 000000000..61dd5c525 --- /dev/null +++ b/src/Text/Pandoc/Compat/Directory.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE CPP #-} +module Text.Pandoc.Compat.Directory ( getModificationTime ) + where + +#if MIN_VERSION_directory(1,2,0) +import System.Directory + + +#else +import qualified System.Directory as S +import Data.Time.Clock (UTCTime) +import Data.Time.Clock.POSIX +import System.Time + +getModificationTime :: FilePath -> IO UTCTime +getModificationTime fp = convert `fmap` S.getModificationTime fp + where + convert (TOD x _) = posixSecondsToUTCTime (realToFrac x) + +#endif + diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs new file mode 100644 index 000000000..667089f55 --- /dev/null +++ b/src/Text/Pandoc/MediaBag.hs @@ -0,0 +1,107 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{- +Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.MediaBag + Copyright : Copyright (C) 2014 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Definition of a MediaBag object to hold binary resources, and an +interface for interacting with it. +-} +module Text.Pandoc.MediaBag ( + MediaBag, + lookupMedia, + insertMedia, + mediaDirectory, + extractMediaBag + ) where +import System.FilePath +import System.Directory (createDirectoryIfMissing) +import qualified Data.Map as M +import qualified Data.ByteString.Lazy as BL +import Data.Monoid (Monoid) +import Control.Monad (when, MonadPlus(..)) +import Text.Pandoc.MIME (getMimeType) +import qualified Text.Pandoc.UTF8 as UTF8 +import Data.Maybe (fromMaybe) +import System.IO (stderr) + +-- | A container for a collection of binary resources, with names and +-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' +-- can be used for an empty 'MediaBag', and '<>' can be used to append +-- two 'MediaBag's. +newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString)) + deriving (Monoid) + +instance Show MediaBag where + show bag = "MediaBag " ++ show (mediaDirectory bag) + +-- | Insert a media item into a 'MediaBag', replacing any existing +-- value with the same name. +insertMedia :: FilePath -- ^ relative path and canonical name of resource + -> Maybe String -- ^ mime type (Nothing = determine from extension) + -> BL.ByteString -- ^ contents of resource + -> MediaBag + -> MediaBag +insertMedia fp mbMime contents (MediaBag mediamap) = + MediaBag (M.insert fp (mime, contents) mediamap) + where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback) + fallback = case takeExtension fp of + ".gz" -> getMimeType $ dropExtension fp + _ -> getMimeType fp + +-- | Lookup a media item in a 'MediaBag', returning mime type and contents. +lookupMedia :: FilePath + -> MediaBag + -> Maybe (String, BL.ByteString) +lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap + +-- | Get a list of the file paths stored in a 'MediaBag', with +-- their corresponding mime types and the lengths in bytes of the contents. +mediaDirectory :: MediaBag -> [(String, String, Int)] +mediaDirectory (MediaBag mediamap) = + M.foldWithKey (\fp (mime,contents) -> + ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap + +-- | Extract contents of MediaBag to a given directory. Print informational +-- messages if 'verbose' is true. +extractMediaBag :: Bool + -> FilePath + -> MediaBag + -> IO () +extractMediaBag verbose dir (MediaBag mediamap) = do + sequence_ $ M.foldWithKey + (\fp (_ ,contents) -> + ((writeMedia verbose dir (fp, contents)):)) [] mediamap + +writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () +writeMedia verbose dir (subpath, bs) = do + -- we join and split to convert a/b/c to a\b\c on Windows; + -- in zip containers all paths use / + let fullpath = dir </> joinPath (splitPath subpath) + createDirectoryIfMissing True $ takeDirectory fullpath + when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath + BL.writeFile fullpath bs + + diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index ac791ac74..85a6a3096 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -49,6 +49,8 @@ import Data.Set (Set) import qualified Data.Set as Set import Data.Default import Text.Pandoc.Highlighting (Style, pygments) +import Text.Pandoc.MediaBag (MediaBag) +import Data.Monoid -- | Individually selectable syntax extensions. data Extension = @@ -200,7 +202,6 @@ strictExtensions = Set.fromList data ReaderOptions = ReaderOptions{ readerExtensions :: Set Extension -- ^ Syntax extensions , readerSmart :: Bool -- ^ Smart punctuation - , readerStrict :: Bool -- ^ FOR TRANSITION ONLY , readerStandalone :: Bool -- ^ Standalone document with header , readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX , readerColumns :: Int -- ^ Number of columns in terminal @@ -220,7 +221,6 @@ instance Default ReaderOptions where def = ReaderOptions{ readerExtensions = pandocExtensions , readerSmart = False - , readerStrict = False , readerStandalone = False , readerParseRaw = False , readerColumns = 80 @@ -315,7 +315,8 @@ data WriterOptions = WriterOptions , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) , writerTOCDepth :: Int -- ^ Number of levels to include in TOC , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified - , writerReferenceDocx :: Maybe FilePath -- ^ Ptah to reference DOCX if specified + , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified + , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader } deriving Show instance Default WriterOptions where @@ -358,6 +359,7 @@ instance Default WriterOptions where , writerTOCDepth = 3 , writerReferenceODT = Nothing , writerReferenceDocx = Nothing + , writerMediaBag = mempty } -- | Returns True if the given extension is enabled. diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index bd55c565f..35554637a 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -30,7 +30,6 @@ Conversion of LaTeX documents to PDF. -} module Text.Pandoc.PDF ( makePDF ) where -import System.IO.Temp import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as BC @@ -46,7 +45,7 @@ import Data.Maybe (fromMaybe) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition import Text.Pandoc.Walk (walkM) -import Text.Pandoc.Shared (fetchItem, warn) +import Text.Pandoc.Shared (fetchItem', warn, withTempDir) import Text.Pandoc.Options (WriterOptions(..)) import Text.Pandoc.MIME (extensionFromMimeType) import Text.Pandoc.Process (pipeProcess) @@ -55,14 +54,6 @@ import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) #endif -withTempDir :: String -> (FilePath -> IO a) -> IO a -withTempDir = -#ifdef _WINDOWS - withTempDirectory "." -#else - withSystemTempDirectory -#endif - #ifdef _WINDOWS changePathSeparators :: FilePath -> FilePath changePathSeparators = intercalate "/" . splitDirectories @@ -74,26 +65,26 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) -> Pandoc -- ^ document -> IO (Either ByteString ByteString) makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do - doc' <- handleImages (writerSourceURL opts) tmpdir doc + doc' <- handleImages opts tmpdir doc let source = writer opts doc' tex2pdf' tmpdir program source -handleImages :: Maybe String -- ^ source base URL +handleImages :: WriterOptions -> FilePath -- ^ temp dir to store images -> Pandoc -- ^ document -> IO Pandoc -handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir) +handleImages opts tmpdir = walkM (handleImage' opts tmpdir) -handleImage' :: Maybe String +handleImage' :: WriterOptions -> FilePath -> Inline -> IO Inline -handleImage' baseURL tmpdir (Image ils (src,tit)) = do +handleImage' opts tmpdir (Image ils (src,tit)) = do exists <- doesFileExist src if exists then return $ Image ils (src,tit) else do - res <- fetchItem baseURL src + res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Right (contents, Just mime) -> do let ext = fromMaybe (takeExtension src) $ diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index eec4a3bc9..b25fca100 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -2,6 +2,7 @@ FlexibleContexts , GeneralizedNewtypeDeriving , TypeSynonymInstances +, MultiParamTypeClasses , FlexibleInstances #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -80,6 +81,7 @@ module Text.Pandoc.Parsing ( anyLine, HeaderType (..), ParserContext (..), QuoteContext (..), + HasQuoteContext (..), NoteTable, NoteTable', KeyTable, @@ -88,7 +90,6 @@ module Text.Pandoc.Parsing ( anyLine, toKey, registerHeader, smartPunctuation, - withQuoteContext, singleQuoteStart, singleQuoteEnd, doubleQuoteStart, @@ -106,6 +107,7 @@ module Text.Pandoc.Parsing ( anyLine, runF, askF, asksF, + token, -- * Re-exports from Text.Pandoc.Parsec Stream, runParser, @@ -160,7 +162,6 @@ module Text.Pandoc.Parsing ( anyLine, setSourceColumn, setSourceLine, newPos, - token ) where @@ -170,7 +171,7 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..)) import qualified Text.Pandoc.Builder as B import Text.Pandoc.XML (fromEntities) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) -import Text.Parsec +import Text.Parsec hiding (token) import Text.Parsec.Pos (newPos) import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isHexDigit, isSpace ) @@ -407,7 +408,7 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) -- Schemes from http://www.iana.org/assignments/uri-schemes.html plus --- the unofficial schemes coap, doi, javascript. +-- the unofficial schemes coap, doi, javascript, isbn, pmid schemes :: [String] schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid", "crid","data","dav","dict","dns","file","ftp","geo","go","gopher", @@ -429,7 +430,7 @@ schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid", "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify", "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004", "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri", - "ymsgr"] + "ymsgr", "isbn", "pmid"] uriScheme :: Stream s m Char => ParserT s st m String uriScheme = oneOfStringsCI schemes @@ -484,7 +485,8 @@ mathDisplayWith op cl = try $ do string op many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl) -mathDisplay :: Stream s m Char => ParserT s ParserState m String +mathDisplay :: (HasReaderOptions st, Stream s m Char) + => ParserT s st m String mathDisplay = (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -492,7 +494,8 @@ mathDisplay = <|> (guardEnabled Ext_tex_math_double_backslash >> mathDisplayWith "\\\\[" "\\\\]") -mathInline :: Stream s m Char => ParserT s ParserState m String +mathInline :: (HasReaderOptions st , Stream s m Char) + => ParserT s st m String mathInline = (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -909,6 +912,21 @@ class HasReaderOptions st where -- default getOption f = (f . extractReaderOptions) <$> getState +class HasQuoteContext st m where + getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext + withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a + +instance Monad m => HasQuoteContext ParserState m where + getQuoteContext = stateQuoteContext <$> getState + withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = stateQuoteContext oldState + setState oldState { stateQuoteContext = context } + result <- parser + newState <- getState + setState newState { stateQuoteContext = oldQuoteContext } + return result + instance HasReaderOptions ParserState where extractReaderOptions = stateOptions @@ -1051,9 +1069,9 @@ registerHeader (ident,classes,kvs) header' = do failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m () failUnlessSmart = getOption readerSmart >>= guard -smartPunctuation :: Stream s m Char - => ParserT s ParserState m Inlines - -> ParserT s ParserState m Inlines +smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) + => ParserT s st m Inlines + -> ParserT s st m Inlines smartPunctuation inlineParser = do failUnlessSmart choice [ quoted inlineParser, apostrophe, dash, ellipses ] @@ -1061,46 +1079,33 @@ smartPunctuation inlineParser = do apostrophe :: Stream s m Char => ParserT s st m Inlines apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019") -quoted :: Stream s m Char - => ParserT s ParserState m Inlines - -> ParserT s ParserState m Inlines +quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) + => ParserT s st m Inlines + -> ParserT s st m Inlines quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser -withQuoteContext :: Stream s m t - => QuoteContext - -> ParserT s ParserState m a - -> ParserT s ParserState m a -withQuoteContext context parser = do - oldState <- getState - let oldQuoteContext = stateQuoteContext oldState - setState oldState { stateQuoteContext = context } - result <- parser - newState <- getState - setState newState { stateQuoteContext = oldQuoteContext } - return result - -singleQuoted :: Stream s m Char - => ParserT s ParserState m Inlines - -> ParserT s ParserState m Inlines +singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) + => ParserT s st m Inlines + -> ParserT s st m Inlines singleQuoted inlineParser = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= return . B.singleQuoted . mconcat -doubleQuoted :: Stream s m Char - => ParserT s ParserState m Inlines - -> ParserT s ParserState m Inlines +doubleQuoted :: (HasQuoteContext st m, Stream s m Char) + => ParserT s st m Inlines + -> ParserT s st m Inlines doubleQuoted inlineParser = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>= return . B.doubleQuoted . mconcat -failIfInQuoteContext :: Stream s m t +failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t) => QuoteContext - -> ParserT s ParserState m () + -> ParserT s st m () failIfInQuoteContext context = do - st <- getState - if stateQuoteContext st == context + context' <- getQuoteContext + if context' == context then fail "already inside quotes" else return () @@ -1110,8 +1115,8 @@ charOrRef cs = guard (c `elem` cs) return c) -singleQuoteStart :: Stream s m Char - => ParserT s ParserState m () +singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) + => ParserT s st m () singleQuoteStart = do failIfInQuoteContext InSingleQuote -- single quote start can't be right after str @@ -1124,8 +1129,8 @@ singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum -doubleQuoteStart :: Stream s m Char - => ParserT s ParserState m () +doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char) + => ParserT s st m () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" @@ -1179,6 +1184,14 @@ citeKey = try $ do let key = firstChar:rest return (suppress_author, key) + +token :: (Stream s m t) + => (t -> String) + -> (t -> SourcePos) + -> (t -> Maybe a) + -> ParsecT s st m a +token pp pos match = tokenPrim pp (\_ t _ -> pos t) match + -- -- Macros -- @@ -1200,9 +1213,9 @@ macro = do else return $ rawBlock "latex" def' -- | Apply current macros to string. -applyMacros' :: Stream [Char] m Char +applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char) => String - -> ParserT [Char] ParserState m String + -> ParserT [Char] st m String applyMacros' target = do apply <- getOption readerApplyMacros if apply diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index d25ba725f..1e72c2040 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -35,6 +35,7 @@ module Text.Pandoc.Pretty ( , render , cr , blankline + , blanklines , space , text , char @@ -100,7 +101,7 @@ data D = Text Int String | BreakingSpace | CarriageReturn | NewLine - | BlankLine + | BlankLines Int -- number of blank lines deriving (Show) newtype Doc = Doc { unDoc :: Seq D } @@ -113,7 +114,7 @@ isBlank :: D -> Bool isBlank BreakingSpace = True isBlank CarriageReturn = True isBlank NewLine = True -isBlank BlankLine = True +isBlank (BlankLines _) = True isBlank (Text _ (c:_)) = isSpace c isBlank _ = False @@ -190,7 +191,7 @@ vsep = foldr ($+$) empty nestle :: Doc -> Doc nestle (Doc d) = Doc $ go d where go x = case viewl x of - (BlankLine :< rest) -> go rest + (BlankLines _ :< rest) -> go rest (NewLine :< rest) -> go rest _ -> x @@ -203,7 +204,7 @@ chomp d = Doc (fromList dl') go (BreakingSpace : xs) = go xs go (CarriageReturn : xs) = go xs go (NewLine : xs) = go xs - go (BlankLine : xs) = go xs + go (BlankLines _ : xs) = go xs go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs go xs = xs @@ -216,9 +217,10 @@ outp off s | off < 0 = do -- offset < 0 means newline characters let pref = reverse $ dropWhile isSpace $ reverse rawpref modify $ \st -> st{ output = fromString pref : output st , column = column st + realLength pref } + let numnewlines = length $ takeWhile (=='\n') $ reverse s modify $ \st -> st { output = fromString s : output st , column = 0 - , newlines = newlines st + 1 } + , newlines = newlines st + numnewlines } outp off s = do -- offset >= 0 (0 might be combining char) st' <- get let pref = prefix st' @@ -277,15 +279,11 @@ renderList (BeforeNonBlank d : xs) = | otherwise -> renderDoc d >> renderList xs [] -> renderList xs -renderList (BlankLine : xs) = do +renderList (BlankLines num : xs) = do st <- get case output st of - _ | newlines st > 1 || null xs -> return () - _ | column st == 0 -> do - outp (-1) "\n" - _ -> do - outp (-1) "\n" - outp (-1) "\n" + _ | newlines st > num || null xs -> return () + | otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n") renderList xs renderList (CarriageReturn : xs) = do @@ -302,7 +300,7 @@ renderList (NewLine : xs) = do renderList (BreakingSpace : CarriageReturn : xs) = renderList (CarriageReturn:xs) renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs) -renderList (BreakingSpace : BlankLine : xs) = renderList (BlankLine:xs) +renderList (BreakingSpace : BlankLines n : xs) = renderList (BlankLines n:xs) renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs) renderList (BreakingSpace : xs) = do let isText (Text _ _) = True @@ -383,9 +381,13 @@ cr = Doc $ singleton CarriageReturn -- | Inserts a blank line unless one exists already. -- (@blankline <> blankline@ has the same effect as @blankline@. --- If you want multiple blank lines, use @text "\\n\\n"@. blankline :: Doc -blankline = Doc $ singleton BlankLine +blankline = Doc $ singleton (BlankLines 1) + +-- | Inserts a blank lines unless they exists already. +-- (@blanklines m <> blanklines n@ has the same effect as @blankline (max m n)@. +blanklines :: Int -> Doc +blanklines n = Doc $ singleton (BlankLines n) -- | Uses the specified string as a prefix for every line of -- the inside document (except the first, if not at the beginning diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 196a3cec5..86ce62ced 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -78,19 +78,17 @@ import Codec.Archive.Zip import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Builder (text, toList) -import Text.Pandoc.MIME (getMimeType) -import Text.Pandoc.UTF8 (toString) import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Readers.Docx.TexChar import Text.Pandoc.Shared +import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Data.Maybe (mapMaybe, fromMaybe) -import Data.List (delete, isPrefixOf, (\\), intercalate) -import qualified Data.ByteString as BS +import Data.List (delete, isPrefixOf, (\\), intercalate, intersect) +import Data.Monoid import qualified Data.ByteString.Lazy as B -import Data.ByteString.Base64 (encode) import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State @@ -98,16 +96,24 @@ import Text.Printf (printf) readDocx :: ReaderOptions -> B.ByteString - -> Pandoc + -> (Pandoc, MediaBag) readDocx opts bytes = case archiveToDocx (toArchive bytes) of - Right docx -> Pandoc nullMeta (docxToBlocks opts docx) + Right docx -> (Pandoc meta blks, mediaBag) where + (meta, blks, mediaBag) = (docxToOutput opts docx) Left _ -> error $ "couldn't parse docx file" data DState = DState { docxAnchorMap :: M.Map String String + , docxMediaBag :: MediaBag , docxInHeaderBlock :: Bool , docxInTexSubscript :: Bool } +defaultDState :: DState +defaultDState = DState { docxAnchorMap = M.empty + , docxMediaBag = mempty + , docxInHeaderBlock = False + , docxInTexSubscript = False} + data DEnv = DEnv { docxOptions :: ReaderOptions , docxDocument :: Docx} @@ -134,6 +140,65 @@ spansToKeep = [] divsToKeep :: [String] divsToKeep = ["list-item", "Definition", "DefinitionTerm"] +metaStyles :: M.Map String String +metaStyles = M.fromList [ ("Title", "title") + , ("Subtitle", "subtitle") + , ("Author", "author") + , ("Date", "date") + , ("Abstract", "abstract")] + +sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart]) +sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp)) + +isMetaPar :: BodyPart -> Bool +isMetaPar (Paragraph pPr _) = + not $ null $ intersect (pStyle pPr) (M.keys metaStyles) +isMetaPar _ = False + +isEmptyPar :: BodyPart -> Bool +isEmptyPar (Paragraph _ parParts) = + all isEmptyParPart parParts + where + isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems + isEmptyParPart _ = False + isEmptyElem (TextRun s) = trim s == "" + isEmptyElem _ = True +isEmptyPar _ = False + +bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue) +bodyPartsToMeta' [] = return M.empty +bodyPartsToMeta' (bp : bps) + | (Paragraph pPr parParts) <- bp + , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles) + , (Just metaField) <- M.lookup c metaStyles = do + inlines <- parPartsToInlines parParts + remaining <- bodyPartsToMeta' bps + let + f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils'] + f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks) + f m (MetaList mv) = MetaList (m : mv) + f m n = MetaList [m, n] + return $ M.insertWith f metaField (MetaInlines inlines) remaining +bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps + +bodyPartsToMeta :: [BodyPart] -> DocxContext Meta +bodyPartsToMeta bps = do + mp <- bodyPartsToMeta' bps + let mp' = + case M.lookup "author" mp of + Just mv -> M.insert "author" (fixAuthors mv) mp + Nothing -> mp + return $ Meta mp' + +fixAuthors :: MetaValue -> MetaValue +fixAuthors (MetaBlocks blks) = + MetaList $ map g $ filter f blks + where f (Para _) = True + f _ = False + g (Para ils) = MetaInlines ils + g _ = MetaInlines [] +fixAuthors mv = mv + runStyleToContainers :: RunStyle -> [Container Inline] runStyleToContainers rPr = let spanClassToContainers :: String -> [Container Inline] @@ -154,7 +219,8 @@ runStyleToContainers rPr = , if isStrike rPr then (Just Strikeout) else Nothing , if isSuperScript rPr then (Just Superscript) else Nothing , if isSubScript rPr then (Just Subscript) else Nothing - , rUnderline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) + , rUnderline rPr >>= + (\f -> if f == "single" then (Just Emph) else Nothing) ] in classContainers ++ formatters @@ -259,13 +325,6 @@ runToInlines (Footnote bps) = runToInlines (Endnote bps) = concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks]) -makeDataUrl :: String -> B.ByteString -> Maybe String -makeDataUrl fp bs = - case getMimeType fp of - Just mime -> Just $ "data:" ++ mime ++ ";base64," ++ - toString (encode $ BS.concat $ B.toChunks bs) - Nothing -> Nothing - parPartToInlines :: ParPart -> DocxContext [Inline] parPartToInlines (PlainRun r) = runToInlines r parPartToInlines (Insertion _ author date runs) = do @@ -312,11 +371,9 @@ parPartToInlines (BookMark _ anchor) = modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap} return [Span (newAnchor, ["anchor"], []) []] parPartToInlines (Drawing fp bs) = do - return $ case True of -- TODO: add self-contained images - True -> [Image [] (fp, "")] - False -> case makeDataUrl fp bs of - Just d -> [Image [] (d, "")] - Nothing -> [Image [] ("", "")] + mediaBag <- gets docxMediaBag + modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } + return [Image [] (fp, "")] parPartToInlines (InternalHyperLink anchor runs) = do ils <- concatMapM runToInlines runs return [Link ils ('#' : anchor, "")] @@ -615,24 +672,25 @@ rewriteLink l@(Link ils ('#':target, title)) = do Nothing -> l rewriteLink il = return il - -bodyToBlocks :: Body -> DocxContext [Block] -bodyToBlocks (Body bps) = do - blks <- concatMapM bodyPartToBlocks bps >>= +bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag) +bodyToOutput (Body bps) = do + let (metabps, blkbps) = sepBodyParts bps + meta <- bodyPartsToMeta metabps + blks <- concatMapM bodyPartToBlocks blkbps >>= walkM rewriteLink - return $ - blocksToDefinitions $ - blocksToBullets $ blks - -docxToBlocks :: ReaderOptions -> Docx -> [Block] -docxToBlocks opts d@(Docx (Document _ body)) = - let dState = DState { docxAnchorMap = M.empty - , docxInHeaderBlock = False - , docxInTexSubscript = False} + mediaBag <- gets docxMediaBag + return $ (meta, + blocksToDefinitions $ blocksToBullets $ blks, + mediaBag) + +docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) +docxToOutput opts d@(Docx (Document _ body)) = + let dState = defaultDState dEnv = DEnv { docxOptions = opts , docxDocument = d} in - evalDocxContext (bodyToBlocks body) dEnv dState + evalDocxContext (bodyToOutput body) dEnv dState + ilToCode :: Inline -> String ilToCode (Str s) = s diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 8541a1a3a..71938afe0 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -779,11 +779,11 @@ expandDrawingId :: String -> D ParPart expandDrawingId s = do target <- asks (lookupRelationship s . envRelationships) case target of - Just t -> do let filepath = combine "word" t - bytes <- asks (lookup filepath . envMedia) - case bytes of - Just bs -> return $ Drawing filepath bs - Nothing -> throwError DocxError + Just filepath -> do + bytes <- asks (lookup (combine "word" filepath) . envMedia) + case bytes of + Just bs -> return $ Drawing filepath bs + Nothing -> throwError DocxError Nothing -> throwError DocxError elemToParPart :: NameSpaces -> Element -> D ParPart diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 2414dfbf7..597156a5e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -40,7 +41,7 @@ import Text.HTML.TagSoup import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) +import Text.Pandoc.Builder (HasMeta (..), Blocks, Inlines, trimInlines) import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing @@ -52,6 +53,8 @@ import Control.Applicative ( (<$>), (<$), (<*) ) import Data.Monoid import Text.Printf (printf) import Debug.Trace (trace) +import Data.Default (Default (..)) +import Control.Monad.Reader (Reader, runReader, asks, local, ask) isSpace :: Char -> Bool isSpace ' ' = True @@ -64,17 +67,26 @@ readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc readHtml opts inp = - case runParser parseDoc def{ stateOptions = opts } "source" tags of + case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } ) "source" tags of Left err' -> error $ "\nError at " ++ show err' Right result -> result where tags = canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do blocks <- (fixPlains False) . mconcat <$> manyTill block eof - meta <- stateMeta <$> getState + meta <- stateMeta . parserState <$> getState return $ Pandoc meta (B.toList blocks) -type TagParser = Parser [Tag String] ParserState +data HTMLState = + HTMLState + { parserState :: ParserState + } + +data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext } + +type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal) + +type TagParser = HTMLParser [Tag String] pBody :: TagParser Blocks pBody = pInTags "body" block @@ -115,7 +127,6 @@ block = do (take 60 $ show $ B.toList res)) (return ()) return res - pList :: TagParser Blocks pList = pBulletList <|> pOrderedList <|> pDefinitionList @@ -365,8 +376,8 @@ pSelfClosing f g = do pQ :: TagParser Inlines pQ = do - quoteContext <- stateQuoteContext `fmap` getState - let quoteType = case quoteContext of + context <- asks quoteContext + let quoteType = case context of InDoubleQuote -> SingleQuote _ -> DoubleQuote let innerQuoteContext = if quoteType == SingleQuote @@ -477,7 +488,8 @@ pTagText :: TagParser Inlines pTagText = try $ do (TagText str) <- pSatisfy isTagText st <- getState - case runParser (many pTagContents) st "text" str of + qu <- ask + case flip runReader qu $ runParserT (many pTagContents) st "text" str of Left _ -> fail $ "Could not parse `" ++ str ++ "'" Right result -> return $ mconcat result @@ -486,7 +498,9 @@ pBlank = try $ do (TagText str) <- pSatisfy isTagText guard $ all isSpace str -pTagContents :: Parser [Char] ParserState Inlines +type InlinesParser = HTMLParser String + +pTagContents :: InlinesParser Inlines pTagContents = B.displayMath <$> mathDisplay <|> B.math <$> mathInline @@ -496,12 +510,11 @@ pTagContents = <|> pSymbol <|> pBad -pStr :: Parser [Char] ParserState Inlines +pStr :: InlinesParser Inlines pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) - pos <- getPosition - updateState $ \s -> s{ stateLastStrPos = Just pos } + updateLastStrPos return $ B.str result isSpecial :: Char -> Bool @@ -516,13 +529,13 @@ isSpecial '\8220' = True isSpecial '\8221' = True isSpecial _ = False -pSymbol :: Parser [Char] ParserState Inlines +pSymbol :: InlinesParser Inlines pSymbol = satisfy isSpecial >>= return . B.str . (:[]) isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML -pBad :: Parser [Char] ParserState Inlines +pBad :: InlinesParser Inlines pBad = do c <- satisfy isBad let c' = case c of @@ -556,7 +569,7 @@ pBad = do _ -> '?' return $ B.str [c'] -pSpace :: Parser [Char] ParserState Inlines +pSpace :: InlinesParser Inlines pSpace = many1 (satisfy isSpace) >> return B.space -- @@ -672,19 +685,23 @@ _ `closes` _ = False --- parsers for use in markdown, textile readers -- | Matches a stretch of HTML in balanced tags. -htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String +htmlInBalanced :: (Monad m) + => (Tag String -> Bool) + -> ParserT String st m String htmlInBalanced f = try $ do (TagOpen t _, tag) <- htmlTag f guard $ '/' `notElem` tag -- not a self-closing tag let stopper = htmlTag (~== TagClose t) - let anytag = liftM snd $ htmlTag (const True) + let anytag = snd <$> htmlTag (const True) contents <- many $ notFollowedBy' stopper >> (htmlInBalanced f <|> anytag <|> count 1 anyChar) endtag <- liftM snd stopper return $ tag ++ concat contents ++ endtag -- | Matches a tag meeting a certain condition. -htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String) +htmlTag :: Monad m + => (Tag String -> Bool) + -> ParserT [Char] st m (Tag String, String) htmlTag f = try $ do lookAhead $ char '<' >> (oneOf "/!?" <|> letter) (next : _) <- getInput >>= return . canonicalizeTags . parseTags @@ -707,3 +724,29 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV) attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr +-- Instances + +-- This signature should be more general +-- MonadReader HTMLLocal m => HasQuoteContext st m +instance HasQuoteContext st (Reader HTMLLocal) where + getQuoteContext = asks quoteContext + withQuoteContext q = local (\s -> s{quoteContext = q}) + +instance HasReaderOptions HTMLState where + extractReaderOptions = extractReaderOptions . parserState + +instance Default HTMLState where + def = HTMLState def + +instance HasMeta HTMLState where + setMeta s b st = st {parserState = setMeta s b $ parserState st} + deleteMeta s st = st {parserState = deleteMeta s $ parserState st} + +instance Default HTMLLocal where + def = HTMLLocal NoQuote + +instance HasLastStrPosition HTMLState where + setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)} + getLastStrPos = getLastStrPos . parserState + + diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs new file mode 100644 index 000000000..3a51b9d84 --- /dev/null +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -0,0 +1,548 @@ +{-# LANGUAGE ViewPatterns #-} +{- +Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Txt2Tags + Copyright : Copyright (C) 2014 Matthew Pickering + License : GNU GPL, version 2 or above + + Maintainer : Matthew Pickering <matthewtpickering@gmail.com> + +Conversion of txt2tags formatted plain text to 'Pandoc' document. +-} +module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags + , getT2TMeta + , T2TMeta (..) + , readTxt2TagsNoMacros) + where + +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder ( Inlines, Blocks, (<>) + , trimInlines ) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL) +import Text.Pandoc.Parsing hiding (space, spaces, uri, macro) +import Control.Applicative ((<$>), (<$), (<*>), (<*), (*>)) +import Data.Char (toLower) +import Data.List (transpose, intersperse, intercalate) +import Data.Maybe (fromMaybe) +import Data.Monoid (Monoid, mconcat, mempty, mappend) +--import Network.URI (isURI) -- Not sure whether to use this function +import Control.Monad (void, guard, when) +import Data.Default +import Control.Monad.Reader (Reader, runReader, asks) + +import Data.Time.LocalTime (getZonedTime) +import Text.Pandoc.Compat.Directory(getModificationTime) +import Data.Time.Format (formatTime) +import System.Locale (defaultTimeLocale) +import System.IO.Error (catchIOError) + +type T2T = ParserT String ParserState (Reader T2TMeta) + +-- | An object for the T2T macros meta information +-- the contents of each field is simply substituted verbatim into the file +data T2TMeta = T2TMeta { + date :: String -- ^ Current date + , mtime :: String -- ^ Last modification time of infile + , infile :: FilePath -- ^ Input file + , outfile :: FilePath -- ^ Output file + } deriving Show + +instance Default T2TMeta where + def = T2TMeta "" "" "" "" + +-- | Get the meta information required by Txt2Tags macros +getT2TMeta :: [FilePath] -> FilePath -> IO T2TMeta +getT2TMeta inps out = do + curDate <- formatTime defaultTimeLocale "%F" <$> getZonedTime + let getModTime = fmap (formatTime defaultTimeLocale "%F") . + getModificationTime + curMtime <- catchIOError + (maximum <$> mapM getModTime inps) + (const (return "")) + return $ T2TMeta curDate curMtime (intercalate ", " inps) out + +-- | Read Txt2Tags from an input string returning a Pandoc document +readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc +readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") + +-- | Read Txt2Tags (ignoring all macros) from an input string returning +-- a Pandoc document +readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc +readTxt2TagsNoMacros = readTxt2Tags def + +parseT2T :: T2T Pandoc +parseT2T = do + _ <- (Nothing <$ try blankline) <|> (Just <$> (count 3 anyLine)) + config <- manyTill setting (notFollowedBy setting) + -- TODO: Handle settings better + let settings = foldr (\(k,v) -> B.setMeta k (MetaString v)) nullMeta config + updateState (\s -> s {stateMeta = settings}) + body <- mconcat <$> manyTill block eof + return $ Pandoc mempty (B.toList body) + +type Keyword = String +type Value = String + +setting :: T2T (Keyword, Value) +setting = do + string "%!" + keyword <- ignoreSpacesCap (many1 alphaNum) + char ':' + value <- ignoreSpacesCap (manyTill anyChar (newline)) + return (keyword, value) + +-- Blocks + +parseBlocks :: T2T Blocks +parseBlocks = mconcat <$> manyTill block eof + +block :: T2T Blocks +block = do + choice + [ mempty <$ blanklines + , quote + , hrule -- hrule must go above title + , title + , commentBlock + , verbatim + , rawBlock + , taggedBlock + , list + , table + , para + ] + +title :: T2T Blocks +title = try $ balancedTitle '+' <|> balancedTitle '=' + +balancedTitle :: Char -> T2T Blocks +balancedTitle c = try $ do + spaces + level <- length <$> many1 (char c) + guard (level <= 5) -- Max header level 5 + heading <- manyTill (noneOf "\n\r") (count level (char c)) + label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-")) + many spaceChar *> newline + let attr = maybe nullAttr (\x -> (x, [], [])) label + return $ B.headerWith attr level (trimInlines $ B.text heading) + +para :: T2T Blocks +para = try $ do + ils <- parseInlines + nl <- option False (True <$ newline) + option (B.plain ils) (guard nl >> notFollowedBy listStart >> return (B.para ils)) + where + listStart = try bulletListStart <|> orderedListStart + +commentBlock :: T2T Blocks +commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment + +-- Seperator and Strong line treated the same +hrule :: T2T Blocks +hrule = try $ do + spaces + line <- many1 (oneOf "=-_") + guard (length line >= 20) + B.horizontalRule <$ blankline + +quote :: T2T Blocks +quote = try $ do + lookAhead tab + rawQuote <- many1 (tab *> optional spaces *> anyLine) + contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n") + return $ B.blockQuote contents + +commentLine :: T2T Inlines +commentLine = comment + +-- List Parsing code from Org Reader + +list :: T2T Blocks +list = choice [bulletList, orderedList, definitionList] + +bulletList :: T2T Blocks +bulletList = B.bulletList . compactify' + <$> many1 (listItem bulletListStart parseBlocks) + +orderedList :: T2T Blocks +orderedList = B.orderedList . compactify' + <$> many1 (listItem orderedListStart parseBlocks) + +definitionList :: T2T Blocks +definitionList = try $ do + B.definitionList . compactify'DL <$> + many1 (listItem definitionListStart definitionListEnd) + +definitionListEnd :: T2T (Inlines, [Blocks]) +definitionListEnd = (,) <$> (mconcat <$> manyTill inline newline) <*> ((:[]) <$> parseBlocks) + +genericListStart :: T2T Char + -> T2T Int +genericListStart listMarker = try $ + (2+) <$> (length <$> many spaceChar + <* listMarker <* space <* notFollowedBy space) + +-- parses bullet list \start and returns its length (excl. following whitespace) +bulletListStart :: T2T Int +bulletListStart = genericListStart (char '-') + +orderedListStart :: T2T Int +orderedListStart = genericListStart (char '+' ) + +definitionListStart :: T2T Int +definitionListStart = genericListStart (char ':') + +-- parse raw text for one list item, excluding start marker and continuations +listItem :: T2T Int + -> T2T a + -> T2T a +listItem start end = try $ do + markerLength <- try start + firstLine <- anyLineNewline + blank <- option "" ("\n" <$ blankline) + rest <- concat <$> many (listContinuation markerLength) + parseFromString end $ firstLine ++ blank ++ rest + +-- continuation of a list item - indented and separated by blankline or endline. +-- Note: nested lists are parsed as continuations. +listContinuation :: Int + -> T2T String +listContinuation markerLength = try $ + notFollowedBy' (blankline >> blankline) + *> (mappend <$> (concat <$> many1 listLine) + <*> many blankline) + where listLine = try $ indentWith markerLength *> anyLineNewline + +anyLineNewline :: T2T String +anyLineNewline = (++ "\n") <$> anyLine + +indentWith :: Int -> T2T String +indentWith n = count n space + +-- Table + +table :: T2T Blocks +table = try $ do + header <- fmap snd <$> option mempty (try headerRow) + rows <- many1 (many commentLine *> tableRow) + let columns = transpose rows + let ncolumns = length columns + let aligns = map (foldr1 findAlign) (map (map fst) columns) + let rows' = map (map snd) rows + let size = maximum (map length rows') + let rowsPadded = map (pad size) rows' + let headerPadded = if (not (null header)) then pad size header else mempty + return $ B.table mempty + (zip aligns (replicate ncolumns 0.0)) + headerPadded rowsPadded + +pad :: (Show a, Monoid a) => Int -> [a] -> [a] +pad n xs = xs ++ (replicate (n - length xs) mempty) + + +findAlign :: Alignment -> Alignment -> Alignment +findAlign x y + | x == y = x + | otherwise = AlignDefault + +headerRow :: T2T [(Alignment, Blocks)] +headerRow = genericRow (string "||") + +tableRow :: T2T [(Alignment, Blocks)] +tableRow = genericRow (char '|') + +genericRow :: T2T a -> T2T [(Alignment, Blocks)] +genericRow start = try $ do + spaces *> start + manyTill tableCell newline <?> "genericRow" + + +tableCell :: T2T (Alignment, Blocks) +tableCell = try $ do + leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead + content <- (manyTill inline (try $ lookAhead (cellEnd))) + rightSpaces <- length <$> many space + let align = + case compare leftSpaces rightSpaces of + LT -> AlignLeft + EQ -> AlignCenter + GT -> AlignRight + endOfCell + return $ (align, B.plain (B.trimInlines $ mconcat content)) + where + cellEnd = (void newline <|> (many1 space *> endOfCell)) + +endOfCell :: T2T () +endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline) + +-- Raw area + +verbatim :: T2T Blocks +verbatim = genericBlock anyLineNewline B.codeBlock "```" + +rawBlock :: T2T Blocks +rawBlock = genericBlock anyLineNewline (B.para . B.str) "\"\"\"" + +taggedBlock :: T2T Blocks +taggedBlock = do + target <- getTarget + genericBlock anyLineNewline (B.rawBlock target) "'''" + +-- Generic + +genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s + +blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks +blockMarkupArea p f s = try $ (do + string s *> blankline + f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline)))) + +blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks +blockMarkupLine p f s = try (f <$> (string s *> space *> p)) + +-- Can be in either block or inline position +comment :: Monoid a => T2T a +comment = try $ do + atStart + notFollowedBy macro + mempty <$ (char '%' *> anyLine) + +-- Inline + +parseInlines :: T2T Inlines +parseInlines = trimInlines . mconcat <$> many1 inline + +inline :: T2T Inlines +inline = do + choice + [ endline + , macro + , commentLine + , whitespace + , url + , link + , image + , bold + , underline + , code + , raw + , tagged + , strike + , italic + , code + , str + , symbol + ] + +bold :: T2T Inlines +bold = inlineMarkup inline B.strong '*' (B.str) + +underline :: T2T Inlines +underline = inlineMarkup inline B.emph '_' (B.str) + +strike :: T2T Inlines +strike = inlineMarkup inline B.strikeout '-' (B.str) + +italic :: T2T Inlines +italic = inlineMarkup inline B.emph '/' (B.str) + +code :: T2T Inlines +code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id + +raw :: T2T Inlines +raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id + +tagged :: T2T Inlines +tagged = do + target <- getTarget + inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id + +-- Parser for markup indicated by a double character. +-- Inline markup is greedy and glued +-- Greedy meaning ***a*** = Bold [Str "*a*"] +-- Glued meaning that markup must be tight to content +-- Markup can't pass newlines +inlineMarkup :: Monoid a + => (T2T a) -- Content parser + -> (a -> Inlines) -- Constructor + -> Char -- Fence + -> (String -> a) -- Special Case to handle ****** + -> T2T Inlines +inlineMarkup p f c special = try $ do + start <- many1 (char c) + let l = length start + guard (l >= 2) + when (l == 2) (void $ notFollowedBy space) + -- We must make sure that there is no space before the start of the + -- closing tags + body <- optionMaybe (try $ manyTill (noneOf "\n\r") $ + (try $ lookAhead (noneOf " " >> string [c,c] ))) + case body of + Just middle -> do + lastChar <- anyChar + end <- many1 (char c) + let parser inp = parseFromString (mconcat <$> many p) inp + let start' = special (drop 2 start) + body' <- parser (middle ++ [lastChar]) + let end' = special (drop 2 end) + return $ f (start' <> body' <> end') + Nothing -> do -- Either bad or case such as ***** + guard (l >= 5) + let body' = (replicate (l - 4) c) + return $ f (special body') + +link :: T2T Inlines +link = try imageLink <|> titleLink + +-- Link with title +titleLink :: T2T Inlines +titleLink = try $ do + char '[' + notFollowedBy space + tokens <- sepBy1 (many $ noneOf " ]") space + guard (length tokens >= 2) + char ']' + let link' = last tokens + guard (length link' > 0) + let tit = concat (intersperse " " (init tokens)) + return $ B.link link' "" (B.text tit) + +-- Link with image +imageLink :: T2T Inlines +imageLink = try $ do + char '[' + body <- image + many1 space + l <- manyTill (noneOf "\n\r ") (char ']') + return (B.link l "" body) + +macro :: T2T Inlines +macro = try $ do + name <- string "%%" *> oneOfStringsCI (map fst commands) + optional (try $ enclosed (char '(') (char ')') anyChar) + lookAhead (spaceChar <|> oneOf specialChars <|> newline) + maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands) + where + commands = [ ("date", date), ("mtime", mtime) + , ("infile", infile), ("outfile", outfile)] + +-- raw URLs in text are automatically linked +url :: T2T Inlines +url = try $ do + (rawUrl, escapedUrl) <- (try uri <|> emailAddress) + return $ B.link rawUrl "" (B.str escapedUrl) + +uri :: T2T (String, String) +uri = try $ do + address <- t2tURI + return (address, escapeURI address) + +-- The definition of a URI in the T2T source differs from the +-- actual definition. This is a transcription of the definition in +-- the source of v2.6 +--isT2TURI :: String -> Bool +--isT2TURI (parse t2tURI "" -> Right _) = True +--isT2TURI _ = False + +t2tURI :: T2T String +t2tURI = do + start <- try ((++) <$> proto <*> urlLogin) <|> guess + domain <- many1 chars + sep <- many (char '/') + form' <- option mempty ((:) <$> char '?' <*> many1 form) + anchor' <- option mempty ((:) <$> char '#' <*> many anchor) + return (start ++ domain ++ sep ++ form' ++ anchor') + where + protos = ["http", "https", "ftp", "telnet", "gopher", "wais"] + proto = (++) <$> oneOfStrings protos <*> string "://" + guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23")) + <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.') + login = alphaNum <|> oneOf "_.-" + pass = many (noneOf " @") + chars = alphaNum <|> oneOf "%._/~:,=$@&+-" + anchor = alphaNum <|> oneOf "%._0" + form = chars <|> oneOf ";*" + urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@') + + +image :: T2T Inlines +image = try $ do + -- List taken from txt2tags source + let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"] + char '[' + path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions)) + ext <- oneOfStrings extensions + char ']' + return $ B.image (path ++ ext) "" mempty + +-- Characters used in markup +specialChars :: String +specialChars = "%*-_/|:+" + +tab :: T2T Char +tab = char '\t' + +space :: T2T Char +space = char ' ' + +spaces :: T2T String +spaces = many space + +endline :: T2T Inlines +endline = try $ do + newline + notFollowedBy blankline + notFollowedBy hrule + notFollowedBy title + notFollowedBy verbatim + notFollowedBy rawBlock + notFollowedBy taggedBlock + notFollowedBy quote + notFollowedBy list + notFollowedBy table + return $ B.space + +str :: T2T Inlines +str = try $ do + B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") + +whitespace :: T2T Inlines +whitespace = try $ B.space <$ spaceChar + +symbol :: T2T Inlines +symbol = B.str . (:[]) <$> oneOf specialChars + +-- Utility + +getTarget :: T2T String +getTarget = do + mv <- lookupMeta "target" . stateMeta <$> getState + let MetaString target = fromMaybe (MetaString "html") mv + return target + +atStart :: T2T () +atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) + +ignoreSpacesCap :: T2T String -> T2T String +ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces) + diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 2a2f56281..1a4e037cf 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -32,53 +32,54 @@ the HTML using data URIs. -} module Text.Pandoc.SelfContained ( makeSelfContained ) where import Text.HTML.TagSoup -import Network.URI (isURI, escapeURIString) +import Network.URI (isURI, escapeURIString, URI(..), parseURI) import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B import Data.ByteString (ByteString) -import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>)) +import System.FilePath (takeExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err) +import Text.Pandoc.Shared (renderTags', err, fetchItem') +import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.UTF8 (toString, fromString) -import Text.Pandoc.MIME (getMimeType) -import System.Directory (doesFileExist) +import Text.Pandoc.Options (WriterOptions(..)) isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c -convertTag :: Maybe FilePath -> Tag String -> IO (Tag String) -convertTag userdata t@(TagOpen tagname as) +convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String) +convertTag media sourceURL t@(TagOpen tagname as) | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do as' <- mapM processAttribute as return $ TagOpen tagname as' where processAttribute (x,y) = if x == "src" || x == "href" || x == "poster" then do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) y + (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) return (x, enc) else return (x,y) -convertTag userdata t@(TagOpen "script" as) = +convertTag media sourceURL t@(TagOpen "script" as) = case fromAttrib "src" t of [] -> return t src -> do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) -convertTag userdata t@(TagOpen "link" as) = +convertTag media sourceURL t@(TagOpen "link" as) = case fromAttrib "href" t of [] -> return t src -> do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) -convertTag _ t = return t +convertTag _ _ t = return t -- NOTE: This is really crude, it doesn't respect CSS comments. -cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString -cssURLs userdata d orig = +cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString + -> IO ByteString +cssURLs media sourceURL d orig = case B.breakSubstring "url(" orig of (x,y) | B.null y -> return orig | otherwise -> do @@ -91,33 +92,21 @@ cssURLs userdata d orig = let url' = if isURI url then url else d </> url - (raw, mime) <- getRaw userdata "" url' - rest <- cssURLs userdata d v + (raw, mime) <- getRaw media sourceURL "" url' + rest <- cssURLs media sourceURL d v let enc = "data:" `B.append` fromString mime `B.append` ";base64," `B.append` (encode raw) return $ x `B.append` "url(" `B.append` enc `B.append` rest -getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) -getItem userdata f = - if isURI f - then openURL f >>= either handleErr return - else do - -- strip off trailing query or fragment part, if relative URL. - -- this is needed for things like cmunrm.eot?#iefix, - -- which is used to get old versions of IE to work with web fonts. - let f' = takeWhile (\c -> c /= '?' && c /= '#') f - let mime = case takeExtension f' of - ".gz" -> getMimeType $ dropExtension f' - x -> getMimeType x - exists <- doesFileExist f' - cont <- if exists then B.readFile f' else readDataFile userdata f' - return (cont, mime) - where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e - -getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String) -getRaw userdata mimetype src = do +getRaw :: MediaBag -> Maybe String -> String -> String + -> IO (ByteString, String) +getRaw media sourceURL mimetype src = do let ext = map toLower $ takeExtension src - (raw, respMime) <- getItem userdata src + fetchResult <- fetchItem' media sourceURL src + (raw, respMime) <- case fetchResult of + Left msg -> err 67 $ "Could not fetch " ++ src ++ + "\n" ++ show msg + Right x -> return x let raw' = if ext == ".gz" then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks $ [raw] @@ -127,21 +116,22 @@ getRaw userdata mimetype src = do $ "Could not determine mime type for `" ++ src ++ "'" (x, Nothing) -> x (_, Just x ) -> x + let cssSourceURL = case parseURI src of + Just u + | uriScheme u `elem` ["http:","https:"] -> + Just $ show u{ uriPath = "", + uriQuery = "", + uriFragment = "" } + _ -> Nothing result <- if mime == "text/css" - then cssURLs userdata (takeDirectory src) raw' + then cssURLs media cssSourceURL (takeDirectory src) raw' else return raw' return (result, mime) -- | Convert HTML into self-contained HTML, incorporating images, --- scripts, and CSS using data: URIs. Items specified using absolute --- URLs will be downloaded; those specified using relative URLs will --- be sought first relative to the working directory, then relative --- to the user data directory (if the first parameter is 'Just' --- a directory), and finally relative to pandoc's default data --- directory. -makeSelfContained :: Maybe FilePath -> String -> IO String -makeSelfContained userdata inp = do +-- scripts, and CSS using data: URIs. +makeSelfContained :: WriterOptions -> String -> IO String +makeSelfContained opts inp = do let tags = parseTags inp - out' <- mapM (convertTag userdata) tags + out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags return $ renderTags' out' - diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index bb13836f2..f0e5bbe5d 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -59,6 +59,7 @@ module Text.Pandoc.Shared ( normalizeBlocks, removeFormatting, stringify, + capitalize, compactify, compactify', compactify'DL, @@ -77,16 +78,20 @@ module Text.Pandoc.Shared ( readDataFile, readDataFileUTF8, fetchItem, + fetchItem', openURL, -- * Error handling err, warn, -- * Safe read - safeRead + safeRead, + -- * Temp directory + withTempDir ) where import Text.Pandoc.Definition import Text.Pandoc.Walk +import Text.Pandoc.MediaBag (MediaBag, lookupMedia) import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 @@ -97,11 +102,11 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha, import Data.List ( find, isPrefixOf, intercalate ) import qualified Data.Map as M import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, - unEscapeString, parseURIReference ) + unEscapeString, parseURIReference, isAllowedInURI ) import qualified Data.Set as Set import System.Directory import Text.Pandoc.MIME (getMimeType) -import System.FilePath ( (</>), takeExtension, dropExtension ) +import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import qualified Control.Exception as E @@ -110,6 +115,7 @@ import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) import Data.Time import System.IO (stderr) +import System.IO.Temp import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) import qualified Data.ByteString as BS @@ -117,6 +123,7 @@ import qualified Data.ByteString.Char8 as B8 import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) +import qualified Data.Text as T (toUpper, pack, unpack) #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -522,6 +529,17 @@ stringify = query go . walk deNote deNote (Note _) = Str "" deNote x = x +-- | Bring all regular text in a pandoc structure to uppercase. +-- +-- This function correctly handles cases where a lowercase character doesn't +-- match to a single uppercase character – e.g. “Straße” would be converted +-- to “STRASSE”, not “STRAßE”. +capitalize :: Walkable Inline a => a -> a +capitalize = walk go + where go :: Inline -> Inline + go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s) + go x = x + -- | Change final list item from @Para@ to @Plain@ if the list contains -- no other @Para@ blocks. compactify :: [[Block]] -- ^ List of list items (each a list of blocks) @@ -553,20 +571,22 @@ compactify' items = _ -> items _ -> items --- | Like @compactify'@, but akts on items of definition lists. +-- | Like @compactify'@, but acts on items of definition lists. compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] compactify'DL items = let defs = concatMap snd items - defBlocks = reverse $ concatMap B.toList defs - in case defBlocks of - (Para x:_) -> if not $ any isPara (drop 1 defBlocks) - then let (t,ds) = last items - lastDef = B.toList $ last ds - ds' = init ds ++ - [B.fromList $ init lastDef ++ [Plain x]] - in init items ++ [(t, ds')] - else items - _ -> items + in case reverse (concatMap B.toList defs) of + (Para x:xs) + | not (any isPara xs) -> + let (t,ds) = last items + lastDef = B.toList $ last ds + ds' = init ds ++ + if null lastDef + then [B.fromList lastDef] + else [B.fromList $ init lastDef ++ [Plain x]] + in init items ++ [(t, ds')] + | otherwise -> items + _ -> items isPara :: Block -> Bool isPara (Para _) = True @@ -759,21 +779,31 @@ readDataFileUTF8 userDir fname = -- Returns raw content and maybe mime type. fetchItem :: Maybe String -> String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) -fetchItem sourceURL s - | isURI s = openURL s - | otherwise = - case sourceURL >>= parseURIReference of - Just u -> case parseURIReference s of - Just s' -> openURL $ show $ - s' `nonStrictRelativeTo` u - Nothing -> openURL $ show u ++ "/" ++ s - Nothing -> E.try readLocalFile +fetchItem sourceURL s = + case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of + (_, s') | isURI s' -> openURL s' + (Just u, s') -> -- try fetching from relative path at source + case parseURIReference s' of + Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u + Nothing -> openURL s' -- will throw error + (Nothing, _) -> E.try readLocalFile -- get from local file system where readLocalFile = do - let mime = case takeExtension s of - ".gz" -> getMimeType $ dropExtension s - x -> getMimeType x - cont <- BS.readFile $ unEscapeString s + cont <- BS.readFile fp return (cont, mime) + dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#') + fp = unEscapeString $ dropFragmentAndQuery s + mime = case takeExtension fp of + ".gz" -> getMimeType $ dropExtension fp + x -> getMimeType x + ensureEscaped = escapeURIString isAllowedInURI + +-- | Like 'fetchItem', but also looks for items in a 'MediaBag'. +fetchItem' :: MediaBag -> Maybe String -> String + -> IO (Either E.SomeException (BS.ByteString, Maybe String)) +fetchItem' media sourceURL s = do + case lookupMedia s media of + Nothing -> fetchItem sourceURL s + Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime) -- | Read from a URL and return raw data and maybe mime type. openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) @@ -833,3 +863,15 @@ safeRead s = case reads s of (d,x):_ | all isSpace x -> return d _ -> fail $ "Could not read `" ++ s ++ "'" + +-- +-- Temp directory +-- + +withTempDir :: String -> (FilePath -> IO a) -> IO a +withTempDir = +#ifdef _WINDOWS + withTempDirectory "." +#else + withSystemTempDirectory +#endif diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index de31e462e..6be6eb1d3 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -481,17 +481,30 @@ writeOpenXML opts (Pandoc meta blocks) = do _ -> [] let auths = docAuthors meta let dat = docDate meta + let abstract' = case lookupMeta "abstract" meta of + Just (MetaBlocks bs) -> bs + Just (MetaInlines ils) -> [Plain ils] + _ -> [] + let subtitle' = case lookupMeta "subtitle" meta of + Just (MetaBlocks [Plain xs]) -> xs + Just (MetaBlocks [Para xs]) -> xs + Just (MetaInlines xs) -> xs + _ -> [] title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts - [Para (intercalate [LineBreak] auths) | not (null auths)] + subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + map Para auths date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + abstract <- if null abstract' + then return [] + else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs let blocks' = bottomUp convertSpace $ blocks doc' <- blocksToOpenXML opts blocks' notes' <- reverse `fmap` gets stFootnotes - let meta' = title ++ authors ++ date + let meta' = title ++ subtitle ++ authors ++ date ++ abstract return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. @@ -817,7 +830,8 @@ inlineToOpenXML opts (Image alt (src, tit)) = do case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] Nothing -> do - res <- liftIO $ fetchItem (writerSourceURL opts) src + res <- liftIO $ + fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ec206086a..34a6dcb2f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -360,7 +360,8 @@ writeEPUB opts doc@(Pandoc meta _) = do walkM (transformBlock opts' mediaRef) pics <- readIORef mediaRef let readPicEntry entries (oldsrc, newsrc) = do - res <- fetchItem (writerSourceURL opts') oldsrc + res <- fetchItem' (writerMediaBag opts') + (writerSourceURL opts') oldsrc case res of Left _ -> do warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." @@ -792,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do - raw <- makeSelfContained Nothing $ writeHtmlInline opts x + raw <- makeSelfContained opts $ writeHtmlInline opts x return $ RawInline (Format "html") raw transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 803617f95..7a9bff4fe 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -28,7 +28,7 @@ module Text.Pandoc.Writers.FB2 (writeFB2) where import Control.Monad.State (StateT, evalStateT, get, modify) import Control.Monad.State (liftM, liftM2, liftIO) import Data.ByteString.Base64 (encode) -import Data.Char (toUpper, toLower, isSpace, isAscii, isControl) +import Data.Char (toLower, isSpace, isAscii, isControl) import Data.List (intersperse, intercalate, isPrefixOf) import Data.Either (lefts, rights) import Network.Browser (browse, request, setAllowRedirects, setOutHandler) @@ -44,8 +44,7 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock) -import Text.Pandoc.Walk +import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -421,10 +420,6 @@ indent = indentBlock indentLines ins = let lns = split isLineBreak ins :: [[Inline]] in intercalate [LineBreak] $ map ((Str spacer):) lns -capitalize :: Inline -> Inline -capitalize (Str xs) = Str $ map toUpper xs -capitalize x = x - -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: Inline -> FBM [Content] toXml (Str s) = return [txt s] @@ -434,7 +429,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss toXml (Superscript ss) = list `liftM` wrap "sup" ss toXml (Subscript ss) = list `liftM` wrap "sub" ss -toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss +toXml (SmallCaps ss) = cMapM toXml $ capitalize ss toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific inner <- cMapM toXml ss return $ [txt "‘"] ++ inner ++ [txt "’"] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 0197d5db6..ea704c91d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -742,7 +742,7 @@ inlineToLaTeX (Quoted qt lst) = do else char '\x2018' <> inner <> char '\x2019' inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str inlineToLaTeX (Math InlineMath str) = - return $ char '$' <> text str <> char '$' + return $ "\\(" <> text str <> "\\)" inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]" inlineToLaTeX (RawInline f str) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 37f148c0a..a859267cc 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -37,7 +37,7 @@ import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (blankline, char, space) +import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy ) import Data.Char ( isSpace, isPunctuation ) import Data.Ord ( comparing ) @@ -77,27 +77,13 @@ writePlain :: WriterOptions -> Pandoc -> String writePlain opts document = evalState (pandocToMarkdown opts{ writerExtensions = Set.delete Ext_escaped_line_breaks $ + Set.delete Ext_pipe_tables $ + Set.delete Ext_raw_html $ + Set.delete Ext_footnotes $ + Set.delete Ext_tex_math_dollars $ + Set.delete Ext_citations $ writerExtensions opts } - document') def{ stPlain = True } - where document' = plainify document - -plainify :: Pandoc -> Pandoc -plainify = walk go - where go :: Inline -> Inline - go (Emph xs) = Span ("",[],[]) xs - go (Strong xs) = Span ("",[],[]) xs - go (Strikeout xs) = Span ("",[],[]) xs - go (Superscript xs) = Span ("",[],[]) xs - go (Subscript xs) = Span ("",[],[]) xs - go (SmallCaps xs) = Span ("",[],[]) xs - go (Span _ xs) = Span ("",[],[]) xs - go (Code _ s) = Str s - go (Math _ s) = Str s - go (RawInline _ _) = Str "" - go (Link xs _) = Span ("",[],[]) xs - go (Image xs _) = Span ("",[],[]) $ [Str "["] ++ xs ++ [Str "]"] - go (Cite _ cits) = Span ("",[],[]) cits - go x = x + document) def{ stPlain = True } pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc pandocTitleBlock tit auths dat = @@ -188,7 +174,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do then tableOfContents opts headerBlocks else empty -- Strip off final 'references' header if markdown citations enabled - let blocks' = if not isPlain && isEnabled Ext_citations opts + let blocks' = if isEnabled Ext_citations opts then case reverse blocks of (Div (_,["references"],_) _):xs -> reverse xs _ -> blocks @@ -309,9 +295,9 @@ blockToMarkdown :: WriterOptions -- ^ Options -> State WriterState Doc blockToMarkdown _ Null = return empty blockToMarkdown opts (Div attrs ils) = do - isPlain <- gets stPlain + plain <- gets stPlain contents <- blockListToMarkdown opts ils - return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts) + return $ if plain || not (isEnabled Ext_markdown_in_html_blocks opts) then contents <> blankline else tagWithAttrs "div" attrs <> blankline <> contents <> blankline <> "</div>" <> blankline @@ -338,21 +324,22 @@ blockToMarkdown opts (Para inlines) = (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) blockToMarkdown opts (RawBlock f str) | f == "html" = do - st <- get - if stPlain st - then return empty - else return $ if isEnabled Ext_markdown_attribute opts + plain <- gets stPlain + return $ if plain + then empty + else if isEnabled Ext_markdown_attribute opts then text (addMarkdownAttribute str) <> text "\n" else text str <> text "\n" | f `elem` ["latex", "tex", "markdown"] = do - st <- get - if stPlain st - then return empty - else return $ text str <> text "\n" + plain <- gets stPlain + return $ if plain + then empty + else text str <> text "\n" blockToMarkdown _ (RawBlock _ _) = return empty -blockToMarkdown _ HorizontalRule = - return $ blankline <> text "* * * * *" <> blankline +blockToMarkdown opts HorizontalRule = do + return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline blockToMarkdown opts (Header level attr inlines) = do + plain <- gets stPlain -- we calculate the id that would be used by auto_identifiers -- so we know whether to print an explicit identifier ids <- gets stIds @@ -368,18 +355,19 @@ blockToMarkdown opts (Header level attr inlines) = do space <> attrsToMarkdown attr | otherwise -> empty contents <- inlineListToMarkdown opts inlines - st <- get let setext = writerSetextHeaders opts return $ nowrap $ case level of - 1 | setext -> + 1 | plain -> blanklines 3 <> contents <> blanklines 2 + | setext -> contents <> attr' <> cr <> text (replicate (offset contents) '=') <> blankline - 2 | setext -> + 2 | plain -> blanklines 2 <> contents <> blankline + | setext -> contents <> attr' <> cr <> text (replicate (offset contents) '-') <> blankline -- ghc interprets '#' characters in column 1 as linenum specifiers. - _ | stPlain st || isEnabled Ext_literate_haskell opts -> + _ | plain || isEnabled Ext_literate_haskell opts -> contents <> blankline _ -> text (replicate level '#') <> space <> contents <> attr' <> blankline blockToMarkdown opts (CodeBlock (_,classes,_) str) @@ -409,14 +397,12 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ (_,(cls:_),_) -> " " <> text cls _ -> empty blockToMarkdown opts (BlockQuote blocks) = do - st <- get + plain <- gets stPlain -- if we're writing literate haskell, put a space before the bird tracks -- so they won't be interpreted as lhs... let leader = if isEnabled Ext_literate_haskell opts then " > " - else if stPlain st - then " " - else "> " + else if plain then " " else "> " contents <- blockListToMarkdown opts blocks return $ (prefixed leader contents) <> blankline blockToMarkdown opts t@(Table caption aligns widths headers rows) = do @@ -610,8 +596,19 @@ definitionListItemToMarkdown opts (label, defs) = do let sps = case writerTabStop opts - 3 of n | n > 0 -> text $ replicate n ' ' _ -> text " " - let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' - return $ nowrap labelText <> cr <> contents <> cr + if isEnabled Ext_compact_definition_lists opts + then do + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) + $ vcat d <> cr) defs' + return $ nowrap labelText <> cr <> contents <> cr + else do + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) + $ vcat d <> cr) defs' + let isTight = case defs of + ((Plain _ : _): _) -> True + _ -> False + return $ blankline <> nowrap labelText <> + (if isTight then cr else blankline) <> contents <> blankline else do return $ nowrap labelText <> text " " <> cr <> vsep (map vsep defs') <> blankline @@ -626,15 +623,21 @@ blockListToMarkdown opts blocks = -- code block will be treated as a list continuation paragraph where fixBlocks (b : CodeBlock attr x : rest) | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) - && isListBlock b = - b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x : - fixBlocks rest + && isListBlock b = b : commentSep : CodeBlock attr x : + fixBlocks rest + fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) + fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) = + b1 : commentSep : fixBlocks (b2:bs) fixBlocks (x : xs) = x : fixBlocks xs fixBlocks [] = [] isListBlock (BulletList _) = True isListBlock (OrderedList _ _) = True isListBlock (DefinitionList _) = True isListBlock _ = False + commentSep = RawBlock "html" "<!-- -->\n" -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. @@ -672,59 +675,69 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do - st <- get + plain <- gets stPlain contents <- inlineListToMarkdown opts ils - return $ if stPlain st + return $ if plain then contents else tagWithAttrs "span" attrs <> contents <> text "</span>" inlineToMarkdown opts (Emph lst) = do + plain <- gets stPlain contents <- inlineListToMarkdown opts lst - return $ "*" <> contents <> "*" + return $ if plain + then "_" <> contents <> "_" + else "*" <> contents <> "*" inlineToMarkdown opts (Strong lst) = do - contents <- inlineListToMarkdown opts lst - return $ "**" <> contents <> "**" + plain <- gets stPlain + if plain + then inlineListToMarkdown opts $ capitalize lst + else do + contents <- inlineListToMarkdown opts lst + return $ "**" <> contents <> "**" inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_strikeout opts + plain <- gets stPlain + return $ if plain || isEnabled Ext_strikeout opts then "~~" <> contents <> "~~" else "<s>" <> contents <> "</s>" inlineToMarkdown opts (Superscript lst) = do - let lst' = walk escapeSpaces lst - contents <- inlineListToMarkdown opts lst' + contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_superscript opts then "^" <> contents <> "^" else "<sup>" <> contents <> "</sup>" inlineToMarkdown opts (Subscript lst) = do - let lst' = walk escapeSpaces lst - contents <- inlineListToMarkdown opts lst' + contents <- inlineListToMarkdown opts $ walk escapeSpaces lst return $ if isEnabled Ext_subscript opts then "~" <> contents <> "~" else "<sub>" <> contents <> "</sub>" inlineToMarkdown opts (SmallCaps lst) = do - contents <- inlineListToMarkdown opts lst - st <- get - return $ if stPlain st - then contents - else tagWithAttrs "span" - ("",[],[("style","font-variant:small-caps;")]) - <> contents <> text "</span>" + plain <- gets stPlain + if plain + then inlineListToMarkdown opts $ capitalize lst + else do + contents <- inlineListToMarkdown opts lst + return $ tagWithAttrs "span" + ("",[],[("style","font-variant:small-caps;")]) + <> contents <> text "</span>" inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "‘" <> contents <> "’" inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "“" <> contents <> "”" -inlineToMarkdown opts (Code attr str) = +inlineToMarkdown opts (Code attr str) = do let tickGroups = filter (\s -> '`' `elem` s) $ group str - longest = if null tickGroups + let longest = if null tickGroups then 0 else maximum $ map length tickGroups - marker = replicate (longest + 1) '`' - spacer = if (longest == 0) then "" else " " - attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr + let marker = replicate (longest + 1) '`' + let spacer = if (longest == 0) then "" else " " + let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr then attrsToMarkdown attr else empty - in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs + plain <- gets stPlain + if plain + then return $ text str + else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs inlineToMarkdown _ (Str str) = do st <- get if stPlain st @@ -737,7 +750,11 @@ inlineToMarkdown opts (Math InlineMath str) return $ "\\(" <> text str <> "\\)" | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\(" <> text str <> "\\\\)" - | otherwise = inlineListToMarkdown opts $ texMathToInlines InlineMath str + | otherwise = do + plain <- gets stPlain + inlineListToMarkdown opts $ + (if plain then makeMathPlainer else id) $ + texMathToInlines InlineMath str inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_dollars opts = return $ "$$" <> text str <> "$$" @@ -747,15 +764,20 @@ inlineToMarkdown opts (Math DisplayMath str) return $ "\\\\[" <> text str <> "\\\\]" | otherwise = (\x -> cr <> x <> cr) `fmap` inlineListToMarkdown opts (texMathToInlines DisplayMath str) -inlineToMarkdown opts (RawInline f str) - | f == "html" || f == "markdown" || - (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) = - return $ text str -inlineToMarkdown _ (RawInline _ _) = return empty -inlineToMarkdown opts (LineBreak) - | isEnabled Ext_hard_line_breaks opts = return cr - | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr - | otherwise = return $ " " <> cr +inlineToMarkdown opts (RawInline f str) = do + plain <- gets stPlain + if not plain && f == "html" || f == "markdown" || + (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) + then return $ text str + else return empty +inlineToMarkdown opts (LineBreak) = do + plain <- gets stPlain + if plain || isEnabled Ext_hard_line_breaks opts + then return cr + else return $ + if isEnabled Ext_escaped_line_breaks opts + then "\\" <> cr + else " " <> cr inlineToMarkdown _ Space = return space inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Cite (c:cs) lst) @@ -788,6 +810,7 @@ inlineToMarkdown opts (Cite (c:cs) lst) modekey SuppressAuthor = "-" modekey _ = "" inlineToMarkdown opts (Link txt (src, tit)) = do + plain <- gets stPlain linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty @@ -801,22 +824,29 @@ inlineToMarkdown opts (Link txt (src, tit)) = do ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto - then "<" <> text srcSuffix <> ">" + then if plain + then text srcSuffix + else "<" <> text srcSuffix <> ">" else if useRefLinks then let first = "[" <> linktext <> "]" second = if txt == ref then "[]" else "[" <> reftext <> "]" in first <> second - else "[" <> linktext <> "](" <> - text src <> linktitle <> ")" + else if plain + then linktext + else "[" <> linktext <> "](" <> + text src <> linktitle <> ")" inlineToMarkdown opts (Image alternate (source, tit)) = do + plain <- gets stPlain let txt = if null alternate || alternate == [Str source] -- to prevent autolinks then [Str ""] else alternate linkPart <- inlineToMarkdown opts (Link txt (source, tit)) - return $ "!" <> linkPart + return $ if plain + then "[" <> linkPart <> "]" + else "!" <> linkPart inlineToMarkdown opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get @@ -824,3 +854,9 @@ inlineToMarkdown opts (Note contents) = do if isEnabled Ext_footnotes opts then return $ "[^" <> ref <> "]" else return $ "[" <> ref <> "]" + +makeMathPlainer :: [Inline] -> [Inline] +makeMathPlainer = walk go + where + go (Emph xs) = Span nullAttr xs + go x = x diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index cab55be9b..3f392a5d0 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -139,10 +139,14 @@ blockToMediaWiki (CodeBlock (_,classes,_) str) = do "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", "visualfoxpro", "winbatch", "xml", "xpp", "z80"] - let (beg, end) = if null at - then ("<pre" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</pre>") - else ("<source lang=\"" ++ head at ++ "\">", "</source>") - return $ beg ++ escapeString str ++ end + return $ + if null at + then "<pre" ++ (if null classes + then ">" + else " class=\"" ++ unwords classes ++ "\">") ++ + escapeString str ++ "</pre>" + else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>" + -- note: no escape! blockToMediaWiki (BlockQuote blocks) = do contents <- blockListToMediaWiki blocks diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 15f7c8be8..02794f76d 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -38,7 +38,7 @@ import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip import Text.Pandoc.Options ( WriterOptions(..) ) -import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem, warn ) +import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem', warn ) import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition @@ -131,7 +131,7 @@ writeODT opts doc@(Pandoc meta _) = do transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline transformPicMath opts entriesRef (Image lab (src,_)) = do - res <- fetchItem (writerSourceURL opts) src + res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do warn $ "Could not find image `" ++ src ++ "', skipping..." diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index fe241b8d7..43405ce3c 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -36,42 +36,48 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Walk import Data.List ( isSuffixOf, intercalate ) -import Data.Char ( ord, chr, isDigit, toLower ) -import System.FilePath ( takeExtension ) +import Data.Char ( ord, chr, isDigit ) import qualified Data.ByteString as B import qualified Data.Map as M import Text.Printf ( printf ) -import Network.URI ( isURI, unEscapeString ) -import qualified Control.Exception as E +import Text.Pandoc.ImageSize --- | Convert Image inlines into a raw RTF embedded image, read from a file. +-- | Convert Image inlines into a raw RTF embedded image, read from a file, +-- or a MediaBag, or the internet. -- If file not found or filetype not jpeg or png, leave the inline unchanged. -rtfEmbedImage :: Inline -> IO Inline -rtfEmbedImage x@(Image _ (src,_)) = do - let ext = map toLower (takeExtension src) - if ext `elem` [".jpg",".jpeg",".png"] && not (isURI src) - then do - let src' = unEscapeString src - imgdata <- E.catch (B.readFile src') - (\e -> let _ = (e :: E.SomeException) in return B.empty) - let bytes = map (printf "%02x") $ B.unpack imgdata - let filetype = case ext of - ".jpg" -> "\\jpegblip" - ".jpeg" -> "\\jpegblip" - ".png" -> "\\pngblip" - _ -> error "Unknown file type" - let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}" - return $ if B.null imgdata - then x - else RawInline (Format "rtf") raw - else return x -rtfEmbedImage x = return x +rtfEmbedImage :: WriterOptions -> Inline -> IO Inline +rtfEmbedImage opts x@(Image _ (src,_)) = do + result <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src + case result of + Right (imgdata, Just mime) + | mime == "image/jpeg" || mime == "image/png" -> do + let bytes = map (printf "%02x") $ B.unpack imgdata + let filetype = case mime of + "image/jpeg" -> "\\jpegblip" + "image/png" -> "\\pngblip" + _ -> error "Unknown file type" + let sizeSpec = case imageSize imgdata of + Nothing -> "" + Just sz -> "\\picw" ++ show xpx ++ + "\\pich" ++ show ypx ++ + "\\picwgoal" ++ show (xpt * 20) + ++ "\\pichgoal" ++ show (ypt * 20) + -- twip = 1/1440in = 1/20pt + where (xpx, ypx) = sizeInPixels sz + (xpt, ypt) = sizeInPoints sz + let raw = "{\\pict" ++ filetype ++ sizeSpec ++ " " ++ + concat bytes ++ "}" + return $ if B.null imgdata + then x + else RawInline (Format "rtf") raw + _ -> return x +rtfEmbedImage _ x = return x -- | Convert Pandoc to a string in rich text format, with -- images embedded as encoded binary data. writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String writeRTFWithEmbeddedImages options doc = - writeRTF options `fmap` walkM rtfEmbedImage doc + writeRTF options `fmap` walkM (rtfEmbedImage options) doc -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String |