aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Asciify.hs2
-rw-r--r--src/Text/Pandoc/Compat/Directory.hs21
-rw-r--r--src/Text/Pandoc/Compat/Except.hs37
-rw-r--r--src/Text/Pandoc/MediaBag.hs107
-rw-r--r--src/Text/Pandoc/Options.hs25
-rw-r--r--src/Text/Pandoc/PDF.hs23
-rw-r--r--src/Text/Pandoc/Parsing.hs425
-rw-r--r--src/Text/Pandoc/Pretty.hs32
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs7
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs805
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fonts.hs238
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs35
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs981
-rw-r--r--src/Text/Pandoc/Readers/Docx/Reducible.hs183
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs285
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs341
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs40
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs5
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs236
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs5
-rw-r--r--src/Text/Pandoc/Readers/Org.hs51
-rw-r--r--src/Text/Pandoc/Readers/RST.hs13
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs20
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs24
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs548
-rw-r--r--src/Text/Pandoc/SelfContained.hs86
-rw-r--r--src/Text/Pandoc/Shared.hs351
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs70
-rw-r--r--src/Text/Pandoc/Writers/Custom.hs34
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs47
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs37
-rw-r--r--src/Text/Pandoc/Writers/DokuWiki.hs475
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs123
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs43
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs30
-rw-r--r--src/Text/Pandoc/Writers/Haddock.hs4
-rw-r--r--src/Text/Pandoc/Writers/ICML.hs8
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs24
-rw-r--r--src/Text/Pandoc/Writers/Man.hs9
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs234
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs316
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs7
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs6
-rw-r--r--src/Text/Pandoc/Writers/Org.hs2
-rw-r--r--src/Text/Pandoc/Writers/RST.hs7
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs72
46 files changed, 4577 insertions, 1897 deletions
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 8a5ccec5c..66490d5c6 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -17,7 +17,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.SelfContained
+ Module : Text.Pandoc.Asciify
Copyright : Copyright (C) 2013-2014 John MacFarlane
License : GNU GPL, version 2 or above
diff --git a/src/Text/Pandoc/Compat/Directory.hs b/src/Text/Pandoc/Compat/Directory.hs
new file mode 100644
index 000000000..61dd5c525
--- /dev/null
+++ b/src/Text/Pandoc/Compat/Directory.hs
@@ -0,0 +1,21 @@
+{-# LANGUAGE CPP #-}
+module Text.Pandoc.Compat.Directory ( getModificationTime )
+ where
+
+#if MIN_VERSION_directory(1,2,0)
+import System.Directory
+
+
+#else
+import qualified System.Directory as S
+import Data.Time.Clock (UTCTime)
+import Data.Time.Clock.POSIX
+import System.Time
+
+getModificationTime :: FilePath -> IO UTCTime
+getModificationTime fp = convert `fmap` S.getModificationTime fp
+ where
+ convert (TOD x _) = posixSecondsToUTCTime (realToFrac x)
+
+#endif
+
diff --git a/src/Text/Pandoc/Compat/Except.hs b/src/Text/Pandoc/Compat/Except.hs
new file mode 100644
index 000000000..9ce7c0d36
--- /dev/null
+++ b/src/Text/Pandoc/Compat/Except.hs
@@ -0,0 +1,37 @@
+{-# LANGUAGE CPP #-}
+module Text.Pandoc.Compat.Except ( ExceptT
+ , Except
+ , Error(..)
+ , runExceptT
+ , runExcept
+ , MonadError
+ , throwError
+ , catchError )
+ where
+
+#if MIN_VERSION_mtl(2,2,1)
+import Control.Monad.Except
+
+class Error a where
+ noMsg :: a
+ strMsg :: String -> a
+
+ noMsg = strMsg ""
+ strMsg _ = noMsg
+
+#else
+import Control.Monad.Error
+import Control.Monad.Identity (Identity, runIdentity)
+
+type ExceptT = ErrorT
+
+type Except s a = ErrorT s Identity a
+
+runExceptT :: ExceptT e m a -> m (Either e a)
+runExceptT = runErrorT
+
+runExcept :: ExceptT e Identity a -> Either e a
+runExcept = runIdentity . runExceptT
+#endif
+
+
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
new file mode 100644
index 000000000..b19804b5f
--- /dev/null
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -0,0 +1,107 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-
+Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.MediaBag
+ Copyright : Copyright (C) 2014 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definition of a MediaBag object to hold binary resources, and an
+interface for interacting with it.
+-}
+module Text.Pandoc.MediaBag (
+ MediaBag,
+ lookupMedia,
+ insertMedia,
+ mediaDirectory,
+ extractMediaBag
+ ) where
+import System.FilePath
+import System.Directory (createDirectoryIfMissing)
+import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as BL
+import Data.Monoid (Monoid)
+import Control.Monad (when, MonadPlus(..))
+import Text.Pandoc.MIME (getMimeType)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Maybe (fromMaybe)
+import System.IO (stderr)
+
+-- | A container for a collection of binary resources, with names and
+-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty'
+-- can be used for an empty 'MediaBag', and '<>' can be used to append
+-- two 'MediaBag's.
+newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString))
+ deriving (Monoid)
+
+instance Show MediaBag where
+ show bag = "MediaBag " ++ show (mediaDirectory bag)
+
+-- | Insert a media item into a 'MediaBag', replacing any existing
+-- value with the same name.
+insertMedia :: FilePath -- ^ relative path and canonical name of resource
+ -> Maybe String -- ^ mime type (Nothing = determine from extension)
+ -> BL.ByteString -- ^ contents of resource
+ -> MediaBag
+ -> MediaBag
+insertMedia fp mbMime contents (MediaBag mediamap) =
+ MediaBag (M.insert fp (mime, contents) mediamap)
+ where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback)
+ fallback = case takeExtension fp of
+ ".gz" -> getMimeType $ dropExtension fp
+ _ -> getMimeType fp
+
+-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
+lookupMedia :: FilePath
+ -> MediaBag
+ -> Maybe (String, BL.ByteString)
+lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap
+
+-- | Get a list of the file paths stored in a 'MediaBag', with
+-- their corresponding mime types and the lengths in bytes of the contents.
+mediaDirectory :: MediaBag -> [(String, String, Int)]
+mediaDirectory (MediaBag mediamap) =
+ M.foldWithKey (\fp (mime,contents) ->
+ ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
+
+-- | Extract contents of MediaBag to a given directory. Print informational
+-- messages if 'verbose' is true.
+extractMediaBag :: Bool
+ -> FilePath
+ -> MediaBag
+ -> IO ()
+extractMediaBag verbose dir (MediaBag mediamap) = do
+ sequence_ $ M.foldWithKey
+ (\fp (_ ,contents) ->
+ ((writeMedia verbose dir (fp, contents)):)) [] mediamap
+
+writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
+writeMedia verbose dir (subpath, bs) = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> normalise subpath
+ createDirectoryIfMissing True $ takeDirectory fullpath
+ when verbose $ UTF8.hPutStrLn stderr $ "pandoc: extracting " ++ fullpath
+ BL.writeFile fullpath bs
+
+
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 611a6bb06..84ccbbdc9 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -41,6 +41,7 @@ module Text.Pandoc.Options ( Extension(..)
, HTMLSlideVariant (..)
, EPUBVersion (..)
, WriterOptions (..)
+ , TrackChanges (..)
, def
, isEnabled
) where
@@ -48,6 +49,8 @@ import Data.Set (Set)
import qualified Data.Set as Set
import Data.Default
import Text.Pandoc.Highlighting (Style, pygments)
+import Text.Pandoc.MediaBag (MediaBag)
+import Data.Monoid
-- | Individually selectable syntax extensions.
data Extension =
@@ -74,6 +77,8 @@ data Extension =
| Ext_backtick_code_blocks -- ^ Github style ``` code blocks
| Ext_inline_code_attributes -- ^ Allow attributes on inline code
| Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
+ | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
+ | Ext_native_spans -- ^ Use Span inlines for contents of <span>
| Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown
-- iff container has attribute 'markdown'
| Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak
@@ -83,6 +88,8 @@ data Extension =
| Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank
| Ext_startnum -- ^ Make start number of ordered list significant
| Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php
+ | Ext_compact_definition_lists -- ^ Definition lists without
+ -- space between items, and disallow laziness
| Ext_example_lists -- ^ Markdown-style numbered examples
| Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable
| Ext_intraword_underscores -- ^ Treat underscore inside word as literal
@@ -101,6 +108,7 @@ data Extension =
| Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
| Ext_implicit_header_references -- ^ Implicit reference links for headers
| Ext_line_blocks -- ^ RST style line blocks
+ | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
deriving (Show, Read, Enum, Eq, Ord, Bounded)
pandocExtensions :: Set Extension
@@ -125,6 +133,8 @@ pandocExtensions = Set.fromList
, Ext_backtick_code_blocks
, Ext_inline_code_attributes
, Ext_markdown_in_html_blocks
+ , Ext_native_divs
+ , Ext_native_spans
, Ext_escaped_line_breaks
, Ext_fancy_lists
, Ext_startnum
@@ -162,7 +172,6 @@ githubMarkdownExtensions = Set.fromList
, Ext_raw_html
, Ext_tex_math_single_backslash
, Ext_fenced_code_blocks
- , Ext_fenced_code_attributes
, Ext_auto_identifiers
, Ext_ascii_identifiers
, Ext_backtick_code_blocks
@@ -198,7 +207,6 @@ strictExtensions = Set.fromList
data ReaderOptions = ReaderOptions{
readerExtensions :: Set Extension -- ^ Syntax extensions
, readerSmart :: Bool -- ^ Smart punctuation
- , readerStrict :: Bool -- ^ FOR TRANSITION ONLY
, readerStandalone :: Bool -- ^ Standalone document with header
, readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX
, readerColumns :: Int -- ^ Number of columns in terminal
@@ -211,13 +219,13 @@ data ReaderOptions = ReaderOptions{
-- indented code blocks
, readerDefaultImageExtension :: String -- ^ Default extension for images
, readerTrace :: Bool -- ^ Print debugging info
+ , readerTrackChanges :: TrackChanges
} deriving (Show, Read)
instance Default ReaderOptions
where def = ReaderOptions{
readerExtensions = pandocExtensions
, readerSmart = False
- , readerStrict = False
, readerStandalone = False
, readerParseRaw = False
, readerColumns = 80
@@ -227,6 +235,7 @@ instance Default ReaderOptions
, readerIndentedCodeClasses = []
, readerDefaultImageExtension = ""
, readerTrace = False
+ , readerTrackChanges = AcceptChanges
}
--
@@ -264,6 +273,12 @@ data HTMLSlideVariant = S5Slides
| NoSlides
deriving (Show, Read, Eq)
+-- | Options for accepting or rejecting MS Word track-changes.
+data TrackChanges = AcceptChanges
+ | RejectChanges
+ | AllChanges
+ deriving (Show, Read, Eq)
+
-- | Options for writers
data WriterOptions = WriterOptions
{ writerStandalone :: Bool -- ^ Include header and footer
@@ -305,7 +320,8 @@ data WriterOptions = WriterOptions
, writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files)
, writerTOCDepth :: Int -- ^ Number of levels to include in TOC
, writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified
- , writerReferenceDocx :: Maybe FilePath -- ^ Ptah to reference DOCX if specified
+ , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified
+ , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader
} deriving Show
instance Default WriterOptions where
@@ -348,6 +364,7 @@ instance Default WriterOptions where
, writerTOCDepth = 3
, writerReferenceODT = Nothing
, writerReferenceDocx = Nothing
+ , writerMediaBag = mempty
}
-- | Returns True if the given extension is enabled.
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index bd55c565f..35554637a 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -30,7 +30,6 @@ Conversion of LaTeX documents to PDF.
-}
module Text.Pandoc.PDF ( makePDF ) where
-import System.IO.Temp
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as BC
@@ -46,7 +45,7 @@ import Data.Maybe (fromMaybe)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walkM)
-import Text.Pandoc.Shared (fetchItem, warn)
+import Text.Pandoc.Shared (fetchItem', warn, withTempDir)
import Text.Pandoc.Options (WriterOptions(..))
import Text.Pandoc.MIME (extensionFromMimeType)
import Text.Pandoc.Process (pipeProcess)
@@ -55,14 +54,6 @@ import qualified Data.ByteString.Lazy as BL
import Data.List (intercalate)
#endif
-withTempDir :: String -> (FilePath -> IO a) -> IO a
-withTempDir =
-#ifdef _WINDOWS
- withTempDirectory "."
-#else
- withSystemTempDirectory
-#endif
-
#ifdef _WINDOWS
changePathSeparators :: FilePath -> FilePath
changePathSeparators = intercalate "/" . splitDirectories
@@ -74,26 +65,26 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex)
-> Pandoc -- ^ document
-> IO (Either ByteString ByteString)
makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
- doc' <- handleImages (writerSourceURL opts) tmpdir doc
+ doc' <- handleImages opts tmpdir doc
let source = writer opts doc'
tex2pdf' tmpdir program source
-handleImages :: Maybe String -- ^ source base URL
+handleImages :: WriterOptions
-> FilePath -- ^ temp dir to store images
-> Pandoc -- ^ document
-> IO Pandoc
-handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir)
+handleImages opts tmpdir = walkM (handleImage' opts tmpdir)
-handleImage' :: Maybe String
+handleImage' :: WriterOptions
-> FilePath
-> Inline
-> IO Inline
-handleImage' baseURL tmpdir (Image ils (src,tit)) = do
+handleImage' opts tmpdir (Image ils (src,tit)) = do
exists <- doesFileExist src
if exists
then return $ Image ils (src,tit)
else do
- res <- fetchItem baseURL src
+ res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
case res of
Right (contents, Just mime) -> do
let ext = fromMaybe (takeExtension src) $
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 8bc042e28..d1fba1e21 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1,5 +1,9 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances,
- FlexibleInstances#-}
+{-# LANGUAGE
+ FlexibleContexts
+, GeneralizedNewtypeDeriving
+, TypeSynonymInstances
+, MultiParamTypeClasses
+, FlexibleInstances #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -29,8 +33,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
A utility library with parsers used in pandoc readers.
-}
-module Text.Pandoc.Parsing ( (>>~),
- anyLine,
+module Text.Pandoc.Parsing ( anyLine,
many1Till,
notFollowedBy',
oneOfStrings,
@@ -62,6 +65,7 @@ module Text.Pandoc.Parsing ( (>>~),
widthsFromIndices,
gridTableWith,
readWith,
+ readWithM,
testStringWith,
guardEnabled,
guardDisabled,
@@ -77,6 +81,7 @@ module Text.Pandoc.Parsing ( (>>~),
HeaderType (..),
ParserContext (..),
QuoteContext (..),
+ HasQuoteContext (..),
NoteTable,
NoteTable',
KeyTable,
@@ -85,7 +90,6 @@ module Text.Pandoc.Parsing ( (>>~),
toKey,
registerHeader,
smartPunctuation,
- withQuoteContext,
singleQuoteStart,
singleQuoteEnd,
doubleQuoteStart,
@@ -98,12 +102,16 @@ module Text.Pandoc.Parsing ( (>>~),
macro,
applyMacros',
Parser,
+ ParserT,
F(..),
runF,
askF,
asksF,
+ token,
-- * Re-exports from Text.Pandoc.Parsec
+ Stream,
runParser,
+ runParserT,
parse,
anyToken,
getInput,
@@ -154,7 +162,6 @@ module Text.Pandoc.Parsing ( (>>~),
setSourceColumn,
setSourceLine,
newPos,
- token
)
where
@@ -164,25 +171,29 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..))
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.XML (fromEntities)
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
-import Text.Parsec
+import Text.Parsec hiding (token)
import Text.Parsec.Pos (newPos)
-import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isDigit,
+import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
isHexDigit, isSpace )
import Data.List ( intercalate, transpose )
import Text.Pandoc.Shared
import qualified Data.Map as M
-import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
+import Text.TeXMath.Readers.TeX.Macros (applyMacros, Macro,
+ parseMacroDefinitions)
import Text.Pandoc.Compat.TagSoupEntity ( lookupEntity )
import Text.Pandoc.Asciify (toAsciiChar)
import Data.Default
import qualified Data.Set as Set
import Control.Monad.Reader
-import Control.Applicative ((*>), (<*), (<$), liftA2, Applicative)
+import Control.Monad.Identity
+import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative)
import Data.Monoid
import Data.Maybe (catMaybes)
type Parser t s = Parsec t s
+type ParserT = ParsecT
+
newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
runF :: F a -> ParserState -> a
@@ -199,13 +210,8 @@ instance Monoid a => Monoid (F a) where
mappend = liftM2 mappend
mconcat = liftM mconcat . sequence
--- | Like >>, but returns the operation on the left.
--- (Suggested by Tillmann Rendel on Haskell-cafe list.)
-(>>~) :: (Monad m) => m a -> m b -> m a
-a >>~ b = a >>= \x -> b >> return x
-
-- | Parse any line of text
-anyLine :: Parser [Char] st [Char]
+anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char]
anyLine = do
-- This is much faster than:
-- manyTill anyChar newline
@@ -221,9 +227,10 @@ anyLine = do
_ -> mzero
-- | Like @manyTill@, but reads at least one item.
-many1Till :: Parser [tok] st a
- -> Parser [tok] st end
- -> Parser [tok] st [a]
+many1Till :: Stream s m t
+ => ParserT s st m a
+ -> ParserT s st m end
+ -> ParserT s st m [a]
many1Till p end = do
first <- p
rest <- manyTill p end
@@ -232,21 +239,21 @@ many1Till p end = do
-- | A more general form of @notFollowedBy@. This one allows any
-- type of parser to be specified, and succeeds only if that parser fails.
-- It does not consume any input.
-notFollowedBy' :: Show b => Parser [a] st b -> Parser [a] st ()
+notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m ()
notFollowedBy' p = try $ join $ do a <- try p
return (unexpected (show a))
<|>
return (return ())
-- (This version due to Andrew Pimlott on the Haskell mailing list.)
-oneOfStrings' :: (Char -> Char -> Bool) -> [String] -> Parser [Char] st String
+oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
oneOfStrings' _ [] = fail "no strings"
oneOfStrings' matches strs = try $ do
c <- anyChar
let strs' = [xs | (x:xs) <- strs, x `matches` c]
case strs' of
[] -> fail "not found"
- _ -> (c:) `fmap` oneOfStrings' matches strs'
+ _ -> (c:) <$> oneOfStrings' matches strs'
<|> if "" `elem` strs'
then return [c]
else fail "not found"
@@ -254,11 +261,11 @@ oneOfStrings' matches strs = try $ do
-- | Parses one of a list of strings. If the list contains
-- two strings one of which is a prefix of the other, the longer
-- string will be matched if possible.
-oneOfStrings :: [String] -> Parser [Char] st String
+oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String
oneOfStrings = oneOfStrings' (==)
-- | Parses one of a list of strings (tried in order), case insensitive.
-oneOfStringsCI :: [String] -> Parser [Char] st String
+oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String
oneOfStringsCI = oneOfStrings' ciMatch
where ciMatch x y = toLower' x == toLower' y
-- this optimizes toLower by checking common ASCII case
@@ -269,35 +276,35 @@ oneOfStringsCI = oneOfStrings' ciMatch
| otherwise = toLower c
-- | Parses a space or tab.
-spaceChar :: Parser [Char] st Char
+spaceChar :: Stream s m Char => ParserT s st m Char
spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Parser [Char] st Char
+nonspaceChar :: Stream s m Char => ParserT s st m Char
nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r']
-- | Skips zero or more spaces or tabs.
-skipSpaces :: Parser [Char] st ()
+skipSpaces :: Stream s m Char => ParserT s st m ()
skipSpaces = skipMany spaceChar
-- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Parser [Char] st Char
+blankline :: Stream s m Char => ParserT s st m Char
blankline = try $ skipSpaces >> newline
-- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Parser [Char] st [Char]
+blanklines :: Stream s m Char => ParserT s st m [Char]
blanklines = many1 blankline
-- | Parses material enclosed between start and end parsers.
-enclosed :: Parser [Char] st t -- ^ start parser
- -> Parser [Char] st end -- ^ end parser
- -> Parser [Char] st a -- ^ content parser (to be used repeatedly)
- -> Parser [Char] st [a]
+enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser
+ -> ParserT s st m end -- ^ end parser
+ -> ParserT s st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT s st m [a]
enclosed start end parser = try $
start >> notFollowedBy space >> many1Till parser end
-- | Parse string, case insensitive.
-stringAnyCase :: [Char] -> Parser [Char] st String
+stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String
stringAnyCase [] = string ""
stringAnyCase (x:xs) = do
firstChar <- char (toUpper x) <|> char (toLower x)
@@ -305,7 +312,7 @@ stringAnyCase (x:xs) = do
return (firstChar:rest)
-- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: Parser [tok] st a -> [tok] -> Parser [tok] st a
+parseFromString :: Stream s m t => ParserT s st m a -> s -> ParserT s st m a
parseFromString parser str = do
oldPos <- getPosition
oldInput <- getInput
@@ -316,7 +323,7 @@ parseFromString parser str = do
return result
-- | Parse raw line block up to and including blank lines.
-lineClump :: Parser [Char] st String
+lineClump :: Stream [Char] m Char => ParserT [Char] st m String
lineClump = blanklines
<|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
@@ -325,8 +332,8 @@ lineClump = blanklines
-- pairs of open and close, which must be different. For example,
-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
-- and return "hello (there)".
-charsInBalanced :: Char -> Char -> Parser [Char] st Char
- -> Parser [Char] st String
+charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+ -> ParserT s st m String
charsInBalanced open close parser = try $ do
char open
let isDelim c = c == open || c == close
@@ -350,8 +357,8 @@ uppercaseRomanDigits :: [Char]
uppercaseRomanDigits = map toUpper lowercaseRomanDigits
-- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Bool -- ^ Uppercase if true
- -> Parser [Char] st Int
+romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true
+ -> ParserT s st m Int
romanNumeral upperCase = do
let romanDigits = if upperCase
then uppercaseRomanDigits
@@ -383,12 +390,12 @@ romanNumeral upperCase = do
-- | Parses an email address; returns original and corresponding
-- escaped mailto: URI.
-emailAddress :: Parser [Char] st (String, String)
-emailAddress = try $ liftA2 toResult mailbox (char '@' *> domain)
+emailAddress :: Stream s m Char => ParserT s st m (String, String)
+emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom
in (full, escapeURI $ "mailto:" ++ full)
- mailbox = intercalate "." `fmap` (emailWord `sepby1` dot)
- domain = intercalate "." `fmap` (subdomain `sepby1` dot)
+ mailbox = intercalate "." <$> (emailWord `sepby1` dot)
+ domain = intercalate "." <$> (subdomain `sepby1` dot)
dot = char '.'
subdomain = many1 $ alphaNum <|> innerPunct
innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@') <*
@@ -398,11 +405,11 @@ emailAddress = try $ liftA2 toResult mailbox (char '@' *> domain)
isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;"
-- note: sepBy1 from parsec consumes input when sep
-- succeeds and p fails, so we use this variant here.
- sepby1 p sep = liftA2 (:) p (many (try $ sep >> p))
+ sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p))
-- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
--- the unofficial schemes coap, doi, javascript.
+-- the unofficial schemes coap, doi, javascript, isbn, pmid
schemes :: [String]
schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
"crid","data","dav","dict","dns","file","ftp","geo","go","gopher",
@@ -424,13 +431,13 @@ schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
"rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify",
"ssh","steam","svn","teamspeak","things","udp","unreal","ut2004",
"ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
- "ymsgr"]
+ "ymsgr", "isbn", "pmid"]
-uriScheme :: Parser [Char] st String
+uriScheme :: Stream s m Char => ParserT s st m String
uriScheme = oneOfStringsCI schemes
-- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Parser [Char] st (String, String)
+uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
uri = try $ do
scheme <- uriScheme
char ':'
@@ -451,7 +458,7 @@ uri = try $ do
<|> entity
<|> (try $ punct >>
lookAhead (void (satisfy isWordChar) <|> percentEscaped))
- str <- snd `fmap` withRaw (skipMany1 ( () <$
+ str <- snd <$> withRaw (skipMany1 ( () <$
(enclosed (char '(') (char ')') uriChunk
<|> enclosed (char '{') (char '}') uriChunk
<|> enclosed (char '[') (char ']') uriChunk)
@@ -460,7 +467,7 @@ uri = try $ do
let uri' = scheme ++ ":" ++ fromEntities str'
return (uri', escapeURI uri')
-mathInlineWith :: String -> String -> Parser [Char] st String
+mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String
mathInlineWith op cl = try $ do
string op
notFollowedBy space
@@ -474,12 +481,13 @@ mathInlineWith op cl = try $ do
notFollowedBy digit -- to prevent capture of $5
return $ concat words'
-mathDisplayWith :: String -> String -> Parser [Char] st String
+mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
mathDisplayWith op cl = try $ do
string op
- many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
+ many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl)
-mathDisplay :: Parser [Char] ParserState String
+mathDisplay :: (HasReaderOptions st, Stream s m Char)
+ => ParserT s st m String
mathDisplay =
(guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -487,7 +495,8 @@ mathDisplay =
<|> (guardEnabled Ext_tex_math_double_backslash >>
mathDisplayWith "\\\\[" "\\\\]")
-mathInline :: Parser [Char] ParserState String
+mathInline :: (HasReaderOptions st , Stream s m Char)
+ => ParserT s st m String
mathInline =
(guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
<|> (guardEnabled Ext_tex_math_single_backslash >>
@@ -499,8 +508,9 @@ mathInline =
-- displacement (the difference between the source column at the end
-- and the source column at the beginning). Vertical displacement
-- (source row) is ignored.
-withHorizDisplacement :: Parser [Char] st a -- ^ Parser to apply
- -> Parser [Char] st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement :: Stream s m Char
+ => ParserT s st m a -- ^ Parser to apply
+ -> ParserT s st m (a, Int) -- ^ (result, displacement)
withHorizDisplacement parser = do
pos1 <- getPosition
result <- parser
@@ -509,7 +519,7 @@ withHorizDisplacement parser = do
-- | Applies a parser and returns the raw string that was parsed,
-- along with the value produced by the parser.
-withRaw :: Monad m => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char])
+withRaw :: Stream [Char] m Char => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char])
withRaw parser = do
pos1 <- getPosition
inp <- getInput
@@ -525,12 +535,13 @@ withRaw parser = do
return (result, raw)
-- | Parses backslash, then applies character parser.
-escaped :: Parser [Char] st Char -- ^ Parser for character to escape
- -> Parser [Char] st Char
+escaped :: Stream s m Char
+ => ParserT s st m Char -- ^ Parser for character to escape
+ -> ParserT s st m Char
escaped parser = try $ char '\\' >> parser
-- | Parse character entity.
-characterReference :: Parser [Char] st Char
+characterReference :: Stream s m Char => ParserT s st m Char
characterReference = try $ do
char '&'
ent <- many1Till nonspaceChar (char ';')
@@ -539,19 +550,19 @@ characterReference = try $ do
Nothing -> fail "entity not found"
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Parser [Char] st (ListNumberStyle, Int)
+upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperRoman = do
num <- romanNumeral True
return (UpperRoman, num)
-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Parser [Char] st (ListNumberStyle, Int)
+lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerRoman = do
num <- romanNumeral False
return (LowerRoman, num)
-- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Parser [Char] st (ListNumberStyle, Int)
+decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
decimal = do
num <- many1 digit
return (Decimal, read num)
@@ -560,7 +571,8 @@ decimal = do
-- returns (DefaultStyle, [next example number]). The next
-- example number is incremented in parser state, and the label
-- (if present) is added to the label table.
-exampleNum :: Parser [Char] ParserState (ListNumberStyle, Int)
+exampleNum :: Stream s m Char
+ => ParserT s ParserState m (ListNumberStyle, Int)
exampleNum = do
char '@'
lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-'))
@@ -574,38 +586,39 @@ exampleNum = do
return (Example, num)
-- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Parser [Char] st (ListNumberStyle, Int)
+defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
defaultNum = do
char '#'
return (DefaultStyle, 1)
-- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Parser [Char] st (ListNumberStyle, Int)
+lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerAlpha = do
ch <- oneOf ['a'..'z']
return (LowerAlpha, ord ch - ord 'a' + 1)
-- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Parser [Char] st (ListNumberStyle, Int)
+upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperAlpha = do
ch <- oneOf ['A'..'Z']
return (UpperAlpha, ord ch - ord 'A' + 1)
-- | Parses a roman numeral i or I
-romanOne :: Parser [Char] st (ListNumberStyle, Int)
+romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
(char 'I' >> return (UpperRoman, 1))
-- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Parser [Char] ParserState ListAttributes
+anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
anyOrderedListMarker = choice $
[delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
numParser <- [decimal, exampleNum, defaultNum, romanOne,
lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Parser [Char] st (ListNumberStyle, Int)
- -> Parser [Char] st ListAttributes
+inPeriod :: Stream s m Char
+ => ParserT s st m (ListNumberStyle, Int)
+ -> ParserT s st m ListAttributes
inPeriod num = try $ do
(style, start) <- num
char '.'
@@ -615,16 +628,18 @@ inPeriod num = try $ do
return (start, style, delim)
-- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Parser [Char] st (ListNumberStyle, Int)
- -> Parser [Char] st ListAttributes
+inOneParen :: Stream s m Char
+ => ParserT s st m (ListNumberStyle, Int)
+ -> ParserT s st m ListAttributes
inOneParen num = try $ do
(style, start) <- num
char ')'
return (start, style, OneParen)
-- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Parser [Char] st (ListNumberStyle, Int)
- -> Parser [Char] st ListAttributes
+inTwoParens :: Stream s m Char
+ => ParserT s st m (ListNumberStyle, Int)
+ -> ParserT s st m ListAttributes
inTwoParens num = try $ do
char '('
(style, start) <- num
@@ -633,9 +648,10 @@ inTwoParens num = try $ do
-- | Parses an ordered list marker with a given style and delimiter,
-- returns number.
-orderedListMarker :: ListNumberStyle
+orderedListMarker :: Stream s m Char
+ => ListNumberStyle
-> ListNumberDelim
- -> Parser [Char] ParserState Int
+ -> ParserT s ParserState m Int
orderedListMarker style delim = do
let num = defaultNum <|> -- # can continue any kind of list
case style of
@@ -655,12 +671,12 @@ orderedListMarker style delim = do
return start
-- | Parses a character reference and returns a Str element.
-charRef :: Parser [Char] st Inline
+charRef :: Stream s m Char => ParserT s st m Inline
charRef = do
c <- characterReference
return $ Str [c]
-lineBlockLine :: Parser [Char] st String
+lineBlockLine :: Stream [Char] m Char => ParserT [Char] st m String
lineBlockLine = try $ do
char '|'
char ' '
@@ -671,7 +687,7 @@ lineBlockLine = try $ do
return $ white ++ unwords (line : continuations)
-- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Parser [Char] st [String]
+lineBlockLines :: Stream [Char] m Char => ParserT [Char] st m [String]
lineBlockLines = try $ do
lines' <- many1 lineBlockLine
skipMany1 $ blankline <|> try (char '|' >> blankline)
@@ -679,11 +695,12 @@ lineBlockLines = try $ do
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'.
-tableWith :: Parser [Char] ParserState ([[Block]], [Alignment], [Int])
- -> ([Int] -> Parser [Char] ParserState [[Block]])
- -> Parser [Char] ParserState sep
- -> Parser [Char] ParserState end
- -> Parser [Char] ParserState Block
+tableWith :: Stream s m Char
+ => ParserT s ParserState m ([[Block]], [Alignment], [Int])
+ -> ([Int] -> ParserT s ParserState m [[Block]])
+ -> ParserT s ParserState m sep
+ -> ParserT s ParserState m end
+ -> ParserT s ParserState m Block
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- rowParser indices `sepEndBy1` lineParser
@@ -725,9 +742,10 @@ widthsFromIndices numColumns' indices =
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-gridTableWith :: Parser [Char] ParserState [Block] -- ^ Block list parser
+gridTableWith :: Stream [Char] m Char
+ => ParserT [Char] ParserState m [Block] -- ^ Block list parser
-> Bool -- ^ Headerless table
- -> Parser [Char] ParserState Block
+ -> ParserT [Char] ParserState m Block
gridTableWith blocks headless =
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
@@ -736,27 +754,28 @@ gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitStringByIndices (init indices) $ trimr line
-gridPart :: Char -> Parser [Char] st (Int, Int)
+gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int)
gridPart ch = do
dashes <- many1 (char ch)
char '+'
return (length dashes, length dashes + 1)
-gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: String -> String
removeFinalBar =
reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-- | Separator between rows of grid table.
-gridTableSep :: Char -> Parser [Char] ParserState Char
+gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
-gridTableHeader :: Bool -- ^ Headerless table
- -> Parser [Char] ParserState [Block]
- -> Parser [Char] ParserState ([[Block]], [Alignment], [Int])
+gridTableHeader :: Stream [Char] m Char
+ => Bool -- ^ Headerless table
+ -> ParserT [Char] ParserState m [Block]
+ -> ParserT [Char] ParserState m ([[Block]], [Alignment], [Int])
gridTableHeader headless blocks = try $ do
optional blanklines
dashes <- gridDashedLines '-'
@@ -779,16 +798,17 @@ gridTableHeader headless blocks = try $ do
heads <- mapM (parseFromString blocks) $ map trim rawHeads
return (heads, aligns, indices)
-gridTableRawLine :: [Int] -> Parser [Char] ParserState [String]
+gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String]
gridTableRawLine indices = do
char '|'
line <- many1Till anyChar newline
return (gridTableSplitLine indices line)
-- | Parse row of grid table.
-gridTableRow :: Parser [Char] ParserState [Block]
+gridTableRow :: Stream [Char] m Char
+ => ParserT [Char] ParserState m [Block]
-> [Int]
- -> Parser [Char] ParserState [[Block]]
+ -> ParserT [Char] ParserState m [[Block]]
gridTableRow blocks indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
@@ -807,19 +827,21 @@ compactifyCell :: [Block] -> [Block]
compactifyCell bs = head $ compactify [bs]
-- | Parse footer for a grid table.
-gridTableFooter :: Parser [Char] ParserState [Char]
+gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char]
gridTableFooter = blanklines
---
--- | Parse a string with a given parser and state.
-readWith :: Parser [Char] st a -- ^ parser
- -> st -- ^ initial state
- -> [Char] -- ^ input
- -> a
-readWith parser state input =
- case runParser parser state "source" input of
- Left err' ->
+-- | Removes the ParsecT layer from the monad transformer stack
+readWithM :: (Monad m, Functor m)
+ => ParserT [Char] st m a -- ^ parser
+ -> st -- ^ initial state
+ -> String -- ^ input
+ -> m a
+readWithM parser state input =
+ handleError <$> (runParserT parser state "source" input)
+ where
+ handleError (Left err') =
let errPos = errorPos err'
errLine = sourceLine errPos
errColumn = sourceColumn errPos
@@ -827,11 +849,19 @@ readWith parser state input =
in error $ "\nError at " ++ show err' ++ "\n" ++
theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++
"^"
- Right result -> result
+ handleError (Right result) = result
+
+-- | Parse a string with a given parser and state
+readWith :: Parser [Char] st a
+ -> st
+ -> String
+ -> a
+readWith p t inp = runIdentity $ readWithM p t inp
-- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a) => Parser [Char] ParserState a
- -> String
+testStringWith :: (Show a, Stream [Char] Identity Char)
+ => ParserT [Char] ParserState Identity a
+ -> [Char]
-> IO ()
testStringWith parser str = UTF8.putStrLn $ show $
readWith parser defaultParserState str
@@ -863,6 +893,8 @@ data ParserState = ParserState
-- roles), 3) Source language annotation for code (could be used to
-- annotate role classes too).
stateCaption :: Maybe Inlines, -- ^ Caption in current environment
+ stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
+ stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context
stateWarnings :: [String] -- ^ Warnings generated by the parser
}
@@ -877,9 +909,24 @@ instance HasMeta ParserState where
class HasReaderOptions st where
extractReaderOptions :: st -> ReaderOptions
- getOption :: (ReaderOptions -> b) -> Parser s st b
+ getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b
-- default
- getOption f = (f . extractReaderOptions) `fmap` getState
+ getOption f = (f . extractReaderOptions) <$> getState
+
+class HasQuoteContext st m where
+ getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
+ withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
+
+instance Monad m => HasQuoteContext ParserState m where
+ getQuoteContext = stateQuoteContext <$> getState
+ withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
instance HasReaderOptions ParserState where
extractReaderOptions = stateOptions
@@ -941,22 +988,24 @@ defaultParserState =
stateRstDefaultRole = "title-reference",
stateRstCustomRoles = M.empty,
stateCaption = Nothing,
+ stateInHtmlBlock = Nothing,
+ stateMarkdownAttribute = False,
stateWarnings = []}
-- | Succeed only if the extension is enabled.
-guardEnabled :: HasReaderOptions st => Extension -> Parser s st ()
+guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
-- | Succeed only if the extension is disabled.
-guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
+guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
-- | Update the position on which the last string ended.
-updateLastStrPos :: HasLastStrPosition st => Parser s st ()
+updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m ()
updateLastStrPos = getPosition >>= updateState . setLastStrPos
-- | Whether we are right after the end of a string.
-notAfterString :: HasLastStrPosition st => Parser s st Bool
+notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool
notAfterString = do
pos <- getPosition
st <- getState
@@ -996,10 +1045,10 @@ type SubstTable = M.Map Key Inlines
-- and the auto_identifers extension is set, generate a new
-- unique identifier, and update the list of identifiers
-- in state.
-registerHeader :: (HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
- => Attr -> Inlines -> Parser s st Attr
+registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
+ => Attr -> Inlines -> ParserT s st m Attr
registerHeader (ident,classes,kvs) header' = do
- ids <- extractIdentifierList `fmap` getState
+ ids <- extractIdentifierList <$> getState
exts <- getOption readerExtensions
let insert' = M.insertWith (\_new old -> old)
if null ident && Ext_auto_identifiers `Set.member` exts
@@ -1018,136 +1067,113 @@ registerHeader (ident,classes,kvs) header' = do
return (ident,classes,kvs)
-- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: HasReaderOptions st => Parser s st ()
+failUnlessSmart :: (Stream s m a, HasReaderOptions st) => ParserT s st m ()
failUnlessSmart = getOption readerSmart >>= guard
-smartPunctuation :: Parser [Char] ParserState Inlines
- -> Parser [Char] ParserState Inlines
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
smartPunctuation inlineParser = do
failUnlessSmart
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-apostrophe :: Parser [Char] ParserState Inlines
+apostrophe :: Stream s m Char => ParserT s st m Inlines
apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
-quoted :: Parser [Char] ParserState Inlines
- -> Parser [Char] ParserState Inlines
+quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
-withQuoteContext :: QuoteContext
- -> Parser [tok] ParserState a
- -> Parser [tok] ParserState a
-withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-
-singleQuoted :: Parser [Char] ParserState Inlines
- -> Parser [Char] ParserState Inlines
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
singleQuoted inlineParser = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
return . B.singleQuoted . mconcat
-doubleQuoted :: Parser [Char] ParserState Inlines
- -> Parser [Char] ParserState Inlines
+doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m Inlines
+ -> ParserT s st m Inlines
doubleQuoted inlineParser = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>=
return . B.doubleQuoted . mconcat
-failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState ()
+failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
+ => QuoteContext
+ -> ParserT s st m ()
failIfInQuoteContext context = do
- st <- getState
- if stateQuoteContext st == context
+ context' <- getQuoteContext
+ if context' == context
then fail "already inside quotes"
else return ()
-charOrRef :: [Char] -> Parser [Char] st Char
+charOrRef :: Stream s m Char => String -> ParserT s st m Char
charOrRef cs =
oneOf cs <|> try (do c <- characterReference
guard (c `elem` cs)
return c)
-singleQuoteStart :: Parser [Char] ParserState ()
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
-- single quote start can't be right after str
guard =<< notAfterString
() <$ charOrRef "'\8216\145"
-singleQuoteEnd :: Parser [Char] st ()
+singleQuoteEnd :: Stream s m Char
+ => ParserT s st m ()
singleQuoteEnd = try $ do
charOrRef "'\8217\146"
notFollowedBy alphaNum
-doubleQuoteStart :: Parser [Char] ParserState ()
+doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
+ => ParserT s st m ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
try $ do charOrRef "\"\8220\147"
notFollowedBy . satisfy $ flip elem [' ', '\t', '\n']
-doubleQuoteEnd :: Parser [Char] st ()
-doubleQuoteEnd = do
- charOrRef "\"\8221\148"
- return ()
+doubleQuoteEnd :: Stream s m Char
+ => ParserT s st m ()
+doubleQuoteEnd = void (charOrRef "\"\8221\148")
-ellipses :: Parser [Char] st Inlines
-ellipses = do
- try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
- return (B.str "\8230")
+ellipses :: Stream s m Char
+ => ParserT s st m Inlines
+ellipses = try (string "..." >> return (B.str "\8230"))
-dash :: Parser [Char] ParserState Inlines
-dash = do
+dash :: (HasReaderOptions st, Stream s m Char)
+ => ParserT s st m Inlines
+dash = try $ do
oldDashes <- getOption readerOldDashes
if oldDashes
- then emDashOld <|> enDashOld
- else B.str `fmap` (hyphenDash <|> emDash <|> enDash)
-
--- Two hyphens = en-dash, three = em-dash
-hyphenDash :: Parser [Char] st String
-hyphenDash = do
- try $ string "--"
- option "\8211" (char '-' >> return "\8212")
-
-emDash :: Parser [Char] st String
-emDash = do
- try (charOrRef "\8212\151")
- return "\8212"
-
-enDash :: Parser [Char] st String
-enDash = do
- try (charOrRef "\8212\151")
- return "\8211"
-
-enDashOld :: Parser [Char] st Inlines
-enDashOld = do
- try (charOrRef "\8211\150") <|>
- try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
- return (B.str "\8211")
-
-emDashOld :: Parser [Char] st Inlines
-emDashOld = do
- try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
- return (B.str "\8212")
+ then do
+ char '-'
+ (char '-' >> return (B.str "\8212"))
+ <|> (lookAhead digit >> return (B.str "\8211"))
+ else do
+ string "--"
+ (char '-' >> return (B.str "\8212"))
+ <|> return (B.str "\8211")
-- This is used to prevent exponential blowups for things like:
-- a**a*a**a*a**a*a**a*a**a*a**a*a**
-nested :: Parser s ParserState a
- -> Parser s ParserState a
+nested :: Stream s m a
+ => ParserT s ParserState m a
+ -> ParserT s ParserState m a
nested p = do
- nestlevel <- stateMaxNestingLevel `fmap` getState
+ nestlevel <- stateMaxNestingLevel <$> getState
guard $ nestlevel > 0
updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
res <- p
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
-citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
+citeKey :: (Stream s m Char, HasLastStrPosition st)
+ => ParserT s st m (Bool, String)
citeKey = try $ do
guard =<< notAfterString
suppress_author <- option False (char '-' *> return True)
@@ -1159,12 +1185,21 @@ citeKey = try $ do
let key = firstChar:rest
return (suppress_author, key)
+
+token :: (Stream s m t)
+ => (t -> String)
+ -> (t -> SourcePos)
+ -> (t -> Maybe a)
+ -> ParsecT s st m a
+token pp pos match = tokenPrim pp (\_ t _ -> pos t) match
+
--
-- Macros
--
-- | Parse a \newcommand or \renewcommand macro definition.
-macro :: (HasMacros st, HasReaderOptions st) => Parser [Char] st Blocks
+macro :: (Stream [Char] m Char, HasMacros st, HasReaderOptions st)
+ => ParserT [Char] st m Blocks
macro = do
apply <- getOption readerApplyMacros
inp <- getInput
@@ -1179,10 +1214,12 @@ macro = do
else return $ rawBlock "latex" def'
-- | Apply current macros to string.
-applyMacros' :: String -> Parser [Char] ParserState String
+applyMacros' :: (HasReaderOptions st, HasMacros st, Stream [Char] m Char)
+ => String
+ -> ParserT [Char] st m String
applyMacros' target = do
apply <- getOption readerApplyMacros
if apply
- then do macros <- extractMacros `fmap` getState
+ then do macros <- extractMacros <$> getState
return $ applyMacros macros target
else return target
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index d25ba725f..1e72c2040 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -35,6 +35,7 @@ module Text.Pandoc.Pretty (
, render
, cr
, blankline
+ , blanklines
, space
, text
, char
@@ -100,7 +101,7 @@ data D = Text Int String
| BreakingSpace
| CarriageReturn
| NewLine
- | BlankLine
+ | BlankLines Int -- number of blank lines
deriving (Show)
newtype Doc = Doc { unDoc :: Seq D }
@@ -113,7 +114,7 @@ isBlank :: D -> Bool
isBlank BreakingSpace = True
isBlank CarriageReturn = True
isBlank NewLine = True
-isBlank BlankLine = True
+isBlank (BlankLines _) = True
isBlank (Text _ (c:_)) = isSpace c
isBlank _ = False
@@ -190,7 +191,7 @@ vsep = foldr ($+$) empty
nestle :: Doc -> Doc
nestle (Doc d) = Doc $ go d
where go x = case viewl x of
- (BlankLine :< rest) -> go rest
+ (BlankLines _ :< rest) -> go rest
(NewLine :< rest) -> go rest
_ -> x
@@ -203,7 +204,7 @@ chomp d = Doc (fromList dl')
go (BreakingSpace : xs) = go xs
go (CarriageReturn : xs) = go xs
go (NewLine : xs) = go xs
- go (BlankLine : xs) = go xs
+ go (BlankLines _ : xs) = go xs
go (Prefixed s d' : xs) = Prefixed s (chomp d') : xs
go xs = xs
@@ -216,9 +217,10 @@ outp off s | off < 0 = do -- offset < 0 means newline characters
let pref = reverse $ dropWhile isSpace $ reverse rawpref
modify $ \st -> st{ output = fromString pref : output st
, column = column st + realLength pref }
+ let numnewlines = length $ takeWhile (=='\n') $ reverse s
modify $ \st -> st { output = fromString s : output st
, column = 0
- , newlines = newlines st + 1 }
+ , newlines = newlines st + numnewlines }
outp off s = do -- offset >= 0 (0 might be combining char)
st' <- get
let pref = prefix st'
@@ -277,15 +279,11 @@ renderList (BeforeNonBlank d : xs) =
| otherwise -> renderDoc d >> renderList xs
[] -> renderList xs
-renderList (BlankLine : xs) = do
+renderList (BlankLines num : xs) = do
st <- get
case output st of
- _ | newlines st > 1 || null xs -> return ()
- _ | column st == 0 -> do
- outp (-1) "\n"
- _ -> do
- outp (-1) "\n"
- outp (-1) "\n"
+ _ | newlines st > num || null xs -> return ()
+ | otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n")
renderList xs
renderList (CarriageReturn : xs) = do
@@ -302,7 +300,7 @@ renderList (NewLine : xs) = do
renderList (BreakingSpace : CarriageReturn : xs) = renderList (CarriageReturn:xs)
renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs)
-renderList (BreakingSpace : BlankLine : xs) = renderList (BlankLine:xs)
+renderList (BreakingSpace : BlankLines n : xs) = renderList (BlankLines n:xs)
renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs)
renderList (BreakingSpace : xs) = do
let isText (Text _ _) = True
@@ -383,9 +381,13 @@ cr = Doc $ singleton CarriageReturn
-- | Inserts a blank line unless one exists already.
-- (@blankline <> blankline@ has the same effect as @blankline@.
--- If you want multiple blank lines, use @text "\\n\\n"@.
blankline :: Doc
-blankline = Doc $ singleton BlankLine
+blankline = Doc $ singleton (BlankLines 1)
+
+-- | Inserts a blank lines unless they exists already.
+-- (@blanklines m <> blanklines n@ has the same effect as @blankline (max m n)@.
+blanklines :: Int -> Doc
+blanklines n = Doc $ singleton (BlankLines n)
-- | Uses the specified string as a prefix for every line of
-- the inside document (except the first, if not at the beginning
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index cf1d5132e..1e119e729 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -769,7 +769,12 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ strContent e
+ $ trimNl $ strContentRecursive e
+ strContentRecursive = strContent . (\e' -> e'{ elContent =
+ map elementToStr $ elContent e' })
+ elementToStr :: Content -> Content
+ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
+ elementToStr x = x
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 09c2330fb..085ee01fc 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE PatternGuards #-}
+
{-
Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -76,48 +78,188 @@ import Codec.Archive.Zip
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Builder (text, toList)
-import Text.Pandoc.Generic (bottomUp)
-import Text.Pandoc.MIME (getMimeType)
-import Text.Pandoc.UTF8 (toString)
+import Text.Pandoc.Walk
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Readers.Docx.Lists
-import Data.Maybe (mapMaybe, isJust, fromJust)
-import Data.List (delete, isPrefixOf, (\\), intersect)
-import qualified Data.ByteString as BS
+import Text.Pandoc.Readers.Docx.Reducible
+import Text.Pandoc.Shared
+import Text.Pandoc.MediaBag (insertMedia, MediaBag)
+import Data.Maybe (mapMaybe)
+import Data.List (delete, stripPrefix, (\\), intersect)
+import Data.Monoid
+import Text.TeXMath (writeTeX)
+import Data.Default (Default)
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Base64 (encode)
-import System.FilePath (combine)
+import qualified Data.Map as M
+import Control.Monad.Reader
+import Control.Monad.State
+import Control.Applicative ((<$>))
readDocx :: ReaderOptions
-> B.ByteString
- -> Pandoc
+ -> (Pandoc, MediaBag)
readDocx opts bytes =
case archiveToDocx (toArchive bytes) of
- Just docx -> Pandoc nullMeta (docxToBlocks opts docx)
- Nothing -> error $ "couldn't parse docx file"
-
-runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)])
-runStyleToSpanAttr rPr = ("",
- mapMaybe id [
- if isBold rPr then (Just "strong") else Nothing,
- if isItalic rPr then (Just "emph") else Nothing,
- if isSmallCaps rPr then (Just "smallcaps") else Nothing,
- if isStrike rPr then (Just "strike") else Nothing,
- if isSuperScript rPr then (Just "superscript") else Nothing,
- if isSubScript rPr then (Just "subscript") else Nothing,
- rStyle rPr],
- case underline rPr of
- Just fmt -> [("underline", fmt)]
- _ -> []
- )
-
-parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)])
-parStyleToDivAttr pPr = ("",
- pStyle pPr,
- case indent pPr of
- Just n -> [("indent", (show n))]
- Nothing -> []
- )
+ Right docx -> (Pandoc meta blks, mediaBag) where
+ (meta, blks, mediaBag) = (docxToOutput opts docx)
+ Left _ -> error $ "couldn't parse docx file"
+
+data DState = DState { docxAnchorMap :: M.Map String String
+ , docxMediaBag :: MediaBag }
+
+instance Default DState where
+ def = DState { docxAnchorMap = M.empty
+ , docxMediaBag = mempty }
+
+data DEnv = DEnv { docxOptions :: ReaderOptions
+ , docxInHeaderBlock :: Bool }
+
+instance Default DEnv where
+ def = DEnv def False
+
+type DocxContext = ReaderT DEnv (State DState)
+
+evalDocxContext :: DocxContext a -> DEnv -> DState -> a
+evalDocxContext ctx env st = evalState (runReaderT ctx env) st
+
+concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+concatMapM f xs = liftM concat (mapM f xs)
+
+-- This is empty, but we put it in for future-proofing.
+spansToKeep :: [String]
+spansToKeep = []
+
+divsToKeep :: [String]
+divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
+
+metaStyles :: M.Map String String
+metaStyles = M.fromList [ ("Title", "title")
+ , ("Subtitle", "subtitle")
+ , ("Author", "author")
+ , ("Date", "date")
+ , ("Abstract", "abstract")]
+
+sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
+sepBodyParts = span (\bp -> (isMetaPar bp || isEmptyPar bp))
+
+isMetaPar :: BodyPart -> Bool
+isMetaPar (Paragraph pPr _) =
+ not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
+isMetaPar _ = False
+
+isEmptyPar :: BodyPart -> Bool
+isEmptyPar (Paragraph _ parParts) =
+ all isEmptyParPart parParts
+ where
+ isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
+ isEmptyParPart _ = False
+ isEmptyElem (TextRun s) = trim s == ""
+ isEmptyElem _ = True
+isEmptyPar _ = False
+
+bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue)
+bodyPartsToMeta' [] = return M.empty
+bodyPartsToMeta' (bp : bps)
+ | (Paragraph pPr parParts) <- bp
+ , (c : _)<- intersect (pStyle pPr) (M.keys metaStyles)
+ , (Just metaField) <- M.lookup c metaStyles = do
+ inlines <- parPartsToInlines parParts
+ remaining <- bodyPartsToMeta' bps
+ let
+ f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
+ f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks)
+ f m (MetaList mv) = MetaList (m : mv)
+ f m n = MetaList [m, n]
+ return $ M.insertWith f metaField (MetaInlines inlines) remaining
+bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
+
+bodyPartsToMeta :: [BodyPart] -> DocxContext Meta
+bodyPartsToMeta bps = do
+ mp <- bodyPartsToMeta' bps
+ let mp' =
+ case M.lookup "author" mp of
+ Just mv -> M.insert "author" (fixAuthors mv) mp
+ Nothing -> mp
+ return $ Meta mp'
+
+fixAuthors :: MetaValue -> MetaValue
+fixAuthors (MetaBlocks blks) =
+ MetaList $ map g $ filter f blks
+ where f (Para _) = True
+ f _ = False
+ g (Para ils) = MetaInlines ils
+ g _ = MetaInlines []
+fixAuthors mv = mv
+
+runStyleToContainers :: RunStyle -> [Container Inline]
+runStyleToContainers rPr =
+ let spanClassToContainers :: String -> [Container Inline]
+ spanClassToContainers s | s `elem` codeSpans =
+ [Container $ (\ils -> Code ("", [], []) (concatMap ilToCode ils))]
+ spanClassToContainers s | s `elem` spansToKeep =
+ [Container $ Span ("", [s], [])]
+ spanClassToContainers _ = []
+
+ classContainers = case rStyle rPr of
+ Nothing -> []
+ Just s -> spanClassToContainers s
+
+ formatters = map Container $ mapMaybe id
+ [ if isBold rPr then (Just Strong) else Nothing
+ , if isItalic rPr then (Just Emph) else Nothing
+ , if isSmallCaps rPr then (Just SmallCaps) else Nothing
+ , if isStrike rPr then (Just Strikeout) else Nothing
+ , if isSuperScript rPr then (Just Superscript) else Nothing
+ , if isSubScript rPr then (Just Subscript) else Nothing
+ , rUnderline rPr >>=
+ (\f -> if f == "single" then (Just Emph) else Nothing)
+ ]
+ in
+ classContainers ++ formatters
+
+parStyleToContainers :: ParagraphStyle -> [Container Block]
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, Just n <- isHeaderClass c =
+ [Container $ \_ -> Header n ("", delete ("Heading" ++ show n) cs, []) []]
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` divsToKeep =
+ let pPr' = pPr { pStyle = cs }
+ in
+ (Container $ Div ("", [c], [])) : (parStyleToContainers pPr')
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` codeDivs =
+ -- This is a bit of a cludge. We make the codeblock from the raw
+ -- parparts in bodyPartToBlocks. But we need something to match against.
+ let pPr' = pPr { pStyle = cs }
+ in
+ (Container $ \_ -> CodeBlock ("", [], []) "") : (parStyleToContainers pPr')
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` listParagraphDivs =
+ let pPr' = pPr { pStyle = cs, indentation = Nothing}
+ in
+ (Container $ Div ("", [c], [])) : (parStyleToContainers pPr')
+
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` blockQuoteDivs =
+ let pPr' = pPr { pStyle = cs \\ blockQuoteDivs }
+ in
+ (Container BlockQuote) : (parStyleToContainers pPr')
+parStyleToContainers pPr | (_:cs) <- pStyle pPr =
+ let pPr' = pPr { pStyle = cs}
+ in
+ parStyleToContainers pPr'
+parStyleToContainers pPr | null (pStyle pPr),
+ Just left <- indentation pPr >>= leftParIndent,
+ Just hang <- indentation pPr >>= hangingParIndent =
+ let pPr' = pPr { indentation = Nothing }
+ in
+ case (left - hang) > 0 of
+ True -> (Container BlockQuote) : (parStyleToContainers pPr')
+ False -> parStyleToContainers pPr'
+parStyleToContainers pPr | null (pStyle pPr),
+ Just left <- indentation pPr >>= leftParIndent =
+ let pPr' = pPr { indentation = Nothing }
+ in
+ case left > 0 of
+ True -> (Container BlockQuote) : (parStyleToContainers pPr')
+ False -> parStyleToContainers pPr'
+parStyleToContainers _ = []
+
strToInlines :: String -> [Inline]
strToInlines = toList . text
@@ -144,115 +286,103 @@ runElemToString (Tab) = ['\t']
runElemsToString :: [RunElem] -> String
runElemsToString = concatMap runElemToString
---- We use this instead of the more general
---- Text.Pandoc.Shared.normalize for reasons of efficiency. For
---- whatever reason, `normalize` makes a run take almost twice as
---- long. (It does more, but this does what we need)
-inlineNormalize :: [Inline] -> [Inline]
-inlineNormalize [] = []
-inlineNormalize (Str "" : ils) = inlineNormalize ils
-inlineNormalize ((Str s) : (Str s') : l) =
- inlineNormalize (Str (s++s') : l)
-inlineNormalize ((Emph ils) : (Emph ils') : l) =
- inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Emph ils) : l) =
- Emph (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Strong ils) : (Strong ils') : l) =
- inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Strong ils) : l) =
- Strong (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) =
- inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Strikeout ils) : l) =
- Strikeout (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Superscript ils) : (Superscript ils') : l) =
- inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Superscript ils) : l) =
- Superscript (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Subscript ils) : (Subscript ils') : l) =
- inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l
-inlineNormalize ((Subscript ils) : l) =
- Subscript (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Space : Space : l)) =
- inlineNormalize $ (Space : l)
-inlineNormalize ((Quoted qt ils) : l) =
- Quoted qt (inlineNormalize ils) : inlineNormalize l
-inlineNormalize ((Cite cits ils) : l) =
- let
- f :: Citation -> Citation
- f (Citation s pref suff mode num hash) =
- Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash
- in
- Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize ((Link ils s) : l) =
- Link (inlineNormalize ils) s : (inlineNormalize l)
-inlineNormalize ((Image ils s) : l) =
- Image (inlineNormalize ils) s : (inlineNormalize l)
-inlineNormalize ((Note blks) : l) =
- Note (map blockNormalize blks) : (inlineNormalize l)
-inlineNormalize ((Span attr ils) : l) =
- Span attr (inlineNormalize ils) : (inlineNormalize l)
-inlineNormalize (il : l) = il : (inlineNormalize l)
-
-stripSpaces :: [Inline] -> [Inline]
-stripSpaces ils =
- reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils
-
-blockNormalize :: Block -> Block
-blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils
-blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils
-blockNormalize (Header n attr ils) =
- Header n attr $ stripSpaces $ inlineNormalize ils
-blockNormalize (Table ils align width hdr cells) =
- Table (stripSpaces $ inlineNormalize ils) align width hdr cells
-blockNormalize (DefinitionList pairs) =
- DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs
-blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks)
-blockNormalize (OrderedList attr blkslst) =
- OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst
-blockNormalize (BulletList blkslst) =
- BulletList $ map (\blks -> map blockNormalize blks) blkslst
-blockNormalize (Div attr blks) = Div attr (map blockNormalize blks)
-blockNormalize blk = blk
-
-runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
-runToInlines _ _ (Run rs runElems)
- | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans =
- case runStyleToSpanAttr rs == ("", [], []) of
- True -> [Str (runElemsToString runElems)]
- False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]]
- | otherwise = case runStyleToSpanAttr rs == ("", [], []) of
- True -> concatMap runElemToInlines runElems
- False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)]
-runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) =
- case (getFootNote fnId notes) of
- Just bodyParts ->
- [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
- Nothing ->
- [Note [Div ("", ["footnote"], []) []]]
-runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
- case (getEndNote fnId notes) of
- Just bodyParts ->
- [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]]
- Nothing ->
- [Note [Div ("", ["endnote"], []) []]]
-
-parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
-parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
-parPartToInlines _ _ (BookMark _ anchor) =
- [Span (anchor, ["anchor"], []) []]
-parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
- case lookupRelationship relid rels of
- Just target -> [Image [] (combine "word" target, "")]
- Nothing -> [Image [] ("", "")]
-parPartToInlines opts docx (InternalHyperLink anchor runs) =
- [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")]
-parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) =
- case lookupRelationship relid rels of
- Just target ->
- [Link (concatMap (runToInlines opts docx) runs) (target, "")]
- Nothing ->
- [Link (concatMap (runToInlines opts docx) runs) ("", "")]
+runToString :: Run -> String
+runToString (Run _ runElems) = runElemsToString runElems
+runToString _ = ""
+
+parPartToString :: ParPart -> String
+parPartToString (PlainRun run) = runToString run
+parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
+parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
+parPartToString _ = ""
+
+
+inlineCodeContainer :: Container Inline -> Bool
+inlineCodeContainer (Container f) = case f [] of
+ Code _ "" -> True
+ _ -> False
+inlineCodeContainer _ = False
+
+
+runToInlines :: Run -> DocxContext [Inline]
+runToInlines (Run rs runElems)
+ | any inlineCodeContainer (runStyleToContainers rs) =
+ return $
+ rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems]
+ | otherwise =
+ return $
+ rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems)
+runToInlines (Footnote bps) =
+ concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
+runToInlines (Endnote bps) =
+ concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
+runToInlines (InlineDrawing fp bs) = do
+ mediaBag <- gets docxMediaBag
+ modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
+ return [Image [] (fp, "")]
+
+
+
+
+parPartToInlines :: ParPart -> DocxContext [Inline]
+parPartToInlines (PlainRun r) = runToInlines r
+parPartToInlines (Insertion _ author date runs) = do
+ opts <- asks docxOptions
+ case readerTrackChanges opts of
+ AcceptChanges -> concatMapM runToInlines runs >>= return
+ RejectChanges -> return []
+ AllChanges -> do
+ ils <- (concatMapM runToInlines runs)
+ return [Span
+ ("", ["insertion"], [("author", author), ("date", date)])
+ ils]
+parPartToInlines (Deletion _ author date runs) = do
+ opts <- asks docxOptions
+ case readerTrackChanges opts of
+ AcceptChanges -> return []
+ RejectChanges -> concatMapM runToInlines runs >>= return
+ AllChanges -> do
+ ils <- concatMapM runToInlines runs
+ return [Span
+ ("", ["deletion"], [("author", author), ("date", date)])
+ ils]
+parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = return []
+parPartToInlines (BookMark _ anchor) =
+ -- We record these, so we can make sure not to overwrite
+ -- user-defined anchor links with header auto ids.
+ do
+ -- get whether we're in a header.
+ inHdrBool <- asks docxInHeaderBlock
+ -- Get the anchor map.
+ anchorMap <- gets docxAnchorMap
+ -- We don't want to rewrite if we're in a header, since we'll take
+ -- care of that later, when we make the header anchor. If the
+ -- bookmark were already in uniqueIdent form, this would lead to a
+ -- duplication. Otherwise, we check to see if the id is already in
+ -- there. Rewrite if necessary. This will have the possible effect
+ -- of rewriting user-defined anchor links. However, since these
+ -- are not defined in pandoc, it seems like a necessary evil to
+ -- avoid an extra pass.
+ let newAnchor =
+ if not inHdrBool && anchor `elem` (M.elems anchorMap)
+ then uniqueIdent [Str anchor] (M.elems anchorMap)
+ else anchor
+ unless inHdrBool
+ (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
+ return [Span (newAnchor, ["anchor"], []) []]
+parPartToInlines (Drawing fp bs) = do
+ mediaBag <- gets docxMediaBag
+ modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
+ return [Image [] (fp, "")]
+parPartToInlines (InternalHyperLink anchor runs) = do
+ ils <- concatMapM runToInlines runs
+ return [Link ils ('#' : anchor, "")]
+parPartToInlines (ExternalHyperLink target runs) = do
+ ils <- concatMapM runToInlines runs
+ return [Link ils (target, "")]
+parPartToInlines (PlainOMath exps) = do
+ return [Math InlineMath (writeTeX exps)]
+
isAnchorSpan :: Inline -> Bool
isAnchorSpan (Span (ident, classes, kvs) ils) =
@@ -265,74 +395,106 @@ isAnchorSpan _ = False
dummyAnchors :: [String]
dummyAnchors = ["_GoBack"]
-makeHeaderAnchors :: Block -> Block
-makeHeaderAnchors h@(Header n (_, classes, kvs) ils) =
- case filter isAnchorSpan ils of
- [] -> h
- (x@(Span (ident, _, _) _) : xs) ->
- case ident `elem` dummyAnchors of
- True -> h
- False -> Header n (ident, classes, kvs) (ils \\ (x:xs))
- _ -> h
-makeHeaderAnchors blk = blk
-
-
-parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline]
-parPartsToInlines opts docx parparts =
- --
- -- We're going to skip data-uri's for now. It should be an option,
- -- not mandatory.
- --
- (if False -- TODO depend on option
- then bottomUp (makeImagesSelfContained docx)
- else id) $
- bottomUp spanTrim $
- bottomUp spanCorrect $
- bottomUp spanReduce $
- concatMap (parPartToInlines opts docx) parparts
-
-cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block]
-cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps
-
-rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]]
-rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells
-
-bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block
-bodyPartToBlock opts docx (Paragraph pPr parparts) =
- Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)]
-bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) =
+makeHeaderAnchor :: Block -> DocxContext Block
+-- If there is an anchor already there (an anchor span in the header,
+-- to be exact), we rename and associate the new id with the old one.
+makeHeaderAnchor (Header n (_, classes, kvs) ils)
+ | (x : xs) <- filter isAnchorSpan ils
+ , (Span (ident, _, _) _) <- x
+ , notElem ident dummyAnchors =
+ do
+ hdrIDMap <- gets docxAnchorMap
+ let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+ modify $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap}
+ return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs))
+-- Otherwise we just give it a name, and register that name (associate
+-- it with itself.)
+makeHeaderAnchor (Header n (_, classes, kvs) ils) =
+ do
+ hdrIDMap <- gets docxAnchorMap
+ let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+ modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
+ return $ Header n (newIdent, classes, kvs) ils
+makeHeaderAnchor blk = return blk
+
+
+parPartsToInlines :: [ParPart] -> DocxContext [Inline]
+parPartsToInlines parparts = do
+ ils <- concatMapM parPartToInlines parparts
+ return $ reduceList $ ils
+
+cellToBlocks :: Cell -> DocxContext [Block]
+cellToBlocks (Cell bps) = concatMapM bodyPartToBlocks bps
+
+rowToBlocksList :: Row -> DocxContext [[Block]]
+rowToBlocksList (Row cells) = mapM cellToBlocks cells
+
+isBlockCodeContainer :: Container Block -> Bool
+isBlockCodeContainer (Container f) | CodeBlock _ _ <- f [] = True
+isBlockCodeContainer _ = False
+
+isHeaderContainer :: Container Block -> Bool
+isHeaderContainer (Container f) | Header _ _ _ <- f [] = True
+isHeaderContainer _ = False
+
+bodyPartToBlocks :: BodyPart -> DocxContext [Block]
+bodyPartToBlocks (Paragraph pPr parparts)
+ | any isBlockCodeContainer (parStyleToContainers pPr) =
+ let
+ otherConts = filter (not . isBlockCodeContainer) (parStyleToContainers pPr)
+ in
+ return $
+ rebuild
+ otherConts
+ [CodeBlock ("", [], []) (concatMap parPartToString parparts)]
+bodyPartToBlocks (Paragraph pPr parparts)
+ | any isHeaderContainer (parStyleToContainers pPr) = do
+ ils <- normalizeSpaces <$> local (\s -> s{docxInHeaderBlock = True})
+ (parPartsToInlines parparts)
+ let (Container hdrFun) = head $ filter isHeaderContainer (parStyleToContainers pPr)
+ Header n attr _ = hdrFun []
+ hdr <- makeHeaderAnchor $ Header n attr ils
+ return [hdr]
+bodyPartToBlocks (Paragraph pPr parparts) = do
+ ils <- parPartsToInlines parparts >>= (return . normalizeSpaces)
+ case ils of
+ [] -> return []
+ _ -> do
+ return $
+ rebuild
+ (parStyleToContainers pPr)
+ [Para ils]
+bodyPartToBlocks (ListItem pPr numId lvl levelInfo parparts) = do
let
- kvs = case lookupLevel numId lvl numbering of
- Just (_, fmt, txt, Just start) -> [ ("level", lvl)
- , ("num-id", numId)
- , ("format", fmt)
- , ("text", txt)
- , ("start", (show start))
- ]
-
- Just (_, fmt, txt, Nothing) -> [ ("level", lvl)
- , ("num-id", numId)
- , ("format", fmt)
- , ("text", txt)
- ]
- Nothing -> []
- in
- Div
- ("", ["list-item"], kvs)
- [bodyPartToBlock opts docx (Paragraph pPr parparts)]
-bodyPartToBlock _ _ (Tbl _ _ _ []) =
- Para []
-bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
+ kvs = case levelInfo of
+ (_, fmt, txt, Just start) -> [ ("level", lvl)
+ , ("num-id", numId)
+ , ("format", fmt)
+ , ("text", txt)
+ , ("start", (show start))
+ ]
+
+ (_, fmt, txt, Nothing) -> [ ("level", lvl)
+ , ("num-id", numId)
+ , ("format", fmt)
+ , ("text", txt)
+ ]
+ blks <- bodyPartToBlocks (Paragraph pPr parparts)
+ return $ [Div ("", ["list-item"], kvs) blks]
+bodyPartToBlocks (Tbl _ _ _ []) =
+ return [Para []]
+bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
let caption = strToInlines cap
(hdr, rows) = case firstRowFormatting look of
True -> (Just r, rs)
False -> (Nothing, r:rs)
- hdrCells = case hdr of
- Just r' -> rowToBlocksList opts docx r'
- Nothing -> []
- cells = map (rowToBlocksList opts docx) rows
+ hdrCells <- case hdr of
+ Just r' -> rowToBlocksList r'
+ Nothing -> return []
- size = case null hdrCells of
+ cells <- mapM rowToBlocksList rows
+
+ let size = case null hdrCells of
True -> length $ head cells
False -> length $ hdrCells
--
@@ -341,208 +503,49 @@ bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) =
-- moment. Width information is in the TblGrid field of the Tbl,
-- so should be possible. Alignment might be more difficult,
-- since there doesn't seem to be a column entity in docx.
- alignments = take size (repeat AlignDefault)
- widths = take size (repeat 0) :: [Double]
- in
- Table caption alignments widths hdrCells cells
-
-makeImagesSelfContained :: Docx -> Inline -> Inline
-makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) =
- case lookup uri media of
- Just bs -> case getMimeType uri of
- Just mime -> let data_uri =
- "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs)
- in
- Image alt (data_uri, title)
- Nothing -> i
- Nothing -> i
-makeImagesSelfContained _ inline = inline
-
-bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
-bodyToBlocks opts docx (Body bps) =
- bottomUp removeEmptyPars $
- map blockNormalize $
- bottomUp spanRemove $
- bottomUp divRemove $
- map (makeHeaderAnchors) $
- bottomUp divCorrect $
- bottomUp divReduce $
- bottomUp divCorrectPreReduce $
- bottomUp blocksToDefinitions $
- blocksToBullets $
- map (bodyPartToBlock opts docx) bps
-
-docxToBlocks :: ReaderOptions -> Docx -> [Block]
-docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body
-
-spanReduce :: [Inline] -> [Inline]
-spanReduce [] = []
-spanReduce ((Span (id1, classes1, kvs1) ils1) : ils)
- | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils)
-spanReduce (s1@(Span (id1, classes1, kvs1) ils1) :
- s2@(Span (id2, classes2, kvs2) ils2) :
- ils) =
- let classes' = classes1 `intersect` classes2
- kvs' = kvs1 `intersect` kvs2
- classes1' = classes1 \\ classes'
- kvs1' = kvs1 \\ kvs'
- classes2' = classes2 \\ classes'
- kvs2' = kvs2 \\ kvs'
- in
- case null classes' && null kvs' of
- True -> s1 : (spanReduce (s2 : ils))
- False -> let attr' = ("", classes', kvs')
- attr1' = (id1, classes1', kvs1')
- attr2' = (id2, classes2', kvs2')
- in
- spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] :
- ils)
-spanReduce (il:ils) = il : (spanReduce ils)
+ alignments = replicate size AlignDefault
+ widths = replicate size 0 :: [Double]
+
+ return [Table caption alignments widths hdrCells cells]
+bodyPartToBlocks (OMathPara e) = do
+ return [Para [Math DisplayMath (writeTeX e)]]
+
+
+-- replace targets with generated anchors.
+rewriteLink :: Inline -> DocxContext Inline
+rewriteLink l@(Link ils ('#':target, title)) = do
+ anchorMap <- gets docxAnchorMap
+ return $ case M.lookup target anchorMap of
+ Just newTarget -> (Link ils ('#':newTarget, title))
+ Nothing -> l
+rewriteLink il = return il
+
+bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
+bodyToOutput (Body bps) = do
+ let (metabps, blkbps) = sepBodyParts bps
+ meta <- bodyPartsToMeta metabps
+ blks <- concatMapM bodyPartToBlocks blkbps >>=
+ walkM rewriteLink
+ mediaBag <- gets docxMediaBag
+ return $ (meta,
+ blocksToDefinitions $ blocksToBullets $ blks,
+ mediaBag)
+
+docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
+docxToOutput opts (Docx (Document _ body)) =
+ let dEnv = def { docxOptions = opts} in
+ evalDocxContext (bodyToOutput body) dEnv def
+
ilToCode :: Inline -> String
ilToCode (Str s) = s
-ilToCode _ = ""
-
-spanRemove' :: Inline -> [Inline]
-spanRemove' s@(Span (ident, classes, _) [])
- -- "_GoBack" is automatically inserted. We don't want to keep it.
- | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s]
-spanRemove' (Span (_, _, kvs) ils) =
- case lookup "underline" kvs of
- Just val -> [Span ("", [], [("underline", val)]) ils]
- Nothing -> ils
-spanRemove' il = [il]
-
-spanRemove :: [Inline] -> [Inline]
-spanRemove = concatMap spanRemove'
-
-spanTrim' :: Inline -> [Inline]
-spanTrim' il@(Span _ []) = [il]
-spanTrim' il@(Span attr (il':[]))
- | il' == Space = [Span attr [], Space]
- | otherwise = [il]
-spanTrim' (Span attr ils)
- | head ils == Space && last ils == Space =
- [Space, Span attr (init $ tail ils), Space]
- | head ils == Space = [Space, Span attr (tail ils)]
- | last ils == Space = [Span attr (init ils), Space]
-spanTrim' il = [il]
-
-spanTrim :: [Inline] -> [Inline]
-spanTrim = concatMap spanTrim'
-
-spanCorrect' :: Inline -> [Inline]
-spanCorrect' (Span ("", [], []) ils) = ils
-spanCorrect' (Span (ident, classes, kvs) ils)
- | "emph" `elem` classes =
- [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils]
- | "strong" `elem` classes =
- [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils]
- | "smallcaps" `elem` classes =
- [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils]
- | "strike" `elem` classes =
- [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils]
- | "superscript" `elem` classes =
- [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils]
- | "subscript" `elem` classes =
- [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils]
- | (not . null) (codeSpans `intersect` classes) =
- [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)]
- | otherwise =
- [Span (ident, classes, kvs) ils]
-spanCorrect' il = [il]
-
-spanCorrect :: [Inline] -> [Inline]
-spanCorrect = concatMap spanCorrect'
-
-removeEmptyPars :: [Block] -> [Block]
-removeEmptyPars blks = filter (\b -> b /= (Para [])) blks
-
-divReduce :: [Block] -> [Block]
-divReduce [] = []
-divReduce ((Div (id1, classes1, kvs1) blks1) : blks)
- | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks)
-divReduce (d1@(Div (id1, classes1, kvs1) blks1) :
- d2@(Div (id2, classes2, kvs2) blks2) :
- blks) =
- let classes' = classes1 `intersect` classes2
- kvs' = kvs1 `intersect` kvs2
- classes1' = classes1 \\ classes'
- kvs1' = kvs1 \\ kvs'
- classes2' = classes2 \\ classes'
- kvs2' = kvs2 \\ kvs'
- in
- case null classes' && null kvs' of
- True -> d1 : (divReduce (d2 : blks))
- False -> let attr' = ("", classes', kvs')
- attr1' = (id1, classes1', kvs1')
- attr2' = (id2, classes2', kvs2')
- in
- divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] :
- blks)
-divReduce (blk:blks) = blk : (divReduce blks)
+ilToCode Space = " "
+ilToCode _ = ""
isHeaderClass :: String -> Maybe Int
-isHeaderClass s | "Heading" `isPrefixOf` s =
- case reads (drop (length "Heading") s) :: [(Int, String)] of
+isHeaderClass s | Just s' <- stripPrefix "Heading" s =
+ case reads s' :: [(Int, String)] of
[] -> Nothing
((n, "") : []) -> Just n
_ -> Nothing
isHeaderClass _ = Nothing
-
-findHeaderClass :: [String] -> Maybe Int
-findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of
- [] -> Nothing
- n : _ -> Just n
-
-blksToInlines :: [Block] -> [Inline]
-blksToInlines (Para ils : _) = ils
-blksToInlines (Plain ils : _) = ils
-blksToInlines _ = []
-
-divCorrectPreReduce' :: Block -> [Block]
-divCorrectPreReduce' (Div (ident, classes, kvs) blks)
- | isJust $ findHeaderClass classes =
- let n = fromJust $ findHeaderClass classes
- in
- [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)]
- | otherwise = [Div (ident, classes, kvs) blks]
-divCorrectPreReduce' blk = [blk]
-
-divCorrectPreReduce :: [Block] -> [Block]
-divCorrectPreReduce = concatMap divCorrectPreReduce'
-
-blkToCode :: Block -> String
-blkToCode (Para []) = ""
-blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils))
-blkToCode (Para ((Span (_, classes, _) ils'): ils))
- | (not . null) (codeSpans `intersect` classes) =
- (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils))
-blkToCode _ = ""
-
-divRemove' :: Block -> [Block]
-divRemove' (Div (_, _, kvs) blks) =
- case lookup "indent" kvs of
- Just val -> [Div ("", [], [("indent", val)]) blks]
- Nothing -> blks
-divRemove' blk = [blk]
-
-divRemove :: [Block] -> [Block]
-divRemove = concatMap divRemove'
-
-divCorrect' :: Block -> [Block]
-divCorrect' b@(Div (ident, classes, kvs) blks)
- | (not . null) (blockQuoteDivs `intersect` classes) =
- [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]]
- | (not . null) (codeDivs `intersect` classes) =
- [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)]
- | otherwise =
- case lookup "indent" kvs of
- Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]
- Just _ ->
- [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]]
- Nothing -> [b]
-divCorrect' blk = [blk]
-
-divCorrect :: [Block] -> [Block]
-divCorrect = concatMap divCorrect'
diff --git a/src/Text/Pandoc/Readers/Docx/Fonts.hs b/src/Text/Pandoc/Readers/Docx/Fonts.hs
new file mode 100644
index 000000000..b44c71412
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Fonts.hs
@@ -0,0 +1,238 @@
+{-
+Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Docx.Fonts
+ Copyright : Copyright (C) 2014 Matthew Pickering
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Matthew Pickering <matthewtpickering@gmail.com>
+ Stability : alpha
+ Portability : portable
+
+Utilities to convert between font codepoints and unicode characters.
+-}
+module Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) where
+
+
+-- | Enumeration of recognised fonts
+data Font = Symbol -- ^ <http://en.wikipedia.org/wiki/Symbol_(typeface) Adobe Symbol>
+ deriving (Show, Eq)
+
+-- | Given a font and codepoint, returns the corresponding unicode
+-- character
+getUnicode :: Font -> Char -> Maybe Char
+getUnicode Symbol c = lookup c symbol
+
+-- Generated from lib/fonts/symbol.txt
+symbol :: [(Char, Char)]
+symbol =
+ [ (' ',' ')
+ , (' ','\160')
+ , ('!','!')
+ , ('"','\8704')
+ , ('#','#')
+ , ('$','\8707')
+ , ('%','%')
+ , ('&','&')
+ , ('\'','\8715')
+ , ('(','(')
+ , (')',')')
+ , ('*','\8727')
+ , ('+','+')
+ , (',',',')
+ , ('-','\8722')
+ , ('.','.')
+ , ('/','/')
+ , ('0','0')
+ , ('1','1')
+ , ('2','2')
+ , ('3','3')
+ , ('4','4')
+ , ('5','5')
+ , ('6','6')
+ , ('7','7')
+ , ('8','8')
+ , ('9','9')
+ , (':',':')
+ , (';',';')
+ , ('<','<')
+ , ('=','=')
+ , ('>','>')
+ , ('?','?')
+ , ('@','\8773')
+ , ('A','\913')
+ , ('B','\914')
+ , ('C','\935')
+ , ('D','\916')
+ , ('D','\8710')
+ , ('E','\917')
+ , ('F','\934')
+ , ('G','\915')
+ , ('H','\919')
+ , ('I','\921')
+ , ('J','\977')
+ , ('K','\922')
+ , ('L','\923')
+ , ('M','\924')
+ , ('N','\925')
+ , ('O','\927')
+ , ('P','\928')
+ , ('Q','\920')
+ , ('R','\929')
+ , ('S','\931')
+ , ('T','\932')
+ , ('U','\933')
+ , ('V','\962')
+ , ('W','\937')
+ , ('W','\8486')
+ , ('X','\926')
+ , ('Y','\936')
+ , ('Z','\918')
+ , ('[','[')
+ , ('\\','\8756')
+ , (']',']')
+ , ('^','\8869')
+ , ('_','_')
+ , ('`','\63717')
+ , ('a','\945')
+ , ('b','\946')
+ , ('c','\967')
+ , ('d','\948')
+ , ('e','\949')
+ , ('f','\966')
+ , ('g','\947')
+ , ('h','\951')
+ , ('i','\953')
+ , ('j','\981')
+ , ('k','\954')
+ , ('l','\955')
+ , ('m','\181')
+ , ('m','\956')
+ , ('n','\957')
+ , ('o','\959')
+ , ('p','\960')
+ , ('q','\952')
+ , ('r','\961')
+ , ('s','\963')
+ , ('t','\964')
+ , ('u','\965')
+ , ('v','\982')
+ , ('w','\969')
+ , ('x','\958')
+ , ('y','\968')
+ , ('z','\950')
+ , ('{','{')
+ , ('|','|')
+ , ('}','}')
+ , ('~','\8764')
+ , ('\160','\8364')
+ , ('\161','\978')
+ , ('\162','\8242')
+ , ('\163','\8804')
+ , ('\164','\8260')
+ , ('\164','\8725')
+ , ('\165','\8734')
+ , ('\166','\402')
+ , ('\167','\9827')
+ , ('\168','\9830')
+ , ('\169','\9829')
+ , ('\170','\9824')
+ , ('\171','\8596')
+ , ('\172','\8592')
+ , ('\173','\8593')
+ , ('\174','\8594')
+ , ('\175','\8595')
+ , ('\176','\176')
+ , ('\177','\177')
+ , ('\178','\8243')
+ , ('\179','\8805')
+ , ('\180','\215')
+ , ('\181','\8733')
+ , ('\182','\8706')
+ , ('\183','\8226')
+ , ('\184','\247')
+ , ('\185','\8800')
+ , ('\186','\8801')
+ , ('\187','\8776')
+ , ('\188','\8230')
+ , ('\189','\63718')
+ , ('\190','\63719')
+ , ('\191','\8629')
+ , ('\192','\8501')
+ , ('\193','\8465')
+ , ('\194','\8476')
+ , ('\195','\8472')
+ , ('\196','\8855')
+ , ('\197','\8853')
+ , ('\198','\8709')
+ , ('\199','\8745')
+ , ('\200','\8746')
+ , ('\201','\8835')
+ , ('\202','\8839')
+ , ('\203','\8836')
+ , ('\204','\8834')
+ , ('\205','\8838')
+ , ('\206','\8712')
+ , ('\207','\8713')
+ , ('\208','\8736')
+ , ('\209','\8711')
+ , ('\210','\63194')
+ , ('\211','\63193')
+ , ('\212','\63195')
+ , ('\213','\8719')
+ , ('\214','\8730')
+ , ('\215','\8901')
+ , ('\216','\172')
+ , ('\217','\8743')
+ , ('\218','\8744')
+ , ('\219','\8660')
+ , ('\220','\8656')
+ , ('\221','\8657')
+ , ('\222','\8658')
+ , ('\223','\8659')
+ , ('\224','\9674')
+ , ('\225','\9001')
+ , ('\226','\63720')
+ , ('\227','\63721')
+ , ('\228','\63722')
+ , ('\229','\8721')
+ , ('\230','\63723')
+ , ('\231','\63724')
+ , ('\232','\63725')
+ , ('\233','\63726')
+ , ('\234','\63727')
+ , ('\235','\63728')
+ , ('\236','\63729')
+ , ('\237','\63730')
+ , ('\238','\63731')
+ , ('\239','\63732')
+ , ('\241','\9002')
+ , ('\242','\8747')
+ , ('\243','\8992')
+ , ('\244','\63733')
+ , ('\245','\8993')
+ , ('\246','\63734')
+ , ('\247','\63735')
+ , ('\248','\63736')
+ , ('\249','\63737')
+ , ('\250','\63738')
+ , ('\251','\63739')
+ , ('\252','\63740')
+ , ('\253','\63741')
+ , ('\254','\63742')]
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index 68559d98b..ea195c14a 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -29,9 +29,12 @@ Functions for converting flat docx paragraphs into nested lists.
-}
module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
- , blocksToDefinitions) where
+ , blocksToDefinitions
+ , listParagraphDivs
+ ) where
import Text.Pandoc.JSON
+import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.Shared (trim)
import Control.Monad
import Data.List
@@ -118,7 +121,7 @@ handleListParagraphs (
in
handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks)
handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks)
-
+
separateBlocks' :: Block -> [[Block]] -> [[Block]]
separateBlocks' blk ([] : []) = [[blk]]
separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]]
@@ -136,7 +139,7 @@ flatToBullets' :: Integer -> [Block] -> [Block]
flatToBullets' _ [] = []
flatToBullets' num xs@(b : elems)
| getLevelN b == num = b : (flatToBullets' num elems)
- | otherwise =
+ | otherwise =
let bNumId = getNumIdN b
bLevel = getLevelN b
(children, remaining) =
@@ -159,10 +162,9 @@ flatToBullets elems = flatToBullets' (-1) elems
blocksToBullets :: [Block] -> [Block]
blocksToBullets blks =
- -- bottomUp removeListItemDivs $
+ bottomUp removeListDivs $
flatToBullets $ (handleListParagraphs blks)
-
plainParaInlines :: Block -> [Inline]
plainParaInlines (Plain ils) = ils
plainParaInlines (Para ils) = ils
@@ -199,10 +201,27 @@ blocksToDefinitions' [] acc (b:blks) =
blocksToDefinitions' defAcc acc (b:blks) =
blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks
+removeListDivs' :: Block -> [Block]
+removeListDivs' (Div (ident, classes, kvs) blks)
+ | "list-item" `elem` classes =
+ case delete "list-item" classes of
+ [] -> blks
+ classes' -> [Div (ident, classes', kvs) $ blks]
+removeListDivs' (Div (ident, classes, kvs) blks)
+ | not $ null $ listParagraphDivs `intersect` classes =
+ case classes \\ listParagraphDivs of
+ [] -> blks
+ classes' -> [Div (ident, classes', kvs) blks]
+removeListDivs' blk = [blk]
+
+removeListDivs :: [Block] -> [Block]
+removeListDivs = concatMap removeListDivs'
+
+
blocksToDefinitions :: [Block] -> [Block]
blocksToDefinitions = blocksToDefinitions' [] []
-
-
-
+
+
+
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 18200bcf9..1abd4bc6b 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE PatternGuards, ViewPatterns #-}
+
{-
Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -8,49 +10,46 @@ the Free Software Foundation; either version 2 of the License, or
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Readers.Docx.Parse
- Copyright : Copyright (C) 2014 Jesse Rosenthal
- License : GNU GPL, version 2 or above
+ Module : Text.Pandoc.Readers.Docx.Parse
+ Copyright : Copyright (C) 2014 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
- Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
- Stability : alpha
- Portability : portable
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
Conversion of docx archive into Docx haskell type
-}
+module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
+ , Document(..)
+ , Body(..)
+ , BodyPart(..)
+ , TblLook(..)
+ , ParPart(..)
+ , Run(..)
+ , RunElem(..)
+ , Notes
+ , Numbering
+ , Relationship
+ , Media
+ , RunStyle(..)
+ , ParIndentation(..)
+ , ParagraphStyle(..)
+ , Row(..)
+ , Cell(..)
+ , archiveToDocx
+ ) where
-module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
- , Document(..)
- , Body(..)
- , BodyPart(..)
- , TblLook(..)
- , ParPart(..)
- , Run(..)
- , RunElem(..)
- , Notes
- , Numbering
- , Relationship
- , Media
- , RunStyle(..)
- , ParagraphStyle(..)
- , Row(..)
- , Cell(..)
- , getFootNote
- , getEndNote
- , lookupLevel
- , lookupRelationship
- , archiveToDocx
- ) where
import Codec.Archive.Zip
import Text.XML.Light
import Data.Maybe
@@ -59,39 +58,244 @@ import System.FilePath
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import qualified Text.Pandoc.UTF8 as UTF8
+import Control.Monad.Reader
+import Control.Applicative ((<$>), (<|>))
+import qualified Data.Map as M
+import Text.Pandoc.Compat.Except
+import Text.TeXMath.Readers.OMML (readOMML)
+import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
+import Text.TeXMath (Exp)
+import Data.Char (readLitChar, ord, chr)
+
+data ReaderEnv = ReaderEnv { envNotes :: Notes
+ , envNumbering :: Numbering
+ , envRelationships :: [Relationship]
+ , envMedia :: Media
+ , envFont :: Maybe Font
+ }
+ deriving Show
+
+data DocxError = DocxError | WrongElem
+ deriving Show
+
+instance Error DocxError where
+ noMsg = WrongElem
+
+type D = ExceptT DocxError (Reader ReaderEnv)
+
+runD :: D a -> ReaderEnv -> Either DocxError a
+runD dx re = runReader (runExceptT dx ) re
+
+maybeToD :: Maybe a -> D a
+maybeToD (Just a) = return a
+maybeToD Nothing = throwError DocxError
+
+eitherToD :: Either a b -> D b
+eitherToD (Right b) = return b
+eitherToD (Left _) = throwError DocxError
+
+concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
+concatMapM f xs = liftM concat (mapM f xs)
+
+
+-- This is similar to `mapMaybe`: it maps a function returning the D
+-- monad over a list, and only keeps the non-erroring return values.
+mapD :: (a -> D b) -> [a] -> D [b]
+mapD f xs =
+ let handler x = (f x >>= (\y-> return [y])) `catchError` (\_ -> return [])
+ in
+ concatMapM handler xs
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
+type NameSpaces = [(String, String)]
+data Docx = Docx Document
+ deriving Show
-type NameSpaces = [(String, String)]
+data Document = Document NameSpaces Body
+ deriving Show
-data Docx = Docx Document Notes Numbering [Relationship] Media
+data Body = Body [BodyPart]
deriving Show
-archiveToDocx :: Archive -> Maybe Docx
+type Media = [(FilePath, B.ByteString)]
+
+data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
+ deriving Show
+
+data Numb = Numb String String -- right now, only a key to an abstract num
+ deriving Show
+
+data AbstractNumb = AbstractNumb String [Level]
+ deriving Show
+
+-- (ilvl, format, string, start)
+type Level = (String, String, String, Maybe Integer)
+
+data Relationship = Relationship (RelId, Target)
+ deriving Show
+
+data Notes = Notes NameSpaces
+ (Maybe (M.Map String Element))
+ (Maybe (M.Map String Element))
+ deriving Show
+
+data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
+ , rightParIndent :: Maybe Integer
+ , hangingParIndent :: Maybe Integer}
+ deriving Show
+
+data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+ , indentation :: Maybe ParIndentation
+ }
+ deriving Show
+
+defaultParagraphStyle :: ParagraphStyle
+defaultParagraphStyle = ParagraphStyle { pStyle = []
+ , indentation = Nothing
+ }
+
+
+data BodyPart = Paragraph ParagraphStyle [ParPart]
+ | ListItem ParagraphStyle String String Level [ParPart]
+ | Tbl String TblGrid TblLook [Row]
+ | OMathPara [Exp]
+ deriving Show
+
+type TblGrid = [Integer]
+
+data TblLook = TblLook {firstRowFormatting::Bool}
+ deriving Show
+
+defaultTblLook :: TblLook
+defaultTblLook = TblLook{firstRowFormatting = False}
+
+data Row = Row [Cell]
+ deriving Show
+
+data Cell = Cell [BodyPart]
+ deriving Show
+
+data ParPart = PlainRun Run
+ | Insertion ChangeId Author ChangeDate [Run]
+ | Deletion ChangeId Author ChangeDate [Run]
+ | BookMark BookMarkId Anchor
+ | InternalHyperLink Anchor [Run]
+ | ExternalHyperLink URL [Run]
+ | Drawing FilePath B.ByteString
+ | PlainOMath [Exp]
+ deriving Show
+
+data Run = Run RunStyle [RunElem]
+ | Footnote [BodyPart]
+ | Endnote [BodyPart]
+ | InlineDrawing FilePath B.ByteString
+ deriving Show
+
+data RunElem = TextRun String | LnBrk | Tab
+ deriving Show
+
+data RunStyle = RunStyle { isBold :: Bool
+ , isItalic :: Bool
+ , isSmallCaps :: Bool
+ , isStrike :: Bool
+ , isSuperScript :: Bool
+ , isSubScript :: Bool
+ , rUnderline :: Maybe String
+ , rStyle :: Maybe String }
+ deriving Show
+
+defaultRunStyle :: RunStyle
+defaultRunStyle = RunStyle { isBold = False
+ , isItalic = False
+ , isSmallCaps = False
+ , isStrike = False
+ , isSuperScript = False
+ , isSubScript = False
+ , rUnderline = Nothing
+ , rStyle = Nothing
+ }
+
+
+type Target = String
+type Anchor = String
+type URL = String
+type BookMarkId = String
+type RelId = String
+type ChangeId = String
+type Author = String
+type ChangeDate = String
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = do
- let notes = archiveToNotes archive
- rels = archiveToRelationships archive
- media = archiveToMedia archive
- doc <- archiveToDocument archive
- numbering <- archiveToNumbering archive
- return $ Docx doc notes numbering rels media
-
-data Document = Document NameSpaces Body
- deriving Show
+ let notes = archiveToNotes archive
+ numbering = archiveToNumbering archive
+ rels = archiveToRelationships archive
+ media = archiveToMedia archive
+ rEnv = ReaderEnv notes numbering rels media Nothing
+ doc <- runD (archiveToDocument archive) rEnv
+ return $ Docx doc
+
-archiveToDocument :: Archive -> Maybe Document
+archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
- entry <- findEntryByPath "word/document.xml" zf
- docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
- bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem
+ entry <- maybeToD $ findEntryByPath "word/document.xml" zf
+ docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+ let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+ bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
body <- elemToBody namespaces bodyElem
return $ Document namespaces body
-type Media = [(FilePath, B.ByteString)]
+elemToBody :: NameSpaces -> Element -> D Body
+elemToBody ns element | isElem ns "w" "body" element =
+ mapD (elemToBodyPart ns) (elChildren element) >>=
+ (\bps -> return $ Body bps)
+elemToBody _ _ = throwError WrongElem
+
+archiveToNotes :: Archive -> Notes
+archiveToNotes zf =
+ let fnElem = findEntryByPath "word/footnotes.xml" zf
+ >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ enElem = findEntryByPath "word/endnotes.xml" zf
+ >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ fn_namespaces = case fnElem of
+ Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Nothing -> []
+ en_namespaces = case enElem of
+ Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Nothing -> []
+ ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
+ fn = fnElem >>= (elemToNotes ns "footnote")
+ en = enElem >>= (elemToNotes ns "endnote")
+ in
+ Notes ns fn en
+
+filePathIsRel :: FilePath -> Bool
+filePathIsRel fp =
+ let (dir, name) = splitFileName fp
+ in
+ (dir == "word/_rels/") && ((takeExtension name) == ".rels")
+
+relElemToRelationship :: Element -> Maybe Relationship
+relElemToRelationship element | qName (elName element) == "Relationship" =
+ do
+ relId <- findAttr (QName "Id" Nothing Nothing) element
+ target <- findAttr (QName "Target" Nothing Nothing) element
+ return $ Relationship (relId, target)
+relElemToRelationship _ = Nothing
+
+
+archiveToRelationships :: Archive -> [Relationship]
+archiveToRelationships archive =
+ let relPaths = filter filePathIsRel (filesInArchive archive)
+ entries = mapMaybe (\f -> findEntryByPath f archive) relPaths
+ relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
+ rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems
+ in
+ rels
filePathIsMedia :: FilePath -> Bool
filePathIsMedia fp =
@@ -109,18 +313,6 @@ archiveToMedia :: Archive -> Media
archiveToMedia zf =
mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf))
-data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
- deriving Show
-
-data Numb = Numb String String -- right now, only a key to an abstract num
- deriving Show
-
-data AbstractNumb = AbstractNumb String [Level]
- deriving Show
-
--- (ilvl, format, string, start)
-type Level = (String, String, String, Maybe Integer)
-
lookupLevel :: String -> String -> Numbering -> Maybe Level
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
@@ -148,7 +340,7 @@ absNumElemToAbsNum ns element |
let levelElems = findChildren
(QName "lvl" (lookup "w" ns) (Just "w"))
element
- levels = mapMaybe id $ map (levelElemToLevel ns) levelElems
+ levels = mapMaybe (levelElemToLevel ns) levelElems
return $ AbstractNumb absNumId levels
absNumElemToAbsNum _ _ = Nothing
@@ -167,8 +359,8 @@ levelElemToLevel ns element |
return (ilvl, fmt, txt, start)
levelElemToLevel _ _ = Nothing
-archiveToNumbering :: Archive -> Maybe Numbering
-archiveToNumbering zf =
+archiveToNumbering' :: Archive -> Maybe Numbering
+archiveToNumbering' zf = do
case findEntryByPath "word/numbering.xml" zf of
Nothing -> Just $ Numbering [] [] []
Just entry -> do
@@ -180,321 +372,281 @@ archiveToNumbering zf =
absNumElems = findChildren
(QName "abstractNum" (lookup "w" namespaces) (Just "w"))
numberingElem
- nums = mapMaybe id $ map (numElemToNum namespaces) numElems
- absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems
+ nums = mapMaybe (numElemToNum namespaces) numElems
+ absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems
return $ Numbering namespaces nums absNums
-data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])])
- deriving Show
-
-noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart])
-noteElemToNote ns element
- | qName (elName element) `elem` ["endnote", "footnote"] &&
- qURI (elName element) == (lookup "w" ns) =
- do
- noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
- let bps = map fromJust
- $ filter isJust
- $ map (elemToBodyPart ns)
- $ filterChildrenName (isParOrTbl ns) element
- return $ (noteId, bps)
-noteElemToNote _ _ = Nothing
-
-getFootNote :: String -> Notes -> Maybe [BodyPart]
-getFootNote s (Notes _ fns _) = fns >>= (lookup s)
-
-getEndNote :: String -> Notes -> Maybe [BodyPart]
-getEndNote s (Notes _ _ ens) = ens >>= (lookup s)
+archiveToNumbering :: Archive -> Numbering
+archiveToNumbering archive =
+ fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
-elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])]
+elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
elemToNotes ns notetype element
- | qName (elName element) == (notetype ++ "s") &&
- qURI (elName element) == (lookup "w" ns) =
- Just $ map fromJust
- $ filter isJust
- $ map (noteElemToNote ns)
- $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element
+ | isElem ns "w" (notetype ++ "s") element =
+ let pairs = mapMaybe
+ (\e -> findAttr (elemName ns "w" "id") e >>=
+ (\a -> Just (a, e)))
+ (findChildren (elemName ns "w" notetype) element)
+ in
+ Just $ M.fromList $ pairs
elemToNotes _ _ _ = Nothing
-archiveToNotes :: Archive -> Notes
-archiveToNotes zf =
- let fnElem = findEntryByPath "word/footnotes.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- enElem = findEntryByPath "word/endnotes.xml" zf
- >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
- fn_namespaces = case fnElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
- Nothing -> []
- en_namespaces = case enElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
- Nothing -> []
- ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
- fn = fnElem >>= (elemToNotes ns "footnote")
- en = enElem >>= (elemToNotes ns "endnote")
- in
- Notes ns fn en
+---------------------------------------------
+---------------------------------------------
+elemName :: NameSpaces -> String -> String -> QName
+elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix))
-data Relationship = Relationship (RelId, Target)
- deriving Show
+isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem ns prefix name element =
+ qName (elName element) == name &&
+ qURI (elName element) == (lookup prefix ns)
-lookupRelationship :: RelId -> [Relationship] -> Maybe Target
-lookupRelationship relid rels =
- lookup relid (map (\(Relationship pair) -> pair) rels)
-filePathIsRel :: FilePath -> Bool
-filePathIsRel fp =
- let (dir, name) = splitFileName fp
+elemToTblGrid :: NameSpaces -> Element -> D TblGrid
+elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
+ let cols = findChildren (elemName ns "w" "gridCol") element
in
- (dir == "word/_rels/") && ((takeExtension name) == ".rels")
-
-relElemToRelationship :: Element -> Maybe Relationship
-relElemToRelationship element | qName (elName element) == "Relationship" =
- do
- relId <- findAttr (QName "Id" Nothing Nothing) element
- target <- findAttr (QName "Target" Nothing Nothing) element
- return $ Relationship (relId, target)
-relElemToRelationship _ = Nothing
-
-
-archiveToRelationships :: Archive -> [Relationship]
-archiveToRelationships archive =
- let relPaths = filter filePathIsRel (filesInArchive archive)
- entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths
- relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
- rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems
+ mapD (\e -> maybeToD (findAttr (elemName ns "w" "val") e >>= stringToInteger))
+ cols
+elemToTblGrid _ _ = throwError WrongElem
+
+elemToTblLook :: NameSpaces -> Element -> D TblLook
+elemToTblLook ns element | isElem ns "w" "tblLook" element =
+ let firstRow = findAttr (elemName ns "w" "firstRow") element
+ val = findAttr (elemName ns "w" "val") element
+ firstRowFmt =
+ case firstRow of
+ Just "1" -> True
+ Just _ -> False
+ Nothing -> case val of
+ Just bitMask -> testBitMask bitMask 0x020
+ Nothing -> False
in
- rels
-
-data Body = Body [BodyPart]
- deriving Show
+ return $ TblLook{firstRowFormatting = firstRowFmt}
+elemToTblLook _ _ = throwError WrongElem
-isParOrTbl :: NameSpaces -> QName -> Bool
-isParOrTbl ns q = qName q `elem` ["p", "tbl"] &&
- qURI q == (lookup "w" ns)
+elemToRow :: NameSpaces -> Element -> D Row
+elemToRow ns element | isElem ns "w" "tr" element =
+ do
+ let cellElems = findChildren (elemName ns "w" "tc") element
+ cells <- mapD (elemToCell ns) cellElems
+ return $ Row cells
+elemToRow _ _ = throwError WrongElem
-elemToBody :: NameSpaces -> Element -> Maybe Body
-elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) =
- Just $ Body
- $ map fromJust
- $ filter isJust
- $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element
-elemToBody _ _ = Nothing
+elemToCell :: NameSpaces -> Element -> D Cell
+elemToCell ns element | isElem ns "w" "tc" element =
+ do
+ cellContents <- mapD (elemToBodyPart ns) (elChildren element)
+ return $ Cell cellContents
+elemToCell _ _ = throwError WrongElem
+
+elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
+elemToParIndentation ns element | isElem ns "w" "ind" element =
+ Just $ ParIndentation {
+ leftParIndent =
+ findAttr (QName "left" (lookup "w" ns) (Just "w")) element >>=
+ stringToInteger
+ , rightParIndent =
+ findAttr (QName "right" (lookup "w" ns) (Just "w")) element >>=
+ stringToInteger
+ , hangingParIndent =
+ findAttr (QName "hanging" (lookup "w" ns) (Just "w")) element >>=
+ stringToInteger}
+elemToParIndentation _ _ = Nothing
-isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool
-isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] &&
- qURI q == (lookup "w" ns)
elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-elemToNumInfo ns element
- | qName (elName element) == "p" &&
- qURI (elName element) == (lookup "w" ns) =
- do
- pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element
- numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr
- lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>=
- findAttr (QName "val" (lookup "w" ns) (Just "w"))
- numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>=
- findAttr (QName "val" (lookup "w" ns) (Just "w"))
- return (numId, lvl)
+elemToNumInfo ns element | isElem ns "w" "p" element = do
+ let pPr = findChild (elemName ns "w" "pPr") element
+ numPr = pPr >>= findChild (elemName ns "w" "numPr")
+ lvl <- numPr >>=
+ findChild (elemName ns "w" "ilvl") >>=
+ findAttr (elemName ns "w" "val")
+ numId <- numPr >>=
+ findChild (elemName ns "w" "numId") >>=
+ findAttr (elemName ns "w" "val")
+ return (numId, lvl)
elemToNumInfo _ _ = Nothing
--- isBookMarkTag :: NameSpaces -> QName -> Bool
--- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] &&
--- qURI q == (lookup "w" ns)
-
--- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark
--- parChildrenToBookmark ns (bms : bme : _)
--- | qName (elName bms) == "bookmarkStart" &&
--- qURI (elName bms) == (lookup "w" ns) &&
--- qName (elName bme) == "bookmarkEnd" &&
--- qURI (elName bme) == (lookup "w" ns) = do
--- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms
--- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms
--- return $ (bmId, bmName)
--- parChildrenToBookmark _ _ = Nothing
-
-elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart
-elemToBodyPart ns element
- | qName (elName element) == "p" &&
- qURI (elName element) == (lookup "w" ns) =
- let parstyle = elemToParagraphStyle ns element
- parparts = mapMaybe id
- $ map (elemToParPart ns)
- $ filterChildrenName (isRunOrLinkOrBookmark ns) element
- in
- case elemToNumInfo ns element of
- Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts
- Nothing -> Just $ Paragraph parstyle parparts
- | qName (elName element) == "tbl" &&
- qURI (elName element) == (lookup "w" ns) =
- let
- caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
- >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w"))
- >>= findAttr (QName "val" (lookup "w" ns) (Just "w"))
- grid = case
- findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element
- of
- Just g -> elemToTblGrid ns g
- Nothing -> []
- tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element
- >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w"))
- >>= elemToTblLook ns
- in
- Just $ Tbl
- (fromMaybe "" caption)
- grid
- (fromMaybe defaultTblLook tblLook)
- (mapMaybe (elemToRow ns) (elChildren element))
- | otherwise = Nothing
-
-elemToTblLook :: NameSpaces -> Element -> Maybe TblLook
-elemToTblLook ns element
- | qName (elName element) == "tblLook" &&
- qURI (elName element) == (lookup "w" ns) =
- let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element
- val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element
- firstRowFmt =
- case firstRow of
- Just "1" -> True
- Just _ -> False
- Nothing -> case val of
- Just bitMask -> testBitMask bitMask 0x020
- Nothing -> False
- in
- Just $ TblLook{firstRowFormatting = firstRowFmt}
-elemToTblLook _ _ = Nothing
-
testBitMask :: String -> Int -> Bool
testBitMask bitMaskS n =
case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
[] -> False
((n', _) : _) -> ((n' .|. n) /= 0)
-data ParagraphStyle = ParagraphStyle { pStyle :: [String]
- , indent :: Maybe Integer
- }
- deriving Show
-
-defaultParagraphStyle :: ParagraphStyle
-defaultParagraphStyle = ParagraphStyle { pStyle = []
- , indent = Nothing
- }
-
-elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
-elemToParagraphStyle ns element =
- case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of
- Just pPr ->
- ParagraphStyle
- {pStyle =
- mapMaybe id $
- map
- (findAttr (QName "val" (lookup "w" ns) (Just "w")))
- (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr)
- , indent =
- findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>=
- findAttr (QName "left" (lookup "w" ns) (Just "w")) >>=
- stringToInteger
- }
- Nothing -> defaultParagraphStyle
-
-
-data BodyPart = Paragraph ParagraphStyle [ParPart]
- | ListItem ParagraphStyle String String [ParPart]
- | Tbl String TblGrid TblLook [Row]
-
- deriving Show
-
-type TblGrid = [Integer]
-
-data TblLook = TblLook {firstRowFormatting::Bool}
- deriving Show
-
-defaultTblLook :: TblLook
-defaultTblLook = TblLook{firstRowFormatting = False}
-
stringToInteger :: String -> Maybe Integer
stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
-elemToTblGrid :: NameSpaces -> Element -> TblGrid
-elemToTblGrid ns element
- | qName (elName element) == "tblGrid" &&
- qURI (elName element) == (lookup "w" ns) =
- let
- cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element
- in
- mapMaybe (\e ->
- findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e
- >>= stringToInteger
- )
- cols
-elemToTblGrid _ _ = []
-
-data Row = Row [Cell]
- deriving Show
-
-
-elemToRow :: NameSpaces -> Element -> Maybe Row
-elemToRow ns element
- | qName (elName element) == "tr" &&
- qURI (elName element) == (lookup "w" ns) =
- let
- cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element
- in
- Just $ Row (mapMaybe (elemToCell ns) cells)
-elemToRow _ _ = Nothing
+elemToBodyPart :: NameSpaces -> Element -> D BodyPart
+elemToBodyPart ns element
+ | isElem ns "w" "p" element
+ , (c:_) <- findChildren (elemName ns "m" "oMathPara") element =
+ do
+ expsLst <- eitherToD $ readOMML $ showElement c
+ return $ OMathPara expsLst
+elemToBodyPart ns element
+ | isElem ns "w" "p" element
+ , Just (numId, lvl) <- elemToNumInfo ns element = do
+ let parstyle = elemToParagraphStyle ns element
+ parparts <- mapD (elemToParPart ns) (elChildren element)
+ num <- asks envNumbering
+ case lookupLevel numId lvl num of
+ Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
+ Nothing -> throwError WrongElem
+elemToBodyPart ns element
+ | isElem ns "w" "p" element = do
+ let parstyle = elemToParagraphStyle ns element
+ parparts <- mapD (elemToParPart ns) (elChildren element)
+ return $ Paragraph parstyle parparts
+elemToBodyPart ns element
+ | isElem ns "w" "tbl" element = do
+ let caption' = findChild (elemName ns "w" "tblPr") element
+ >>= findChild (elemName ns "w" "tblCaption")
+ >>= findAttr (elemName ns "w" "val")
+ caption = (fromMaybe "" caption')
+ grid' = case findChild (elemName ns "w" "tblGrid") element of
+ Just g -> elemToTblGrid ns g
+ Nothing -> return []
+ tblLook' = case findChild (elemName ns "w" "tblPr") element >>=
+ findChild (elemName ns "w" "tblLook")
+ of
+ Just l -> elemToTblLook ns l
+ Nothing -> return defaultTblLook
+
+ grid <- grid'
+ tblLook <- tblLook'
+ rows <- mapD (elemToRow ns) (elChildren element)
+ return $ Tbl caption grid tblLook rows
+elemToBodyPart _ _ = throwError WrongElem
-data Cell = Cell [BodyPart]
- deriving Show
+lookupRelationship :: RelId -> [Relationship] -> Maybe Target
+lookupRelationship relid rels =
+ lookup relid (map (\(Relationship pair) -> pair) rels)
-elemToCell :: NameSpaces -> Element -> Maybe Cell
-elemToCell ns element
- | qName (elName element) == "tc" &&
- qURI (elName element) == (lookup "w" ns) =
- Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element))
-elemToCell _ _ = Nothing
+expandDrawingId :: String -> D (FilePath, B.ByteString)
+expandDrawingId s = do
+ target <- asks (lookupRelationship s . envRelationships)
+ case target of
+ Just filepath -> do
+ bytes <- asks (lookup (combine "word" filepath) . envMedia)
+ case bytes of
+ Just bs -> return (filepath, bs)
+ Nothing -> throwError DocxError
+ Nothing -> throwError DocxError
+
+elemToParPart :: NameSpaces -> Element -> D ParPart
+elemToParPart ns element
+ | isElem ns "w" "r" element
+ , Just _ <- findChild (elemName ns "w" "drawing") element =
+ let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+ drawing = findElement (QName "blip" (Just a_ns) (Just "a")) element
+ >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+ in
+ case drawing of
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs)
+ Nothing -> throwError WrongElem
+elemToParPart ns element
+ | isElem ns "w" "r" element =
+ elemToRun ns element >>= (\r -> return $ PlainRun r)
+elemToParPart ns element
+ | isElem ns "w" "ins" element
+ , Just cId <- findAttr (elemName ns "w" "id") element
+ , Just cAuthor <- findAttr (elemName ns "w" "author") element
+ , Just cDate <- findAttr (elemName ns "w" "date") element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ return $ Insertion cId cAuthor cDate runs
+elemToParPart ns element
+ | isElem ns "w" "del" element
+ , Just cId <- findAttr (elemName ns "w" "id") element
+ , Just cAuthor <- findAttr (elemName ns "w" "author") element
+ , Just cDate <- findAttr (elemName ns "w" "date") element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ return $ Deletion cId cAuthor cDate runs
+elemToParPart ns element
+ | isElem ns "w" "bookmarkStart" element
+ , Just bmId <- findAttr (elemName ns "w" "id") element
+ , Just bmName <- findAttr (elemName ns "w" "name") element =
+ return $ BookMark bmId bmName
+elemToParPart ns element
+ | isElem ns "w" "hyperlink" element
+ , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ return $ InternalHyperLink anchor runs
+elemToParPart ns element
+ | isElem ns "w" "hyperlink" element
+ , Just relId <- findAttr (elemName ns "r" "id") element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ rels <- asks envRelationships
+ return $ case lookupRelationship relId rels of
+ Just target -> ExternalHyperLink target runs
+ Nothing -> ExternalHyperLink "" runs
+elemToParPart ns element
+ | isElem ns "m" "oMath" element =
+ (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
+elemToParPart _ _ = throwError WrongElem
-data ParPart = PlainRun Run
- | BookMark BookMarkId Anchor
- | InternalHyperLink Anchor [Run]
- | ExternalHyperLink RelId [Run]
- | Drawing String
- deriving Show
+lookupFootnote :: String -> Notes -> Maybe Element
+lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)
-data Run = Run RunStyle [RunElem]
- | Footnote String
- | Endnote String
- deriving Show
+lookupEndnote :: String -> Notes -> Maybe Element
+lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s)
-data RunElem = TextRun String | LnBrk | Tab
- deriving Show
+elemToRun :: NameSpaces -> Element -> D Run
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just drawingElem <- findChild (elemName ns "w" "drawing") element =
+ let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
+ drawing = findElement (QName "blip" (Just a_ns) (Just "a")) drawingElem
+ >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+ in
+ case drawing of
+ Just s -> expandDrawingId s >>=
+ (\(fp, bs) -> return $ InlineDrawing fp bs)
+ Nothing -> throwError WrongElem
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just ref <- findChild (elemName ns "w" "footnoteReference") element
+ , Just fnId <- findAttr (elemName ns "w" "id") ref = do
+ notes <- asks envNotes
+ case lookupFootnote fnId notes of
+ Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+ return $ Footnote bps
+ Nothing -> return $ Footnote []
+elemToRun ns element
+ | isElem ns "w" "r" element
+ , Just ref <- findChild (elemName ns "w" "endnoteReference") element
+ , Just enId <- findAttr (elemName ns "w" "id") ref = do
+ notes <- asks envNotes
+ case lookupEndnote enId notes of
+ Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+ return $ Endnote bps
+ Nothing -> return $ Endnote []
+elemToRun ns element
+ | isElem ns "w" "r" element = do
+ runElems <- elemToRunElems ns element
+ return $ Run (elemToRunStyle ns element) runElems
+elemToRun _ _ = throwError WrongElem
-data RunStyle = RunStyle { isBold :: Bool
- , isItalic :: Bool
- , isSmallCaps :: Bool
- , isStrike :: Bool
- , isSuperScript :: Bool
- , isSubScript :: Bool
- , underline :: Maybe String
- , rStyle :: Maybe String }
- deriving Show
+elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
+elemToParagraphStyle ns element
+ | Just pPr <- findChild (elemName ns "w" "pPr") element =
+ ParagraphStyle
+ {pStyle =
+ mapMaybe
+ (findAttr (elemName ns "w" "val"))
+ (findChildren (elemName ns "w" "pStyle") pPr)
+ , indentation =
+ findChild (elemName ns "w" "ind") pPr >>=
+ elemToParIndentation ns
+ }
+elemToParagraphStyle _ _ = defaultParagraphStyle
-defaultRunStyle :: RunStyle
-defaultRunStyle = RunStyle { isBold = False
- , isItalic = False
- , isSmallCaps = False
- , isStrike = False
- , isSuperScript = False
- , isSubScript = False
- , underline = Nothing
- , rStyle = Nothing
- }
elemToRunStyle :: NameSpaces -> Element -> RunStyle
-elemToRunStyle ns element =
- case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of
- Just rPr ->
- RunStyle
+elemToRunStyle ns element
+ | Just rPr <- findChild (elemName ns "w" "rPr") element =
+ RunStyle
{
isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr
, isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr
@@ -508,100 +660,65 @@ elemToRunStyle ns element =
(Just "subscript" ==
(findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>=
findAttr (QName "val" (lookup "w" ns) (Just "w"))))
- , underline =
+ , rUnderline =
findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>=
findAttr (QName "val" (lookup "w" ns) (Just "w"))
, rStyle =
findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>=
findAttr (QName "val" (lookup "w" ns) (Just "w"))
}
- Nothing -> defaultRunStyle
+elemToRunStyle _ _ = defaultRunStyle
-elemToRun :: NameSpaces -> Element -> Maybe Run
-elemToRun ns element
- | qName (elName element) == "r" &&
- qURI (elName element) == (lookup "w" ns) =
- case
- findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>=
- findAttr (QName "id" (lookup "w" ns) (Just "w"))
- of
- Just s -> Just $ Footnote s
- Nothing ->
- case
- findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>=
- findAttr (QName "id" (lookup "w" ns) (Just "w"))
- of
- Just s -> Just $ Endnote s
- Nothing -> Just $
- Run (elemToRunStyle ns element)
- (elemToRunElems ns element)
-elemToRun _ _ = Nothing
-
-elemToRunElem :: NameSpaces -> Element -> Maybe RunElem
+elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element
- | qName (elName element) == "t" &&
- qURI (elName element) == (lookup "w" ns) =
- Just $ TextRun (strContent element)
- | qName (elName element) == "br" &&
- qURI (elName element) == (lookup "w" ns) =
- Just $ LnBrk
- | qName (elName element) == "tab" &&
- qURI (elName element) == (lookup "w" ns) =
- Just $ Tab
- | otherwise = Nothing
-
-
-elemToRunElems :: NameSpaces -> Element -> [RunElem]
+ | isElem ns "w" "t" element
+ || isElem ns "w" "delText" element
+ || isElem ns "m" "t" element = do
+ let str = strContent element
+ font <- asks envFont
+ case font of
+ Nothing -> return $ TextRun str
+ Just f -> return . TextRun $
+ map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
+ | isElem ns "w" "br" element = return LnBrk
+ | isElem ns "w" "tab" element = return Tab
+ | isElem ns "w" "sym" element = return (getSymChar ns element)
+ | otherwise = throwError WrongElem
+ where
+ lowerFromPrivate (ord -> c)
+ | c >= ord '\xF000' = chr $ c - ord '\xF000'
+ | otherwise = chr c
+
+-- The char attribute is a hex string
+getSymChar :: NameSpaces -> Element -> RunElem
+getSymChar ns element
+ | Just s <- lowerFromPrivate <$> getCodepoint
+ , Just font <- getFont =
+ let [(char, _)] = readLitChar ("\\x" ++ s) in
+ TextRun . maybe "" (:[]) $ getUnicode font char
+ where
+ getCodepoint = findAttr (elemName ns "w" "char") element
+ getFont = stringToFont =<< findAttr (elemName ns "w" "font") element
+ lowerFromPrivate ('F':xs) = '0':xs
+ lowerFromPrivate xs = xs
+getSymChar _ _ = TextRun ""
+
+stringToFont :: String -> Maybe Font
+stringToFont "Symbol" = Just Symbol
+stringToFont _ = Nothing
+
+elemToRunElems :: NameSpaces -> Element -> D [RunElem]
elemToRunElems ns element
- | qName (elName element) == "r" &&
- qURI (elName element) == (lookup "w" ns) =
- mapMaybe (elemToRunElem ns) (elChildren element)
- | otherwise = []
-
-elemToDrawing :: NameSpaces -> Element -> Maybe ParPart
-elemToDrawing ns element
- | qName (elName element) == "drawing" &&
- qURI (elName element) == (lookup "w" ns) =
- let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
- in
- findElement (QName "blip" (Just a_ns) (Just "a")) element
- >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
- >>= (\s -> Just $ Drawing s)
-elemToDrawing _ _ = Nothing
-
-
-elemToParPart :: NameSpaces -> Element -> Maybe ParPart
-elemToParPart ns element
- | qName (elName element) == "r" &&
- qURI (elName element) == (lookup "w" ns) =
- case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of
- Just drawingElem -> elemToDrawing ns drawingElem
- Nothing -> do
- r <- elemToRun ns element
- return $ PlainRun r
-elemToParPart ns element
- | qName (elName element) == "bookmarkStart" &&
- qURI (elName element) == (lookup "w" ns) = do
- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element
- return $ BookMark bmId bmName
-elemToParPart ns element
- | qName (elName element) == "hyperlink" &&
- qURI (elName element) == (lookup "w" ns) =
- let runs = map fromJust $ filter isJust $ map (elemToRun ns)
- $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element
- in
- case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of
- Just anchor ->
- Just $ InternalHyperLink anchor runs
- Nothing ->
- case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of
- Just relId -> Just $ ExternalHyperLink relId runs
- Nothing -> Nothing
-elemToParPart _ _ = Nothing
-
-type Target = String
-type Anchor = String
-type BookMarkId = String
-type RelId = String
-
+ | isElem ns "w" "r" element
+ || isElem ns "m" "r" element = do
+ let qualName = elemName ns "w"
+ let font = do
+ fontElem <- findElement (qualName "rFonts") element
+ stringToFont =<<
+ (foldr (<|>) Nothing $
+ map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"])
+ local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
+elemToRunElems _ _ = throwError WrongElem
+
+setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
+setFont f s = s{envFont = f}
diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
new file mode 100644
index 000000000..2dbef4131
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs
@@ -0,0 +1,183 @@
+{-# LANGUAGE OverloadedStrings, PatternGuards #-}
+
+{-
+Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Docx.Reducible
+ Copyright : Copyright (C) 2014 Jesse Rosenthal
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+ Stability : alpha
+ Portability : portable
+
+Typeclass for combining adjacent blocks and inlines correctly.
+-}
+
+
+module Text.Pandoc.Readers.Docx.Reducible ((<++>),
+ (<+++>),
+ Reducible,
+ Container(..),
+ container,
+ innards,
+ reduceList,
+ reduceListB,
+ rebuild)
+ where
+
+import Text.Pandoc.Builder
+import Data.List ((\\), intersect)
+
+data Container a = Container ([a] -> a) | NullContainer
+
+instance (Eq a) => Eq (Container a) where
+ (Container x) == (Container y) = ((x []) == (y []))
+ NullContainer == NullContainer = True
+ _ == _ = False
+
+instance (Show a) => Show (Container a) where
+ show (Container x) = "Container {" ++
+ (reverse $ drop 3 $ reverse $ show $ x []) ++
+ "}"
+ show (NullContainer) = "NullContainer"
+
+class Reducible a where
+ (<++>) :: a -> a -> [a]
+ container :: a -> Container a
+ innards :: a -> [a]
+ isSpace :: a -> Bool
+
+(<+++>) :: (Reducible a) => Many a -> Many a -> Many a
+mr <+++> ms = fromList $ reduceList $ toList mr ++ toList ms
+
+reduceListB :: (Reducible a) => Many a -> Many a
+reduceListB = fromList . reduceList . toList
+
+reduceList' :: (Reducible a) => [a] -> [a] -> [a]
+reduceList' acc [] = acc
+reduceList' [] (x:xs) = reduceList' [x] xs
+reduceList' as (x:xs) = reduceList' (init as ++ (last as <++> x) ) xs
+
+reduceList :: (Reducible a) => [a] -> [a]
+reduceList = reduceList' []
+
+combineReducibles :: (Reducible a, Eq a) => a -> a -> [a]
+combineReducibles r s =
+ let (conts, rs) = topLevelContainers r
+ (conts', ss) = topLevelContainers s
+ shared = conts `intersect` conts'
+ remaining = conts \\ shared
+ remaining' = conts' \\ shared
+ in
+ case null shared of
+ True | (x : xs) <- reverse rs
+ , isSpace x ->
+ rebuild conts (reverse xs) ++ [x, s]
+ | (x : xs) <- ss
+ , isSpace x ->
+ [r, x] ++ rebuild conts' (xs)
+ True -> [r,s]
+ False -> rebuild
+ shared $
+ reduceList $
+ (rebuild remaining rs) ++ (rebuild remaining' ss)
+
+instance Reducible Inline where
+ s1@(Span (id1, classes1, kvs1) ils1) <++> s2@(Span (id2, classes2, kvs2) ils2) =
+ let classes' = classes1 `intersect` classes2
+ kvs' = kvs1 `intersect` kvs2
+ classes1' = classes1 \\ classes'
+ kvs1' = kvs1 \\ kvs'
+ classes2' = classes2 \\ classes'
+ kvs2' = kvs2 \\ kvs'
+ in
+ case null classes' && null kvs' of
+ True -> [s1,s2]
+ False -> let attr' = ("", classes', kvs')
+ attr1' = (id1, classes1', kvs1')
+ attr2' = (id2, classes2', kvs2')
+ s1' = case null classes1' && null kvs1' of
+ True -> ils1
+ False -> [Span attr1' ils1]
+ s2' = case null classes2' && null kvs2' of
+ True -> ils2
+ False -> [Span attr2' ils2]
+ in
+ [Span attr' $ reduceList $ s1' ++ s2']
+
+ (Str x) <++> (Str y) = [Str (x++y)]
+ il <++> il' = combineReducibles il il'
+
+ container (Emph _) = Container Emph
+ container (Strong _) = Container Strong
+ container (Strikeout _) = Container Strikeout
+ container (Subscript _) = Container Subscript
+ container (Superscript _) = Container Superscript
+ container (Quoted qt _) = Container $ Quoted qt
+ container (Cite cs _) = Container $ Cite cs
+ container (Span attr _) = Container $ Span attr
+ container _ = NullContainer
+
+ innards (Emph ils) = ils
+ innards (Strong ils) = ils
+ innards (Strikeout ils) = ils
+ innards (Subscript ils) = ils
+ innards (Superscript ils) = ils
+ innards (Quoted _ ils) = ils
+ innards (Cite _ ils) = ils
+ innards (Span _ ils) = ils
+ innards _ = []
+
+ isSpace Space = True
+ isSpace _ = False
+
+instance Reducible Block where
+ (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes =
+ [Div (ident, classes, kvs) (reduceList blks), blk]
+
+ blk <++> blk' = combineReducibles blk blk'
+
+ container (BlockQuote _) = Container BlockQuote
+ container (Div attr _) = Container $ Div attr
+ container _ = NullContainer
+
+ innards (BlockQuote bs) = bs
+ innards (Div _ bs) = bs
+ innards _ = []
+
+ isSpace _ = False
+
+
+topLevelContainers' :: (Reducible a) => [a] -> ([Container a], [a])
+topLevelContainers' (r : []) = case container r of
+ NullContainer -> ([], [r])
+ _ ->
+ let (conts, inns) = topLevelContainers' (innards r)
+ in
+ ((container r) : conts, inns)
+topLevelContainers' rs = ([], rs)
+
+topLevelContainers :: (Reducible a) => a -> ([Container a], [a])
+topLevelContainers il = topLevelContainers' [il]
+
+rebuild :: [Container a] -> [a] -> [a]
+rebuild [] xs = xs
+rebuild ((Container f) : cs) xs = rebuild cs $ [f xs]
+rebuild (NullContainer : cs) xs = rebuild cs $ xs
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
new file mode 100644
index 000000000..f900c0adc
--- /dev/null
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -0,0 +1,285 @@
+{-# LANGUAGE
+ ViewPatterns
+ , StandaloneDeriving
+ , TupleSections
+ , FlexibleContexts #-}
+
+module Text.Pandoc.Readers.EPUB
+ (readEPUB)
+ where
+
+import Text.XML.Light
+import Text.Pandoc.Definition hiding (Attr)
+import Text.Pandoc.Walk (walk, query)
+import Text.Pandoc.Generic(bottomUp)
+import Text.Pandoc.Readers.HTML (readHtml)
+import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
+import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
+import Text.Pandoc.MediaBag (MediaBag, insertMedia)
+import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
+import qualified Text.Pandoc.Builder as B
+import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry
+ , findEntryByPath, Entry)
+import qualified Data.ByteString.Lazy as BL (ByteString)
+import System.FilePath ( takeFileName, (</>), dropFileName, normalise
+ , dropFileName
+ , splitFileName )
+import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import Control.Applicative ((<$>))
+import Control.Monad (guard, liftM, when)
+import Data.Monoid (mempty, (<>))
+import Data.List (isPrefixOf, isInfixOf)
+import Data.Maybe (mapMaybe, fromMaybe)
+import qualified Data.Map as M (Map, lookup, fromList, elems)
+import Control.DeepSeq.Generics (deepseq, NFData)
+
+import Debug.Trace (trace)
+
+type MIME = String
+
+type Items = M.Map String (FilePath, MIME)
+
+readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)
+readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes)
+
+runEPUB :: Except String a -> a
+runEPUB = either error id . runExcept
+
+-- Note that internal reference are aggresively normalised so that all ids
+-- are of the form "filename#id"
+--
+archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
+archiveToEPUB os archive = do
+ -- root is path to folder with manifest file in
+ (root, content) <- getManifest archive
+ meta <- parseMeta content
+ (cover, items) <- parseManifest content
+ -- No need to collapse here as the image path is from the manifest file
+ let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
+ spine <- parseSpine items content
+ let escapedSpine = map (escapeURI . takeFileName . fst) spine
+ Pandoc _ bs <-
+ foldM' (\a b -> ((a <>) . bottomUp (prependHash escapedSpine))
+ `liftM` parseSpineElem root b) mempty spine
+ let ast = coverDoc <> (Pandoc meta bs)
+ let mediaBag = fetchImages (M.elems items) root archive ast
+ return $ (ast, mediaBag)
+ where
+ os' = os {readerParseRaw = True}
+ parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc
+ parseSpineElem (normalise -> r) (normalise -> path, mime) = do
+ when (readerTrace os) (traceM path)
+ doc <- mimeToReader mime r path
+ let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
+ return $ docSpan <> doc
+ mimeToReader :: MonadError String m => MIME -> FilePath -> FilePath -> m Pandoc
+ mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do
+ fname <- findEntryByPathE (root </> path) archive
+ return $ fixInternalReferences path .
+ readHtml os' .
+ UTF8.toStringLazy $
+ fromEntry fname
+ mimeToReader s _ path
+ | s `elem` imageMimes = return $ imageToPandoc path
+ | otherwise = return $ mempty
+
+-- paths should be absolute when this function is called
+-- renameImages should do this
+fetchImages :: [(FilePath, MIME)]
+ -> FilePath -- ^ Root
+ -> Archive
+ -> Pandoc
+ -> MediaBag
+fetchImages mimes root arc (query iq -> links) =
+ foldr (uncurry3 insertMedia) mempty
+ (mapMaybe getEntry links)
+ where
+ getEntry link =
+ let abslink = root </> link in
+ (link , lookup link mimes, ) . fromEntry
+ <$> findEntryByPath abslink arc
+
+iq :: Inline -> [FilePath]
+iq (Image _ (url, _)) = [url]
+iq _ = []
+
+-- Remove relative paths
+renameImages :: FilePath -> Inline -> Inline
+renameImages root (Image a (url, b)) = Image a (collapseFilePath (root </> url), b)
+renameImages _ x = x
+
+imageToPandoc :: FilePath -> Pandoc
+imageToPandoc s = B.doc . B.para $ B.image s "" mempty
+
+imageMimes :: [String]
+imageMimes = ["image/gif", "image/jpeg", "image/png"]
+
+type CoverImage = FilePath
+
+parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items)
+parseManifest content = do
+ manifest <- findElementE (dfName "manifest") content
+ let items = findChildren (dfName "item") manifest
+ r <- mapM parseItem items
+ let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
+ return (cover, (M.fromList r))
+ where
+ findCover e = maybe False (isInfixOf "cover-image")
+ (findAttr (emptyName "properties") e)
+ parseItem e = do
+ uid <- findAttrE (emptyName "id") e
+ href <- findAttrE (emptyName "href") e
+ mime <- findAttrE (emptyName "media-type") e
+ return (uid, (href, mime))
+
+parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MIME)]
+parseSpine is e = do
+ spine <- findElementE (dfName "spine") e
+ let itemRefs = findChildren (dfName "itemref") spine
+ mapM (mkE "parseSpine" . (flip M.lookup is)) $ mapMaybe parseItemRef itemRefs
+ where
+ parseItemRef ref = do
+ let linear = maybe True (== "yes") (findAttr (emptyName "linear") ref)
+ guard linear
+ findAttr (emptyName "idref") ref
+
+parseMeta :: MonadError String m => Element -> m Meta
+parseMeta content = do
+ meta <- findElementE (dfName "metadata") content
+ let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
+ dcspace _ = False
+ let dcs = filterChildrenName dcspace meta
+ let r = foldr parseMetaItem nullMeta dcs
+ return r
+
+-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
+parseMetaItem :: Element -> Meta -> Meta
+parseMetaItem e@(stripNamespace . elName -> field) meta =
+ addMetaField (renameMeta field) (B.str $ strContent e) meta
+
+renameMeta :: String -> String
+renameMeta "creator" = "author"
+renameMeta s = s
+
+getManifest :: MonadError String m => Archive -> m (String, Element)
+getManifest archive = do
+ metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
+ docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+ let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+ ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
+ as <- liftM ((map attrToPair) . elAttribs)
+ (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
+ manifestFile <- mkE "Root not found" (lookup "full-path" as)
+ let rootdir = dropFileName manifestFile
+ --mime <- lookup "media-type" as
+ manifest <- findEntryByPathE manifestFile archive
+ liftM ((,) rootdir) (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+
+-- Fixup
+
+fixInternalReferences :: FilePath -> Pandoc -> Pandoc
+fixInternalReferences pathToFile =
+ (walk $ renameImages root)
+ . (walk normalisePath)
+ . (walk $ fixBlockIRs filename)
+ . (walk $ fixInlineIRs filename)
+ where
+ (root, escapeURI -> filename) = splitFileName pathToFile
+
+fixInlineIRs :: String -> Inline -> Inline
+fixInlineIRs s (Span as v) =
+ Span (fixAttrs s as) v
+fixInlineIRs s (Code as code) =
+ Code (fixAttrs s as) code
+fixInlineIRs s (Link t ('#':url, tit)) =
+ Link t (addHash s url, tit)
+fixInlineIRs _ v = v
+
+normalisePath :: Inline -> Inline
+normalisePath (Link t (url, tit)) =
+ let (path, uid) = span (/= '#') url in
+ Link t (takeFileName path ++ uid, tit)
+normalisePath s = s
+
+prependHash :: [String] -> Inline -> Inline
+prependHash ps l@(Link is (url, tit))
+ | or [s `isPrefixOf` url | s <- ps] =
+ Link is ('#':url, tit)
+ | otherwise = l
+prependHash _ i = i
+
+fixBlockIRs :: String -> Block -> Block
+fixBlockIRs s (Div as b) =
+ Div (fixAttrs s as) b
+fixBlockIRs s (Header i as b) =
+ Header i (fixAttrs s as) b
+fixBlockIRs s (CodeBlock as code) =
+ CodeBlock (fixAttrs s as) code
+fixBlockIRs _ b = b
+
+fixAttrs :: FilePath -> B.Attr -> B.Attr
+fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs)
+
+addHash :: String -> String -> String
+addHash _ "" = ""
+addHash s ident = s ++ "#" ++ ident
+
+removeEPUBAttrs :: [(String, String)] -> [(String, String)]
+removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
+
+isEPUBAttr :: (String, String) -> Bool
+isEPUBAttr (k, _) = "epub:" `isPrefixOf` k
+
+-- Library
+
+-- Strict version of foldM
+foldM' :: (Monad m, NFData a) => (a -> b -> m a) -> a -> [b] -> m a
+foldM' _ z [] = return z
+foldM' f z (x:xs) = do
+ z' <- f z x
+ z' `deepseq` foldM' f z' xs
+
+uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
+uncurry3 f (a, b, c) = f a b c
+
+traceM :: Monad m => String -> m ()
+traceM = flip trace (return ())
+
+-- Utility
+
+stripNamespace :: QName -> String
+stripNamespace (QName v _ _) = v
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
+attrToNSPair _ = Nothing
+
+attrToPair :: Attr -> (String, String)
+attrToPair (Attr (QName name _ _) val) = (name, val)
+
+defaultNameSpace :: Maybe String
+defaultNameSpace = Just "http://www.idpf.org/2007/opf"
+
+dfName :: String -> QName
+dfName s = QName s defaultNameSpace Nothing
+
+emptyName :: String -> QName
+emptyName s = QName s Nothing Nothing
+
+-- Convert Maybe interface to Either
+
+findAttrE :: MonadError String m => QName -> Element -> m String
+findAttrE q e = mkE "findAttr" $ findAttr q e
+
+findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry
+findEntryByPathE (normalise -> path) a =
+ mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+
+parseXMLDocE :: MonadError String m => String -> m Element
+parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+
+findElementE :: MonadError String m => QName -> Element -> m Element
+findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+
+mkE :: MonadError String m => String -> Maybe a -> m a
+mkE s = maybe (throwError s) return
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 552e8a251..1789b865f 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -40,41 +41,69 @@ import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
-import Text.Pandoc.Shared
-import Text.Pandoc.Options
-import Text.Pandoc.Parsing
-import Data.Maybe ( fromMaybe, isJust )
-import Data.List ( intercalate )
+import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
+import Text.Pandoc.Shared ( extractSpaces, renderTags'
+ , escapeURI, safeRead )
+import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
+ , Extension (Ext_epub_html_exts,
+ Ext_native_divs, Ext_native_spans))
+import Text.Pandoc.Parsing hiding ((<|>))
+import Text.Pandoc.Walk
+import Data.Maybe ( fromMaybe, isJust)
+import Data.List ( intercalate, isInfixOf )
import Data.Char ( isDigit )
-import Control.Monad ( liftM, guard, when, mzero )
-import Control.Applicative ( (<$>), (<$), (<*) )
-import Data.Monoid
+import Control.Monad ( liftM, guard, when, mzero, void, unless )
+import Control.Arrow ((***))
+import Control.Applicative ( (<$>), (<$), (<*), (*>), (<|>))
+import Data.Monoid (mconcat, Monoid, mempty, (<>), First (..))
import Text.Printf (printf)
import Debug.Trace (trace)
+import Text.TeXMath (readMathML, writeTeX)
+import Data.Default (Default (..), def)
+import Control.Monad.Reader (Reader,ask, asks, local, runReader)
-isSpace :: Char -> Bool
-isSpace ' ' = True
-isSpace '\t' = True
-isSpace '\n' = True
-isSpace _ = False
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
readHtml opts inp =
- case runParser parseDoc def{ stateOptions = opts } "source" tags of
+ case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of
Left err' -> error $ "\nError at " ++ show err'
Right result -> result
- where tags = canonicalizeTags $
+ where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
- meta <- stateMeta <$> getState
- return $ Pandoc meta (B.toList blocks)
+ meta <- stateMeta . parserState <$> getState
+ bs' <- replaceNotes (B.toList blocks)
+ return $ Pandoc meta bs'
+
+replaceNotes :: [Block] -> TagParser [Block]
+replaceNotes = walkM replaceNotes'
+
+replaceNotes' :: Inline -> TagParser Inline
+replaceNotes' (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) . lookup ref <$> getNotes
+ where
+ getNotes = noteTable <$> getState
+replaceNotes' x = return x
-type TagParser = Parser [Tag String] ParserState
+data HTMLState =
+ HTMLState
+ { parserState :: ParserState,
+ noteTable :: [(String, Blocks)]
+ }
+
+data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
+ , inChapter :: Bool -- ^ Set if in chapter section
+ }
+
+setInChapter :: HTMLParser s a -> HTMLParser s a
+setInChapter = local (\s -> s {inChapter = True})
+
+type HTMLParser s = ParserT s HTMLState (Reader HTMLLocal)
+
+type TagParser = HTMLParser [Tag String]
pBody :: TagParser Blocks
pBody = pInTags "body" block
@@ -98,7 +127,11 @@ block = do
tr <- getOption readerTrace
pos <- getPosition
res <- choice
- [ pPara
+ [ eSection
+ , eSwitch B.para block
+ , mempty <$ eFootnote
+ , mempty <$ eTOC
+ , pPara
, pHeader
, pBlockQuote
, pCodeBlock
@@ -115,6 +148,63 @@ block = do
(take 60 $ show $ B.toList res)) (return ())
return res
+namespaces :: [(String, TagParser Inlines)]
+namespaces = [(mathMLNamespace, pMath True)]
+
+mathMLNamespace :: String
+mathMLNamespace = "http://www.w3.org/1998/Math/MathML"
+
+eSwitch :: Monoid a => (Inlines -> a) -> TagParser a -> TagParser a
+eSwitch constructor parser = try $ do
+ guardEnabled Ext_epub_html_exts
+ pSatisfy (~== TagOpen "switch" [])
+ cases <- getFirst . mconcat <$>
+ manyTill (First <$> (eCase <* skipMany pBlank) )
+ (lookAhead $ try $ pSatisfy (~== TagOpen "default" []))
+ skipMany pBlank
+ fallback <- pInTags "default" (skipMany pBlank *> parser <* skipMany pBlank)
+ skipMany pBlank
+ pSatisfy (~== TagClose "switch")
+ return $ maybe fallback constructor cases
+
+eCase :: TagParser (Maybe Inlines)
+eCase = do
+ skipMany pBlank
+ TagOpen _ attr <- lookAhead $ pSatisfy $ (~== TagOpen "case" [])
+ case (flip lookup namespaces) =<< lookup "required-namespace" attr of
+ Just p -> Just <$> (pInTags "case" (skipMany pBlank *> p <* skipMany pBlank))
+ Nothing -> Nothing <$ manyTill pAnyTag (pSatisfy (~== TagClose "case"))
+
+eFootnote :: TagParser ()
+eFootnote = try $ do
+ let notes = ["footnote", "rearnote"]
+ guardEnabled Ext_epub_html_exts
+ (TagOpen tag attr) <- lookAhead $ pAnyTag
+ guard (maybe False (flip elem notes) (lookup "type" attr))
+ let ident = fromMaybe "" (lookup "id" attr)
+ content <- pInTags tag block
+ addNote ident content
+
+addNote :: String -> Blocks -> TagParser ()
+addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : (noteTable s)})
+
+eNoteref :: TagParser Inlines
+eNoteref = try $ do
+ guardEnabled Ext_epub_html_exts
+ TagOpen tag attr <- lookAhead $ pAnyTag
+ guard (maybe False (== "noteref") (lookup "type" attr))
+ let ident = maybe "" (dropWhile (== '#')) (lookup "href" attr)
+ guard (not (null ident))
+ pInTags tag block
+ return $ B.rawInline "noteref" ident
+
+-- Strip TOC if there is one, better to generate again
+eTOC :: TagParser ()
+eTOC = try $ do
+ guardEnabled Ext_epub_html_exts
+ (TagOpen tag attr) <- lookAhead $ pAnyTag
+ guard (maybe False (== "toc") (lookup "type" attr))
+ void (pInTags tag block)
pList :: TagParser Blocks
pList = pBulletList <|> pOrderedList <|> pDefinitionList
@@ -128,9 +218,15 @@ pBulletList = try $ do
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
- items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul")
+ items <- manyTill (pListItem nonItem) (pCloses "ul")
return $ B.bulletList $ map (fixPlains True) items
+pListItem :: TagParser a -> TagParser Blocks
+pListItem nonItem = do
+ TagOpen _ attr <- lookAhead $ pSatisfy (~== TagOpen "li" [])
+ let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
+ (liDiv <>) <$> pInTags "li" block <* skipMany nonItem
+
pOrderedList :: TagParser Blocks
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
@@ -156,7 +252,7 @@ pOrderedList = try $ do
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
- items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol")
+ items <- manyTill (pListItem nonItem) (pCloses "ol")
return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items
pDefinitionList :: TagParser Blocks
@@ -194,14 +290,14 @@ fixPlains inList bs = if any isParaish bs'
pRawTag :: TagParser String
pRawTag = do
tag <- pAnyTag
- let ignorable x = x `elem` ["html","head","body"]
+ let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]
if tagOpen ignorable (const True) tag || tagClose ignorable tag
then return []
else return $ renderTags' [tag]
pDiv :: TagParser Blocks
pDiv = try $ do
- getOption readerParseRaw >>= guard
+ guardEnabled Ext_native_divs
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
contents <- pInTags "div" block
return $ B.divWith (mkAttr attr) contents
@@ -220,13 +316,35 @@ pHtmlBlock t = try $ do
contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
return $ renderTags' $ [open] ++ contents ++ [TagClose t]
+-- Sets chapter context
+eSection :: TagParser Blocks
+eSection = try $ do
+ let matchChapter as = maybe False (isInfixOf "chapter") (lookup "type" as)
+ let sectTag = tagOpen (`elem` sectioningContent) matchChapter
+ TagOpen tag _ <- lookAhead $ pSatisfy sectTag
+ setInChapter (pInTags tag block)
+
+headerLevel :: String -> TagParser Int
+headerLevel tagtype = do
+ let level = read (drop 1 tagtype)
+ (try $ do
+ guardEnabled Ext_epub_html_exts
+ asks inChapter >>= guard
+ return (level - 1))
+ <|>
+ return level
+
+
+
+
+
pHeader :: TagParser Blocks
pHeader = try $ do
TagOpen tagtype attr <- pSatisfy $
tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
(const True)
let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
- let level = read (drop 1 tagtype)
+ level <- headerLevel tagtype
contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] words $ lookup "class" attr
@@ -244,7 +362,7 @@ pTable :: TagParser Blocks
pTable = try $ do
TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
skipMany pBlank
- caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank
+ caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
-- TODO actually read these and take width information from them
widths' <- pColgroup <|> many pCol
head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
@@ -326,7 +444,9 @@ pCodeBlock = try $ do
inline :: TagParser Inlines
inline = choice
- [ pTagText
+ [ eNoteref
+ , eSwitch id inline
+ , pTagText
, pQ
, pEmph
, pStrong
@@ -338,6 +458,7 @@ inline = choice
, pImage
, pCode
, pSpan
+ , pMath False
, pRawHtmlInline
]
@@ -366,8 +487,8 @@ pSelfClosing f g = do
pQ :: TagParser Inlines
pQ = do
- quoteContext <- stateQuoteContext `fmap` getState
- let quoteType = case quoteContext of
+ context <- asks quoteContext
+ let quoteType = case context of
InDoubleQuote -> SingleQuote
_ -> DoubleQuote
let innerQuoteContext = if quoteType == SingleQuote
@@ -406,12 +527,24 @@ pLineBreak = do
return B.linebreak
pLink :: TagParser Inlines
-pLink = try $ do
+pLink = pRelLink <|> pAnchor
+
+pAnchor :: TagParser Inlines
+pAnchor = try $ do
+ tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "id"))
+ return $ B.spanWith (fromAttrib "id" tag , [], []) mempty
+
+pRelLink :: TagParser Inlines
+pRelLink = try $ do
tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
let url = fromAttrib "href" tag
let title = fromAttrib "title" tag
+ let uid = fromAttrib "id" tag
+ let spanC = case uid of
+ [] -> id
+ s -> B.spanWith (s, [], [])
lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
- return $ B.link (escapeURI url) title lab
+ return $ spanC $ B.link (escapeURI url) title lab
pImage :: TagParser Inlines
pImage = do
@@ -429,7 +562,7 @@ pCode = try $ do
pSpan :: TagParser Inlines
pSpan = try $ do
- getOption readerParseRaw >>= guard
+ guardEnabled Ext_native_spans
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
contents <- pInTags "span" inline
return $ B.spanWith (mkAttr attr) contents
@@ -442,6 +575,22 @@ pRawHtmlInline = do
then return $ B.rawInline "html" $ renderTags' [result]
else return mempty
+mathMLToTeXMath :: String -> Either String String
+mathMLToTeXMath s = writeTeX <$> readMathML s
+
+pMath :: Bool -> TagParser Inlines
+pMath inCase = try $ do
+ open@(TagOpen _ attr) <- pSatisfy $ tagOpen (=="math") (const True)
+ unless (inCase) (guard (maybe False (== mathMLNamespace) (lookup "xmlns" attr)))
+ contents <- manyTill pAnyTag (pSatisfy (~== TagClose "math"))
+ let math = mathMLToTeXMath $
+ (renderTags $ [open] ++ contents ++ [TagClose "math"])
+ let constructor =
+ maybe B.math (\x -> if (x == "inline") then B.math else B.displayMath)
+ (lookup "display" attr)
+ return $ either (const mempty)
+ (\x -> if null x then mempty else constructor x) math
+
pInlinesInTags :: String -> (Inlines -> Inlines)
-> TagParser Inlines
pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
@@ -479,7 +628,8 @@ pTagText :: TagParser Inlines
pTagText = try $ do
(TagText str) <- pSatisfy isTagText
st <- getState
- case runParser (many pTagContents) st "text" str of
+ qu <- ask
+ case flip runReader qu $ runParserT (many pTagContents) st "text" str of
Left _ -> fail $ "Could not parse `" ++ str ++ "'"
Right result -> return $ mconcat result
@@ -488,7 +638,9 @@ pBlank = try $ do
(TagText str) <- pSatisfy isTagText
guard $ all isSpace str
-pTagContents :: Parser [Char] ParserState Inlines
+type InlinesParser = HTMLParser String
+
+pTagContents :: InlinesParser Inlines
pTagContents =
B.displayMath <$> mathDisplay
<|> B.math <$> mathInline
@@ -498,12 +650,11 @@ pTagContents =
<|> pSymbol
<|> pBad
-pStr :: Parser [Char] ParserState Inlines
+pStr :: InlinesParser Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
- pos <- getPosition
- updateState $ \s -> s{ stateLastStrPos = Just pos }
+ updateLastStrPos
return $ B.str result
isSpecial :: Char -> Bool
@@ -518,13 +669,13 @@ isSpecial '\8220' = True
isSpecial '\8221' = True
isSpecial _ = False
-pSymbol :: Parser [Char] ParserState Inlines
+pSymbol :: InlinesParser Inlines
pSymbol = satisfy isSpecial >>= return . B.str . (:[])
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
-pBad :: Parser [Char] ParserState Inlines
+pBad :: InlinesParser Inlines
pBad = do
c <- satisfy isBad
let c' = case c of
@@ -558,7 +709,7 @@ pBad = do
_ -> '?'
return $ B.str [c']
-pSpace :: Parser [Char] ParserState Inlines
+pSpace :: InlinesParser Inlines
pSpace = many1 (satisfy isSpace) >> return B.space
--
@@ -566,8 +717,10 @@ pSpace = many1 (satisfy isSpace) >> return B.space
--
eitherBlockOrInline :: [String]
-eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
- "map", "area", "object"]
+eitherBlockOrInline = ["audio", "applet", "button", "iframe",
+ "del", "ins",
+ "progress", "map", "area", "noscript", "script",
+ "object", "svg", "video", "source"]
{-
inlineHtmlTags :: [[Char]]
@@ -579,15 +732,17 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
-}
blockHtmlTags :: [String]
-blockHtmlTags = ["address", "article", "aside", "blockquote", "body", "button", "canvas",
+blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
+ "blockquote", "body", "button", "canvas",
"caption", "center", "col", "colgroup", "dd", "dir", "div",
- "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer",
- "form", "h1", "h2", "h3", "h4",
- "h5", "h6", "head", "header", "hgroup", "hr", "html", "isindex", "map", "menu",
- "noframes", "noscript", "object", "ol", "output", "p", "pre", "progress",
- "section", "table", "tbody", "textarea", "thead", "tfoot", "ul", "dd",
+ "dl", "dt", "embed", "fieldset", "figcaption", "figure",
+ "footer", "form", "h1", "h2", "h3", "h4",
+ "h5", "h6", "head", "header", "hgroup", "hr", "html",
+ "isindex", "menu", "noframes", "ol", "output", "p", "pre",
+ "section", "table", "tbody", "textarea",
+ "thead", "tfoot", "ul", "dd",
"dt", "frameset", "li", "tbody", "td", "tfoot",
- "th", "thead", "tr", "script", "style", "svg", "video"]
+ "th", "thead", "tr", "script", "style"]
-- We want to allow raw docbook in markdown documents, so we
-- include docbook block tags here too.
@@ -605,8 +760,11 @@ blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
"classsynopsis", "blockquote", "epigraph", "msgset",
"sidebar", "title"]
+epubTags :: [String]
+epubTags = ["case", "switch", "default"]
+
blockTags :: [String]
-blockTags = blockHtmlTags ++ blockDocBookTags
+blockTags = blockHtmlTags ++ blockDocBookTags ++ epubTags
isInlineTag :: Tag String -> Bool
isInlineTag t = tagOpen isInlineTagName (const True) t ||
@@ -670,19 +828,23 @@ _ `closes` _ = False
--- parsers for use in markdown, textile readers
-- | Matches a stretch of HTML in balanced tags.
-htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String
+htmlInBalanced :: (Monad m)
+ => (Tag String -> Bool)
+ -> ParserT String st m String
htmlInBalanced f = try $ do
(TagOpen t _, tag) <- htmlTag f
guard $ '/' `notElem` tag -- not a self-closing tag
let stopper = htmlTag (~== TagClose t)
- let anytag = liftM snd $ htmlTag (const True)
+ let anytag = snd <$> htmlTag (const True)
contents <- many $ notFollowedBy' stopper >>
(htmlInBalanced f <|> anytag <|> count 1 anyChar)
endtag <- liftM snd stopper
return $ tag ++ concat contents ++ endtag
-- | Matches a tag meeting a certain condition.
-htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String)
+htmlTag :: Monad m
+ => (Tag String -> Bool)
+ -> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
@@ -701,7 +863,78 @@ htmlTag f = try $ do
mkAttr :: [(String, String)] -> Attr
mkAttr attr = (attribsId, attribsClasses, attribsKV)
where attribsId = fromMaybe "" $ lookup "id" attr
- attribsClasses = words $ fromMaybe "" $ lookup "class" attr
+ attribsClasses = (words $ fromMaybe "" $ lookup "class" attr) ++ epubTypes
attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+ epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr
+
+-- Strip namespace prefixes
+stripPrefixes :: [Tag String] -> [Tag String]
+stripPrefixes = map stripPrefix
+
+stripPrefix :: Tag String -> Tag String
+stripPrefix (TagOpen s as) =
+ TagOpen (stripPrefix' s) (map (stripPrefix' *** id) as)
+stripPrefix (TagClose s) = TagClose (stripPrefix' s)
+stripPrefix x = x
+
+stripPrefix' :: String -> String
+stripPrefix' s =
+ case span (/= ':') s of
+ (_, "") -> s
+ (_, (_:ts)) -> ts
+
+isSpace :: Char -> Bool
+isSpace ' ' = True
+isSpace '\t' = True
+isSpace '\n' = True
+isSpace '\r' = True
+isSpace _ = False
+
+-- Instances
+-- This signature should be more general
+-- MonadReader HTMLLocal m => HasQuoteContext st m
+instance HasQuoteContext st (Reader HTMLLocal) where
+ getQuoteContext = asks quoteContext
+ withQuoteContext q = local (\s -> s{quoteContext = q})
+instance HasReaderOptions HTMLState where
+ extractReaderOptions = extractReaderOptions . parserState
+
+instance Default HTMLState where
+ def = HTMLState def []
+
+instance HasMeta HTMLState where
+ setMeta s b st = st {parserState = setMeta s b $ parserState st}
+ deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
+
+instance Default HTMLLocal where
+ def = HTMLLocal NoQuote False
+
+instance HasLastStrPosition HTMLState where
+ setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
+ getLastStrPos = getLastStrPos . parserState
+
+
+-- EPUB Specific
+--
+--
+sectioningContent :: [String]
+sectioningContent = ["article", "aside", "nav", "section"]
+
+{-
+groupingContent :: [String]
+groupingContent = ["p", "hr", "pre", "blockquote", "ol"
+ , "ul", "li", "dl", "dt", "dt", "dd"
+ , "figure", "figcaption", "div", "main"]
+
+
+
+types :: [(String, ([String], Int))]
+types = -- Document divisions
+ map (\s -> (s, (["section", "body"], 0)))
+ ["volume", "part", "chapter", "division"]
+ ++ -- Document section and components
+ [
+ ("abstract", ([], 0))]
+-}
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index a3dfb7c3c..4b46c869d 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -43,11 +43,8 @@ docHToBlocks d' =
(docHToInlines False $ headerTitle h)
DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
DocString _ -> inlineFallback
- DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h)
DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h
- DocParagraph x -> let (ils, rest) = getInlines x
- in (B.para $ docHToInlines False ils)
- <> docHToBlocks rest
+ DocParagraph x -> B.para $ docHToInlines False x
DocIdentifier _ -> inlineFallback
DocIdentifierUnchecked _ -> inlineFallback
DocModule s -> B.plain $ docHToInlines False $ DocModule s
@@ -115,40 +112,6 @@ docHToInlines isCode d' =
DocProperty _ -> mempty
DocExamples _ -> mempty
-getInlines :: DocH String Identifier -> (DocH String Identifier, DocH String Identifier)
-getInlines (DocAppend x y) = if isInline x
- then let (a, b) = getInlines y
- in (DocAppend x a, b)
- else (DocEmpty, DocAppend x y)
-getInlines x = if isInline x
- then (x, DocEmpty)
- else (DocEmpty, x)
-
-isInline :: DocH String Identifier -> Bool
-isInline d' =
- case d' of
- DocEmpty -> True
- DocAppend d1 _ -> isInline d1
- DocString _ -> True
- DocParagraph _ -> False
- DocIdentifier _ -> True
- DocIdentifierUnchecked _ -> True
- DocModule _ -> True
- DocWarning _ -> True
- DocEmphasis _ -> True
- DocMonospaced _ -> True
- DocBold _ -> True
- DocHeader _ -> False
- DocUnorderedList _ -> False
- DocOrderedList _ -> False
- DocDefList _ -> False
- DocCodeBlock _ -> False
- DocHyperlink _ -> True
- DocPic _ -> True
- DocAName _ -> True
- DocProperty _ -> False
- DocExamples _ -> False
-
-- | Create an 'Example', stripping superfluous characters as appropriate
makeExample :: String -> String -> [String] -> Blocks
makeExample prompt expression result =
@@ -173,4 +136,3 @@ makeExample prompt expression result =
substituteBlankLine "<BLANKLINE>" = ""
substituteBlankLine line = line
coder = B.codeWith ([], ["result"], [])
-
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 97bfaa455..9f51e9a8f 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -41,7 +41,6 @@ import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
mathDisplay, mathInline)
-import Text.Parsec.Prim (ParsecT, runParserT)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Char ( chr, ord )
import Control.Monad.Trans (lift)
@@ -104,7 +103,7 @@ dimenarg = try $ do
sp :: LP ()
sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
- <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline)
+ <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline)
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
@@ -808,7 +807,7 @@ rawEnv name = do
----
-type IncludeParser = ParsecT [Char] [String] IO String
+type IncludeParser = ParserT [Char] [String] IO String
-- | Replace "include" commands with file contents.
handleIncludes :: String -> IO String
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 6c710c8ff..861f81b23 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -353,8 +353,7 @@ referenceKey = try $ do
notFollowedBy' referenceTitle
notFollowedBy' (() <$ reference)
many1 $ notFollowedBy space >> litChar
- let betweenAngles = try $ char '<' >>
- manyTill (escapedChar' <|> litChar) (char '>')
+ let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
-- currently we just ignore MMD-style link/image attributes
@@ -571,7 +570,7 @@ attributes :: MarkdownParser Attr
attributes = try $ do
char '{'
spnl
- attrs <- many (attribute >>~ spnl)
+ attrs <- many (attribute <* spnl)
char '}'
return $ foldl (\x f -> f x) nullAttr attrs
@@ -688,7 +687,7 @@ birdTrackLine c = try $ do
--
emailBlockQuoteStart :: MarkdownParser Char
-emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ')
+emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
emailBlockQuote :: MarkdownParser [String]
emailBlockQuote = try $ do
@@ -752,7 +751,7 @@ listLine = try $ do
notFollowedBy' (do indentSpaces
many spaceChar
listStart)
- notFollowedBy' $ htmlTag (~== TagClose "div")
+ notFollowedByHtmlCloser
optional (() <$ indentSpaces)
chunks <- manyTill
( many1 (satisfy $ \c -> c /= '\n' && c /= '<')
@@ -781,11 +780,18 @@ listContinuation = try $ do
blanks <- many blankline
return $ concat result ++ blanks
+notFollowedByHtmlCloser :: MarkdownParser ()
+notFollowedByHtmlCloser = do
+ inHtmlBlock <- stateInHtmlBlock <$> getState
+ case inHtmlBlock of
+ Just t -> notFollowedBy' $ htmlTag (~== TagClose t)
+ Nothing -> return ()
+
listContinuationLine :: MarkdownParser String
listContinuationLine = try $ do
notFollowedBy blankline
notFollowedBy' listStart
- notFollowedBy' $ htmlTag (~== TagClose "div")
+ notFollowedByHtmlCloser
optional indentSpaces
result <- anyLine
return $ result ++ "\n"
@@ -840,38 +846,53 @@ defListMarker = do
else mzero
return ()
-definitionListItem :: MarkdownParser (F (Inlines, [Blocks]))
-definitionListItem = try $ do
- -- first, see if this has any chance of being a definition list:
- lookAhead (anyLine >> optional blankline >> defListMarker)
- term <- trimInlinesF . mconcat <$> manyTill inline newline
- optional blankline
- raw <- many1 defRawBlock
- state <- getState
- let oldContext = stateParserContext state
- -- parse the extracted block, which may contain various block elements:
+definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks]))
+definitionListItem compact = try $ do
+ rawLine' <- anyLine
+ raw <- many1 $ defRawBlock compact
+ term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
contents <- mapM (parseFromString parseBlocks) raw
- updateState (\st -> st {stateParserContext = oldContext})
+ optional blanklines
return $ liftM2 (,) term (sequence contents)
-defRawBlock :: MarkdownParser String
-defRawBlock = try $ do
+defRawBlock :: Bool -> MarkdownParser String
+defRawBlock compact = try $ do
+ hasBlank <- option False $ blankline >> return True
defListMarker
firstline <- anyLine
- rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
- trailing <- option "" blanklines
- cont <- liftM concat $ many $ do
- lns <- many1 $ notFollowedBy blankline >> indentSpaces >> anyLine
- trl <- option "" blanklines
- return $ unlines lns ++ trl
- return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont
+ let dline = try
+ ( do notFollowedBy blankline
+ if compact -- laziness not compatible with compact
+ then () <$ indentSpaces
+ else (() <$ indentSpaces)
+ <|> notFollowedBy defListMarker
+ anyLine )
+ rawlines <- many dline
+ cont <- liftM concat $ many $ try $ do
+ trailing <- option "" blanklines
+ ln <- indentSpaces >> notFollowedBy blankline >> anyLine
+ lns <- many dline
+ return $ trailing ++ unlines (ln:lns)
+ return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
+ if hasBlank || not (null cont) then "\n\n" else ""
definitionList :: MarkdownParser (F Blocks)
-definitionList = do
- guardEnabled Ext_definition_lists
- items <- fmap sequence $ many1 definitionListItem
+definitionList = try $ do
+ lookAhead (anyLine >> optional blankline >> defListMarker)
+ compactDefinitionList <|> normalDefinitionList
+
+compactDefinitionList :: MarkdownParser (F Blocks)
+compactDefinitionList = do
+ guardEnabled Ext_compact_definition_lists
+ items <- fmap sequence $ many1 $ definitionListItem True
return $ B.definitionList <$> fmap compactify'DL items
+normalDefinitionList :: MarkdownParser (F Blocks)
+normalDefinitionList = do
+ guardEnabled Ext_definition_lists
+ items <- fmap sequence $ many1 $ definitionListItem False
+ return $ B.definitionList <$> items
+
--
-- paragraph block
--
@@ -914,16 +935,34 @@ htmlElement = rawVerbatimBlock
htmlBlock :: MarkdownParser (F Blocks)
htmlBlock = do
guardEnabled Ext_raw_html
- res <- (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)
- <|> htmlBlock'
- return $ return $ B.rawBlock "html" res
-
-htmlBlock' :: MarkdownParser String
+ try (do
+ (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
+ (guard (t `elem` ["pre","style","script"]) >>
+ (return . B.rawBlock "html") <$> rawVerbatimBlock)
+ <|> (do guardEnabled Ext_markdown_attribute
+ oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
+ markdownAttribute <-
+ case lookup "markdown" attrs of
+ Just "0" -> False <$ updateState (\st -> st{
+ stateMarkdownAttribute = False })
+ Just _ -> True <$ updateState (\st -> st{
+ stateMarkdownAttribute = True })
+ Nothing -> return oldMarkdownAttribute
+ res <- if markdownAttribute
+ then rawHtmlBlocks
+ else htmlBlock'
+ updateState $ \st -> st{ stateMarkdownAttribute =
+ oldMarkdownAttribute }
+ return res)
+ <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
+ <|> htmlBlock'
+
+htmlBlock' :: MarkdownParser (F Blocks)
htmlBlock' = try $ do
first <- htmlElement
- finalSpace <- many spaceChar
- finalNewlines <- many newline
- return $ first ++ finalSpace ++ finalNewlines
+ skipMany spaceChar
+ optional blanklines
+ return $ return $ B.rawBlock "html" first
strictHtmlBlock :: MarkdownParser String
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
@@ -934,48 +973,36 @@ rawVerbatimBlock = try $ do
["pre", "style", "script"])
(const True))
contents <- manyTill anyChar (htmlTag (~== TagClose tag))
- return $ open ++ contents ++ renderTags [TagClose tag]
+ return $ open ++ contents ++ renderTags' [TagClose tag]
rawTeXBlock :: MarkdownParser (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
- result <- (B.rawBlock "latex" <$> rawLaTeXBlock)
- <|> (B.rawBlock "context" <$> rawConTeXtEnvironment)
+ result <- (B.rawBlock "latex" . concat <$>
+ rawLaTeXBlock `sepEndBy1` blankline)
+ <|> (B.rawBlock "context" . concat <$>
+ rawConTeXtEnvironment `sepEndBy1` blankline)
spaces
return $ return result
-rawHtmlBlocks :: MarkdownParser String
+rawHtmlBlocks :: MarkdownParser (F Blocks)
rawHtmlBlocks = do
- htmlBlocks <- many1 $ try $ do
- s <- rawVerbatimBlock <|> try (
- do (t,raw) <- htmlTag isBlockTag
- guard $ t ~/= TagOpen "div" [] &&
- t ~/= TagClose "div"
- exts <- getOption readerExtensions
- -- if open tag, need markdown="1" if
- -- markdown_attributes extension is set
- case t of
- TagOpen _ as
- | Ext_markdown_attribute `Set.member`
- exts ->
- if "markdown" `notElem`
- map fst as
- then mzero
- else return $
- stripMarkdownAttribute raw
- | otherwise -> return raw
- _ -> return raw )
- sps <- do sp1 <- many spaceChar
- sp2 <- option "" (blankline >> return "\n")
- sp3 <- many spaceChar
- sp4 <- option "" blanklines
- return $ sp1 ++ sp2 ++ sp3 ++ sp4
- -- note: we want raw html to be able to
- -- precede a code block, when separated
- -- by a blank line
- return $ s ++ sps
- let combined = concat htmlBlocks
- return $ if last combined == '\n' then init combined else combined
+ (TagOpen tagtype _, raw) <- htmlTag isBlockTag
+ -- try to find closing tag
+ -- we set stateInHtmlBlock so that closing tags that can be either block or
+ -- inline will not be parsed as inline tags
+ oldInHtmlBlock <- stateInHtmlBlock <$> getState
+ updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
+ let closer = htmlTag (\x -> x ~== TagClose tagtype)
+ contents <- mconcat <$> many (notFollowedBy' closer >> block)
+ result <-
+ (closer >>= \(_, rawcloser) -> return (
+ return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
+ contents <>
+ return (B.rawBlock "html" rawcloser)))
+ <|> return (return (B.rawBlock "html" raw) <> contents)
+ updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+ return result
-- remove markdown="1" attribute
stripMarkdownAttribute :: String -> String
@@ -1163,7 +1190,7 @@ gridPart ch = do
return (length dashes, length dashes + 1)
gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: String -> String
removeFinalBar =
@@ -1388,8 +1415,7 @@ escapedChar = do
ltSign :: MarkdownParser (F Inlines)
ltSign = do
guardDisabled Ext_raw_html
- <|> guardDisabled Ext_markdown_in_html_blocks
- <|> (notFollowedBy' (htmlTag isBlockTag) >> return ())
+ <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
char '<'
return $ return $ B.str "<"
@@ -1434,52 +1460,60 @@ math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
enclosure :: Char
-> MarkdownParser (F Inlines)
enclosure c = do
+ -- we can't start an enclosure with _ if after a string and
+ -- the intraword_underscores extension is enabled:
+ guardDisabled Ext_intraword_underscores
+ <|> guard (c == '*')
+ <|> (guard =<< notAfterString)
cs <- many1 (char c)
(return (B.str cs) <>) <$> whitespace
- <|> case length cs of
+ <|> do
+ case length cs of
3 -> three c
2 -> two c mempty
1 -> one c mempty
_ -> return (return $ B.str cs)
+ender :: Char -> Int -> MarkdownParser ()
+ender c n = try $ do
+ count n (char c)
+ guard (c == '*')
+ <|> guardDisabled Ext_intraword_underscores
+ <|> notFollowedBy alphaNum
+
-- Parse inlines til you hit one c or a sequence of two cs.
-- If one c, emit emph and then parse two.
-- If two cs, emit strong and then parse one.
-- Otherwise, emit ccc then the results.
three :: Char -> MarkdownParser (F Inlines)
three c = do
- contents <- mconcat <$> many (notFollowedBy (char c) >> inline)
- (try (string [c,c,c]) >> return ((B.strong . B.emph) <$> contents))
- <|> (try (string [c,c]) >> one c (B.strong <$> contents))
- <|> (char c >> two c (B.emph <$> contents))
+ contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
+ (ender c 3 >> return ((B.strong . B.emph) <$> contents))
+ <|> (ender c 2 >> one c (B.strong <$> contents))
+ <|> (ender c 1 >> two c (B.emph <$> contents))
<|> return (return (B.str [c,c,c]) <> contents)
-- Parse inlines til you hit two c's, and emit strong.
-- If you never do hit two cs, emit ** plus inlines parsed.
two :: Char -> F Inlines -> MarkdownParser (F Inlines)
two c prefix' = do
- let ender = try $ string [c,c]
- contents <- mconcat <$> many (try $ notFollowedBy ender >> inline)
- (ender >> return (B.strong <$> (prefix' <> contents)))
+ contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
+ (ender c 2 >> return (B.strong <$> (prefix' <> contents)))
<|> return (return (B.str [c,c]) <> (prefix' <> contents))
-- Parse inlines til you hit a c, and emit emph.
-- If you never hit a c, emit * plus inlines parsed.
one :: Char -> F Inlines -> MarkdownParser (F Inlines)
one c prefix' = do
- contents <- mconcat <$> many ( (notFollowedBy (char c) >> inline)
+ contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
<|> try (string [c,c] >>
- notFollowedBy (char c) >>
+ notFollowedBy (ender c 1) >>
two c mempty) )
- (char c >> return (B.emph <$> (prefix' <> contents)))
+ (ender c 1 >> return (B.emph <$> (prefix' <> contents)))
<|> return (return (B.str [c]) <> (prefix' <> contents))
strongOrEmph :: MarkdownParser (F Inlines)
-strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_')
- where checkIntraword = do
- exts <- getOption readerExtensions
- when (Ext_intraword_underscores `Set.member` exts) $ do
- guard =<< notAfterString
+strongOrEmph = enclosure '*' <|> enclosure '_'
-- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: (Show b)
@@ -1489,7 +1523,7 @@ inlinesBetween :: (Show b)
inlinesBetween start end =
(trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
- innerSpace = try $ whitespace >>~ notFollowedBy' end
+ innerSpace = try $ whitespace <* notFollowedBy' end
strikeout :: MarkdownParser (F Inlines)
strikeout = fmap B.strikeout <$>
@@ -1730,7 +1764,7 @@ inBrackets parser = do
spanHtml :: MarkdownParser (F Inlines)
spanHtml = try $ do
- guardEnabled Ext_markdown_in_html_blocks
+ guardEnabled Ext_native_spans
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
let ident = fromMaybe "" $ lookup "id" attrs
@@ -1745,14 +1779,19 @@ spanHtml = try $ do
divHtml :: MarkdownParser (F Blocks)
divHtml = try $ do
- guardEnabled Ext_markdown_in_html_blocks
+ guardEnabled Ext_native_divs
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
+ -- we set stateInHtmlBlock so that closing tags that can be either block or
+ -- inline will not be parsed as inline tags
+ oldInHtmlBlock <- stateInHtmlBlock <$> getState
+ updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
bls <- option "" (blankline >> option "" blanklines)
contents <- mconcat <$>
many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
closed <- option False (True <$ htmlTag (~== TagClose "div"))
if closed
then do
+ updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
@@ -1763,10 +1802,17 @@ divHtml = try $ do
rawHtmlInline :: MarkdownParser (F Inlines)
rawHtmlInline = do
guardEnabled Ext_raw_html
+ inHtmlBlock <- stateInHtmlBlock <$> getState
+ let isCloseBlockTag t = case inHtmlBlock of
+ Just t' -> t ~== TagClose t'
+ Nothing -> False
mdInHtml <- option False $
- guardEnabled Ext_markdown_in_html_blocks >> return True
+ ( guardEnabled Ext_markdown_in_html_blocks
+ <|> guardEnabled Ext_markdown_attribute
+ ) >> return True
(_,result) <- htmlTag $ if mdInHtml
- then isInlineTag
+ then (\x -> isInlineTag x &&
+ not (isCloseBlockTag x))
else not . isTextTag
return $ return $ B.rawInline "html" result
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index f1dcce8f7..e43b8a86c 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -569,7 +569,8 @@ endline = () <$ try (newline <*
imageIdentifiers :: [MWParser ()]
imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
- where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier"]
+ where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier",
+ "Bild"]
image :: MWParser Inlines
image = try $ do
@@ -634,7 +635,7 @@ inlinesBetween :: (Show b) => MWParser a -> MWParser b -> MWParser Inlines
inlinesBetween start end =
(trimInlines . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
- innerSpace = try $ whitespace >>~ notFollowedBy' end
+ innerSpace = try $ whitespace <* notFollowedBy' end
emph :: MWParser Inlines
emph = B.emph <$> nested (inlinesBetween start end)
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 7a35e2ca0..e1c29d1e8 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -41,10 +41,10 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
)
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.Pandoc.Shared (compactify', compactify'DL)
-import Text.TeXMath (texMathToPandoc, DisplayType(..))
+import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
import Control.Applicative ( Applicative, pure
- , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
+ , (<$>), (<$), (<*>), (<*), (*>) )
import Control.Arrow (first)
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
import Control.Monad.Reader (Reader, runReader, ask, asks)
@@ -274,7 +274,7 @@ optionalAttributes parser = try $
parseBlockAttributes :: OrgParser ()
parseBlockAttributes = do
attrs <- many attribute
- () <$ mapM (uncurry parseAndAddAttribute) attrs
+ mapM_ (uncurry parseAndAddAttribute) attrs
where
attribute :: OrgParser (String, String)
attribute = try $ do
@@ -341,14 +341,36 @@ verseBlock blkProp = try $ do
fmap B.para . mconcat . intersperse (pure B.linebreak)
<$> mapM (parseFromString parseInlines) (lines content)
+exportsCode :: [(String, String)] -> Bool
+exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
+ || ("rundoc-exports", "results") `elem` attrs)
+
+exportsResults :: [(String, String)] -> Bool
+exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
+ || ("rundoc-exports", "both") `elem` attrs
+
+followingResultsBlock :: OrgParser (Maybe String)
+followingResultsBlock =
+ optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:"
+ *> blankline
+ *> (unlines <$> many1 exampleLine))
+
codeBlock :: BlockProperties -> OrgParser (F Blocks)
codeBlock blkProp = do
skipSpaces
- (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
- id' <- fromMaybe "" <$> lookupBlockAttribute "name"
- content <- rawBlockContent blkProp
- let codeBlck = B.codeBlockWith ( id', classes, kv ) content
- maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption"
+ (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+ id' <- fromMaybe "" <$> lookupBlockAttribute "name"
+ content <- rawBlockContent blkProp
+ resultsContent <- followingResultsBlock
+ let includeCode = exportsCode kv
+ let includeResults = exportsResults kv
+ let codeBlck = B.codeBlockWith ( id', classes, kv ) content
+ labelledBlck <- maybe (pure codeBlck)
+ (labelDiv codeBlck)
+ <$> lookupInlinesAttr "caption"
+ let resultBlck = pure $ maybe mempty (exampleCode) resultsContent
+ return $ (if includeCode then labelledBlck else mempty)
+ <> (if includeResults then resultBlck else mempty)
where
labelDiv blk value =
B.divWith nullAttr <$> (mappend <$> labelledBlock value
@@ -780,8 +802,12 @@ noteBlock = try $ do
-- Paragraphs or Plain text
paraOrPlain :: OrgParser (F Blocks)
-paraOrPlain = try $
- parseInlines <**> (fmap <$> option B.plain (try $ newline *> pure B.para))
+paraOrPlain = try $ do
+ ils <- parseInlines
+ nl <- option False (newline >> return True)
+ try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >>
+ return (B.para <$> ils))
+ <|> (return (B.plain <$> ils))
inlinesTillNewline :: OrgParser (F Inlines)
inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
@@ -1357,7 +1383,7 @@ inlineLaTeX = try $ do
maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd
where
parseAsMath :: String -> Maybe Inlines
- parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs
+ parseAsMath cs = B.fromList <$> texMathToPandoc cs
parseAsInlineLaTeX :: String -> Maybe Inlines
parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
@@ -1365,6 +1391,9 @@ inlineLaTeX = try $ do
state :: ParserState
state = def{ stateOptions = def{ readerParseRaw = True }}
+ texMathToPandoc inp = (maybeRight $ readTeX inp) >>=
+ writePandoc DisplayInline
+
maybeRight :: Either a b -> Maybe b
maybeRight = either (const Nothing) Just
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index fa8438e70..e5eccb116 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -47,7 +47,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
import Data.Monoid (mconcat, mempty)
import Data.Sequence (viewr, ViewR(..))
-import Data.Char (toLower)
+import Data.Char (toLower, isHexDigit)
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ReaderOptions -- ^ Reader options
@@ -460,7 +460,7 @@ listItem :: RSTParser Int
listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
- blanks <- choice [ try (many blankline >>~ lookAhead start),
+ blanks <- choice [ try (many blankline <* lookAhead start),
many1 blankline ] -- whole list must end with blank.
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
@@ -480,7 +480,7 @@ listItem start = try $ do
orderedList :: RSTParser Blocks
orderedList = try $ do
- (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
+ (start, style, delim) <- lookAhead (anyOrderedListMarker <* spaceChar)
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify' items
return $ B.orderedListWith (start, style, delim) items'
@@ -656,9 +656,6 @@ extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
where (ds,rest) = span isHexDigit s
mbc = safeRead ('\'':'\\':'x':ds ++ "'")
-isHexDigit :: Char -> Bool
-isHexDigit c = c `elem` "0123456789ABCDEFabcdef"
-
extractCaption :: RSTParser (Inlines, Blocks)
extractCaption = do
capt <- trimInlines . mconcat <$> many inline
@@ -747,7 +744,7 @@ simpleReferenceName = do
referenceName :: RSTParser Inlines
referenceName = quotedReferenceName <|>
- (try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
+ (try $ simpleReferenceName <* lookAhead (char ':')) <|>
unquotedReferenceName
referenceKey :: RSTParser [Char]
@@ -1076,7 +1073,7 @@ explicitLink = try $ do
referenceLink :: RSTParser Inlines
referenceLink = try $ do
- (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~
+ (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) <*
char '_'
state <- getState
let keyTable = stateKeys state
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index f03eae044..3fee3051e 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -27,7 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of TeX math to a list of 'Pandoc' inline elements.
-}
-module Text.Pandoc.Readers.TeXMath ( readTeXMath, readTeXMath' ) where
+module Text.Pandoc.Readers.TeXMath ( texMathToInlines ) where
import Text.Pandoc.Definition
import Text.TeXMath
@@ -35,22 +35,14 @@ import Text.TeXMath
-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
-- Defaults to raw formula between @$@ or @$$@ characters if entire formula
-- can't be converted.
-readTeXMath' :: MathType
+texMathToInlines :: MathType
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> [Inline]
-readTeXMath' mt inp = case texMathToPandoc dt inp of
- Left _ -> [Str (delim ++ inp ++ delim)]
- Right res -> res
+texMathToInlines mt inp =
+ case writePandoc dt `fmap` readTeX inp of
+ Right (Just ils) -> ils
+ _ -> [Str (delim ++ inp ++ delim)]
where (dt, delim) = case mt of
DisplayMath -> (DisplayBlock, "$$")
InlineMath -> (DisplayInline, "$")
-{-# DEPRECATED readTeXMath "Use readTeXMath' from Text.Pandoc.JSON instead" #-}
--- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
--- Defaults to raw formula between @$@ characters if entire formula
--- can't be converted. (This is provided for backwards compatibility;
--- it is better to use @readTeXMath'@, which properly distinguishes
--- between display and inline math.)
-readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings)
- -> [Inline]
-readTeXMath = readTeXMath' InlineMath
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 6d839ec1d..cd34da942 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -265,8 +265,20 @@ definitionList :: Parser [Char] ParserState Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
-listStart :: Parser [Char] st Char
-listStart = oneOf "*#-"
+listStart :: Parser [Char] ParserState ()
+listStart = genericListStart '*'
+ <|> () <$ genericListStart '#'
+ <|> () <$ definitionListStart
+
+genericListStart :: Char -> Parser [Char] st ()
+genericListStart c = () <$ try (many1 (char c) >> whitespace)
+
+definitionListStart :: Parser [Char] ParserState Inlines
+definitionListStart = try $ do
+ char '-'
+ whitespace
+ trimInlines . mconcat <$>
+ many1Till inline (try (string ":=")) <* optional whitespace
listInline :: Parser [Char] ParserState Inlines
listInline = try (notFollowedBy newline >> inline)
@@ -278,8 +290,7 @@ listInline = try (notFollowedBy newline >> inline)
-- break.
definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
definitionListItem = try $ do
- string "- "
- term <- mconcat <$> many1Till inline (try (whitespace >> string ":="))
+ term <- definitionListStart
def' <- multilineDef <|> inlineDef
return (term, def')
where inlineDef :: Parser [Char] ParserState [Blocks]
@@ -488,7 +499,7 @@ str = do
return $ B.str fullStr
-- | Some number of space chars
-whitespace :: Parser [Char] ParserState Inlines
+whitespace :: Parser [Char] st Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
@@ -616,7 +627,8 @@ simpleInline border construct = try $ do
attr <- attributes
body <- trimInlines . mconcat <$>
withQuoteContext InSingleQuote
- (manyTill inline (try border <* notFollowedBy alphaNum))
+ (manyTill (notFollowedBy newline >> inline)
+ (try border <* notFollowedBy alphaNum))
return $ construct $
if attr == nullAttr
then body
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
new file mode 100644
index 000000000..3a51b9d84
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -0,0 +1,548 @@
+{-# LANGUAGE ViewPatterns #-}
+{-
+Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Txt2Tags
+ Copyright : Copyright (C) 2014 Matthew Pickering
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Matthew Pickering <matthewtpickering@gmail.com>
+
+Conversion of txt2tags formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags
+ , getT2TMeta
+ , T2TMeta (..)
+ , readTxt2TagsNoMacros)
+ where
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder ( Inlines, Blocks, (<>)
+ , trimInlines )
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL)
+import Text.Pandoc.Parsing hiding (space, spaces, uri, macro)
+import Control.Applicative ((<$>), (<$), (<*>), (<*), (*>))
+import Data.Char (toLower)
+import Data.List (transpose, intersperse, intercalate)
+import Data.Maybe (fromMaybe)
+import Data.Monoid (Monoid, mconcat, mempty, mappend)
+--import Network.URI (isURI) -- Not sure whether to use this function
+import Control.Monad (void, guard, when)
+import Data.Default
+import Control.Monad.Reader (Reader, runReader, asks)
+
+import Data.Time.LocalTime (getZonedTime)
+import Text.Pandoc.Compat.Directory(getModificationTime)
+import Data.Time.Format (formatTime)
+import System.Locale (defaultTimeLocale)
+import System.IO.Error (catchIOError)
+
+type T2T = ParserT String ParserState (Reader T2TMeta)
+
+-- | An object for the T2T macros meta information
+-- the contents of each field is simply substituted verbatim into the file
+data T2TMeta = T2TMeta {
+ date :: String -- ^ Current date
+ , mtime :: String -- ^ Last modification time of infile
+ , infile :: FilePath -- ^ Input file
+ , outfile :: FilePath -- ^ Output file
+ } deriving Show
+
+instance Default T2TMeta where
+ def = T2TMeta "" "" "" ""
+
+-- | Get the meta information required by Txt2Tags macros
+getT2TMeta :: [FilePath] -> FilePath -> IO T2TMeta
+getT2TMeta inps out = do
+ curDate <- formatTime defaultTimeLocale "%F" <$> getZonedTime
+ let getModTime = fmap (formatTime defaultTimeLocale "%F") .
+ getModificationTime
+ curMtime <- catchIOError
+ (maximum <$> mapM getModTime inps)
+ (const (return ""))
+ return $ T2TMeta curDate curMtime (intercalate ", " inps) out
+
+-- | Read Txt2Tags from an input string returning a Pandoc document
+readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc
+readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
+
+-- | Read Txt2Tags (ignoring all macros) from an input string returning
+-- a Pandoc document
+readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc
+readTxt2TagsNoMacros = readTxt2Tags def
+
+parseT2T :: T2T Pandoc
+parseT2T = do
+ _ <- (Nothing <$ try blankline) <|> (Just <$> (count 3 anyLine))
+ config <- manyTill setting (notFollowedBy setting)
+ -- TODO: Handle settings better
+ let settings = foldr (\(k,v) -> B.setMeta k (MetaString v)) nullMeta config
+ updateState (\s -> s {stateMeta = settings})
+ body <- mconcat <$> manyTill block eof
+ return $ Pandoc mempty (B.toList body)
+
+type Keyword = String
+type Value = String
+
+setting :: T2T (Keyword, Value)
+setting = do
+ string "%!"
+ keyword <- ignoreSpacesCap (many1 alphaNum)
+ char ':'
+ value <- ignoreSpacesCap (manyTill anyChar (newline))
+ return (keyword, value)
+
+-- Blocks
+
+parseBlocks :: T2T Blocks
+parseBlocks = mconcat <$> manyTill block eof
+
+block :: T2T Blocks
+block = do
+ choice
+ [ mempty <$ blanklines
+ , quote
+ , hrule -- hrule must go above title
+ , title
+ , commentBlock
+ , verbatim
+ , rawBlock
+ , taggedBlock
+ , list
+ , table
+ , para
+ ]
+
+title :: T2T Blocks
+title = try $ balancedTitle '+' <|> balancedTitle '='
+
+balancedTitle :: Char -> T2T Blocks
+balancedTitle c = try $ do
+ spaces
+ level <- length <$> many1 (char c)
+ guard (level <= 5) -- Max header level 5
+ heading <- manyTill (noneOf "\n\r") (count level (char c))
+ label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-"))
+ many spaceChar *> newline
+ let attr = maybe nullAttr (\x -> (x, [], [])) label
+ return $ B.headerWith attr level (trimInlines $ B.text heading)
+
+para :: T2T Blocks
+para = try $ do
+ ils <- parseInlines
+ nl <- option False (True <$ newline)
+ option (B.plain ils) (guard nl >> notFollowedBy listStart >> return (B.para ils))
+ where
+ listStart = try bulletListStart <|> orderedListStart
+
+commentBlock :: T2T Blocks
+commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment
+
+-- Seperator and Strong line treated the same
+hrule :: T2T Blocks
+hrule = try $ do
+ spaces
+ line <- many1 (oneOf "=-_")
+ guard (length line >= 20)
+ B.horizontalRule <$ blankline
+
+quote :: T2T Blocks
+quote = try $ do
+ lookAhead tab
+ rawQuote <- many1 (tab *> optional spaces *> anyLine)
+ contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
+ return $ B.blockQuote contents
+
+commentLine :: T2T Inlines
+commentLine = comment
+
+-- List Parsing code from Org Reader
+
+list :: T2T Blocks
+list = choice [bulletList, orderedList, definitionList]
+
+bulletList :: T2T Blocks
+bulletList = B.bulletList . compactify'
+ <$> many1 (listItem bulletListStart parseBlocks)
+
+orderedList :: T2T Blocks
+orderedList = B.orderedList . compactify'
+ <$> many1 (listItem orderedListStart parseBlocks)
+
+definitionList :: T2T Blocks
+definitionList = try $ do
+ B.definitionList . compactify'DL <$>
+ many1 (listItem definitionListStart definitionListEnd)
+
+definitionListEnd :: T2T (Inlines, [Blocks])
+definitionListEnd = (,) <$> (mconcat <$> manyTill inline newline) <*> ((:[]) <$> parseBlocks)
+
+genericListStart :: T2T Char
+ -> T2T Int
+genericListStart listMarker = try $
+ (2+) <$> (length <$> many spaceChar
+ <* listMarker <* space <* notFollowedBy space)
+
+-- parses bullet list \start and returns its length (excl. following whitespace)
+bulletListStart :: T2T Int
+bulletListStart = genericListStart (char '-')
+
+orderedListStart :: T2T Int
+orderedListStart = genericListStart (char '+' )
+
+definitionListStart :: T2T Int
+definitionListStart = genericListStart (char ':')
+
+-- parse raw text for one list item, excluding start marker and continuations
+listItem :: T2T Int
+ -> T2T a
+ -> T2T a
+listItem start end = try $ do
+ markerLength <- try start
+ firstLine <- anyLineNewline
+ blank <- option "" ("\n" <$ blankline)
+ rest <- concat <$> many (listContinuation markerLength)
+ parseFromString end $ firstLine ++ blank ++ rest
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int
+ -> T2T String
+listContinuation markerLength = try $
+ notFollowedBy' (blankline >> blankline)
+ *> (mappend <$> (concat <$> many1 listLine)
+ <*> many blankline)
+ where listLine = try $ indentWith markerLength *> anyLineNewline
+
+anyLineNewline :: T2T String
+anyLineNewline = (++ "\n") <$> anyLine
+
+indentWith :: Int -> T2T String
+indentWith n = count n space
+
+-- Table
+
+table :: T2T Blocks
+table = try $ do
+ header <- fmap snd <$> option mempty (try headerRow)
+ rows <- many1 (many commentLine *> tableRow)
+ let columns = transpose rows
+ let ncolumns = length columns
+ let aligns = map (foldr1 findAlign) (map (map fst) columns)
+ let rows' = map (map snd) rows
+ let size = maximum (map length rows')
+ let rowsPadded = map (pad size) rows'
+ let headerPadded = if (not (null header)) then pad size header else mempty
+ return $ B.table mempty
+ (zip aligns (replicate ncolumns 0.0))
+ headerPadded rowsPadded
+
+pad :: (Show a, Monoid a) => Int -> [a] -> [a]
+pad n xs = xs ++ (replicate (n - length xs) mempty)
+
+
+findAlign :: Alignment -> Alignment -> Alignment
+findAlign x y
+ | x == y = x
+ | otherwise = AlignDefault
+
+headerRow :: T2T [(Alignment, Blocks)]
+headerRow = genericRow (string "||")
+
+tableRow :: T2T [(Alignment, Blocks)]
+tableRow = genericRow (char '|')
+
+genericRow :: T2T a -> T2T [(Alignment, Blocks)]
+genericRow start = try $ do
+ spaces *> start
+ manyTill tableCell newline <?> "genericRow"
+
+
+tableCell :: T2T (Alignment, Blocks)
+tableCell = try $ do
+ leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead
+ content <- (manyTill inline (try $ lookAhead (cellEnd)))
+ rightSpaces <- length <$> many space
+ let align =
+ case compare leftSpaces rightSpaces of
+ LT -> AlignLeft
+ EQ -> AlignCenter
+ GT -> AlignRight
+ endOfCell
+ return $ (align, B.plain (B.trimInlines $ mconcat content))
+ where
+ cellEnd = (void newline <|> (many1 space *> endOfCell))
+
+endOfCell :: T2T ()
+endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline)
+
+-- Raw area
+
+verbatim :: T2T Blocks
+verbatim = genericBlock anyLineNewline B.codeBlock "```"
+
+rawBlock :: T2T Blocks
+rawBlock = genericBlock anyLineNewline (B.para . B.str) "\"\"\""
+
+taggedBlock :: T2T Blocks
+taggedBlock = do
+ target <- getTarget
+ genericBlock anyLineNewline (B.rawBlock target) "'''"
+
+-- Generic
+
+genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
+genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s
+
+blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupArea p f s = try $ (do
+ string s *> blankline
+ f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline))))
+
+blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupLine p f s = try (f <$> (string s *> space *> p))
+
+-- Can be in either block or inline position
+comment :: Monoid a => T2T a
+comment = try $ do
+ atStart
+ notFollowedBy macro
+ mempty <$ (char '%' *> anyLine)
+
+-- Inline
+
+parseInlines :: T2T Inlines
+parseInlines = trimInlines . mconcat <$> many1 inline
+
+inline :: T2T Inlines
+inline = do
+ choice
+ [ endline
+ , macro
+ , commentLine
+ , whitespace
+ , url
+ , link
+ , image
+ , bold
+ , underline
+ , code
+ , raw
+ , tagged
+ , strike
+ , italic
+ , code
+ , str
+ , symbol
+ ]
+
+bold :: T2T Inlines
+bold = inlineMarkup inline B.strong '*' (B.str)
+
+underline :: T2T Inlines
+underline = inlineMarkup inline B.emph '_' (B.str)
+
+strike :: T2T Inlines
+strike = inlineMarkup inline B.strikeout '-' (B.str)
+
+italic :: T2T Inlines
+italic = inlineMarkup inline B.emph '/' (B.str)
+
+code :: T2T Inlines
+code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id
+
+raw :: T2T Inlines
+raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id
+
+tagged :: T2T Inlines
+tagged = do
+ target <- getTarget
+ inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id
+
+-- Parser for markup indicated by a double character.
+-- Inline markup is greedy and glued
+-- Greedy meaning ***a*** = Bold [Str "*a*"]
+-- Glued meaning that markup must be tight to content
+-- Markup can't pass newlines
+inlineMarkup :: Monoid a
+ => (T2T a) -- Content parser
+ -> (a -> Inlines) -- Constructor
+ -> Char -- Fence
+ -> (String -> a) -- Special Case to handle ******
+ -> T2T Inlines
+inlineMarkup p f c special = try $ do
+ start <- many1 (char c)
+ let l = length start
+ guard (l >= 2)
+ when (l == 2) (void $ notFollowedBy space)
+ -- We must make sure that there is no space before the start of the
+ -- closing tags
+ body <- optionMaybe (try $ manyTill (noneOf "\n\r") $
+ (try $ lookAhead (noneOf " " >> string [c,c] )))
+ case body of
+ Just middle -> do
+ lastChar <- anyChar
+ end <- many1 (char c)
+ let parser inp = parseFromString (mconcat <$> many p) inp
+ let start' = special (drop 2 start)
+ body' <- parser (middle ++ [lastChar])
+ let end' = special (drop 2 end)
+ return $ f (start' <> body' <> end')
+ Nothing -> do -- Either bad or case such as *****
+ guard (l >= 5)
+ let body' = (replicate (l - 4) c)
+ return $ f (special body')
+
+link :: T2T Inlines
+link = try imageLink <|> titleLink
+
+-- Link with title
+titleLink :: T2T Inlines
+titleLink = try $ do
+ char '['
+ notFollowedBy space
+ tokens <- sepBy1 (many $ noneOf " ]") space
+ guard (length tokens >= 2)
+ char ']'
+ let link' = last tokens
+ guard (length link' > 0)
+ let tit = concat (intersperse " " (init tokens))
+ return $ B.link link' "" (B.text tit)
+
+-- Link with image
+imageLink :: T2T Inlines
+imageLink = try $ do
+ char '['
+ body <- image
+ many1 space
+ l <- manyTill (noneOf "\n\r ") (char ']')
+ return (B.link l "" body)
+
+macro :: T2T Inlines
+macro = try $ do
+ name <- string "%%" *> oneOfStringsCI (map fst commands)
+ optional (try $ enclosed (char '(') (char ')') anyChar)
+ lookAhead (spaceChar <|> oneOf specialChars <|> newline)
+ maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands)
+ where
+ commands = [ ("date", date), ("mtime", mtime)
+ , ("infile", infile), ("outfile", outfile)]
+
+-- raw URLs in text are automatically linked
+url :: T2T Inlines
+url = try $ do
+ (rawUrl, escapedUrl) <- (try uri <|> emailAddress)
+ return $ B.link rawUrl "" (B.str escapedUrl)
+
+uri :: T2T (String, String)
+uri = try $ do
+ address <- t2tURI
+ return (address, escapeURI address)
+
+-- The definition of a URI in the T2T source differs from the
+-- actual definition. This is a transcription of the definition in
+-- the source of v2.6
+--isT2TURI :: String -> Bool
+--isT2TURI (parse t2tURI "" -> Right _) = True
+--isT2TURI _ = False
+
+t2tURI :: T2T String
+t2tURI = do
+ start <- try ((++) <$> proto <*> urlLogin) <|> guess
+ domain <- many1 chars
+ sep <- many (char '/')
+ form' <- option mempty ((:) <$> char '?' <*> many1 form)
+ anchor' <- option mempty ((:) <$> char '#' <*> many anchor)
+ return (start ++ domain ++ sep ++ form' ++ anchor')
+ where
+ protos = ["http", "https", "ftp", "telnet", "gopher", "wais"]
+ proto = (++) <$> oneOfStrings protos <*> string "://"
+ guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23"))
+ <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.')
+ login = alphaNum <|> oneOf "_.-"
+ pass = many (noneOf " @")
+ chars = alphaNum <|> oneOf "%._/~:,=$@&+-"
+ anchor = alphaNum <|> oneOf "%._0"
+ form = chars <|> oneOf ";*"
+ urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@')
+
+
+image :: T2T Inlines
+image = try $ do
+ -- List taken from txt2tags source
+ let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"]
+ char '['
+ path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions))
+ ext <- oneOfStrings extensions
+ char ']'
+ return $ B.image (path ++ ext) "" mempty
+
+-- Characters used in markup
+specialChars :: String
+specialChars = "%*-_/|:+"
+
+tab :: T2T Char
+tab = char '\t'
+
+space :: T2T Char
+space = char ' '
+
+spaces :: T2T String
+spaces = many space
+
+endline :: T2T Inlines
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ notFollowedBy hrule
+ notFollowedBy title
+ notFollowedBy verbatim
+ notFollowedBy rawBlock
+ notFollowedBy taggedBlock
+ notFollowedBy quote
+ notFollowedBy list
+ notFollowedBy table
+ return $ B.space
+
+str :: T2T Inlines
+str = try $ do
+ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+
+whitespace :: T2T Inlines
+whitespace = try $ B.space <$ spaceChar
+
+symbol :: T2T Inlines
+symbol = B.str . (:[]) <$> oneOf specialChars
+
+-- Utility
+
+getTarget :: T2T String
+getTarget = do
+ mv <- lookupMeta "target" . stateMeta <$> getState
+ let MetaString target = fromMaybe (MetaString "html") mv
+ return target
+
+atStart :: T2T ()
+atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
+
+ignoreSpacesCap :: T2T String -> T2T String
+ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)
+
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 2a2f56281..1a4e037cf 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -32,53 +32,54 @@ the HTML using data URIs.
-}
module Text.Pandoc.SelfContained ( makeSelfContained ) where
import Text.HTML.TagSoup
-import Network.URI (isURI, escapeURIString)
+import Network.URI (isURI, escapeURIString, URI(..), parseURI)
import Data.ByteString.Base64
import qualified Data.ByteString.Char8 as B
import Data.ByteString (ByteString)
-import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
+import System.FilePath (takeExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
+import Text.Pandoc.Shared (renderTags', err, fetchItem')
+import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.UTF8 (toString, fromString)
-import Text.Pandoc.MIME (getMimeType)
-import System.Directory (doesFileExist)
+import Text.Pandoc.Options (WriterOptions(..))
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
-convertTag :: Maybe FilePath -> Tag String -> IO (Tag String)
-convertTag userdata t@(TagOpen tagname as)
+convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
+convertTag media sourceURL t@(TagOpen tagname as)
| tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
as' <- mapM processAttribute as
return $ TagOpen tagname as'
where processAttribute (x,y) =
if x == "src" || x == "href" || x == "poster"
then do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) y
+ (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y
let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
return (x, enc)
else return (x,y)
-convertTag userdata t@(TagOpen "script" as) =
+convertTag media sourceURL t@(TagOpen "script" as) =
case fromAttrib "src" t of
[] -> return t
src -> do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
-convertTag userdata t@(TagOpen "link" as) =
+convertTag media sourceURL t@(TagOpen "link" as) =
case fromAttrib "href" t of
[] -> return t
src -> do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
-convertTag _ t = return t
+convertTag _ _ t = return t
-- NOTE: This is really crude, it doesn't respect CSS comments.
-cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString
-cssURLs userdata d orig =
+cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
+ -> IO ByteString
+cssURLs media sourceURL d orig =
case B.breakSubstring "url(" orig of
(x,y) | B.null y -> return orig
| otherwise -> do
@@ -91,33 +92,21 @@ cssURLs userdata d orig =
let url' = if isURI url
then url
else d </> url
- (raw, mime) <- getRaw userdata "" url'
- rest <- cssURLs userdata d v
+ (raw, mime) <- getRaw media sourceURL "" url'
+ rest <- cssURLs media sourceURL d v
let enc = "data:" `B.append` fromString mime `B.append`
";base64," `B.append` (encode raw)
return $ x `B.append` "url(" `B.append` enc `B.append` rest
-getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
-getItem userdata f =
- if isURI f
- then openURL f >>= either handleErr return
- else do
- -- strip off trailing query or fragment part, if relative URL.
- -- this is needed for things like cmunrm.eot?#iefix,
- -- which is used to get old versions of IE to work with web fonts.
- let f' = takeWhile (\c -> c /= '?' && c /= '#') f
- let mime = case takeExtension f' of
- ".gz" -> getMimeType $ dropExtension f'
- x -> getMimeType x
- exists <- doesFileExist f'
- cont <- if exists then B.readFile f' else readDataFile userdata f'
- return (cont, mime)
- where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
-
-getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String)
-getRaw userdata mimetype src = do
+getRaw :: MediaBag -> Maybe String -> String -> String
+ -> IO (ByteString, String)
+getRaw media sourceURL mimetype src = do
let ext = map toLower $ takeExtension src
- (raw, respMime) <- getItem userdata src
+ fetchResult <- fetchItem' media sourceURL src
+ (raw, respMime) <- case fetchResult of
+ Left msg -> err 67 $ "Could not fetch " ++ src ++
+ "\n" ++ show msg
+ Right x -> return x
let raw' = if ext == ".gz"
then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
$ [raw]
@@ -127,21 +116,22 @@ getRaw userdata mimetype src = do
$ "Could not determine mime type for `" ++ src ++ "'"
(x, Nothing) -> x
(_, Just x ) -> x
+ let cssSourceURL = case parseURI src of
+ Just u
+ | uriScheme u `elem` ["http:","https:"] ->
+ Just $ show u{ uriPath = "",
+ uriQuery = "",
+ uriFragment = "" }
+ _ -> Nothing
result <- if mime == "text/css"
- then cssURLs userdata (takeDirectory src) raw'
+ then cssURLs media cssSourceURL (takeDirectory src) raw'
else return raw'
return (result, mime)
-- | Convert HTML into self-contained HTML, incorporating images,
--- scripts, and CSS using data: URIs. Items specified using absolute
--- URLs will be downloaded; those specified using relative URLs will
--- be sought first relative to the working directory, then relative
--- to the user data directory (if the first parameter is 'Just'
--- a directory), and finally relative to pandoc's default data
--- directory.
-makeSelfContained :: Maybe FilePath -> String -> IO String
-makeSelfContained userdata inp = do
+-- scripts, and CSS using data: URIs.
+makeSelfContained :: WriterOptions -> String -> IO String
+makeSelfContained opts inp = do
let tags = parseTags inp
- out' <- mapM (convertTag userdata) tags
+ out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags
return $ renderTags' out'
-
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 5b0d9b6b4..a91ca9115 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,5 +1,5 @@
{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
- FlexibleContexts, ScopedTypeVariables #-}
+ FlexibleContexts, ScopedTypeVariables, PatternGuards #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -55,7 +55,11 @@ module Text.Pandoc.Shared (
normalizeSpaces,
extractSpaces,
normalize,
+ normalizeInlines,
+ normalizeBlocks,
+ removeFormatting,
stringify,
+ capitalize,
compactify,
compactify',
compactify'DL,
@@ -74,17 +78,21 @@ module Text.Pandoc.Shared (
readDataFile,
readDataFileUTF8,
fetchItem,
+ fetchItem',
openURL,
+ collapseFilePath,
-- * Error handling
err,
warn,
-- * Safe read
- safeRead
+ safeRead,
+ -- * Temp directory
+ withTempDir
) where
import Text.Pandoc.Definition
import Text.Pandoc.Walk
-import Text.Pandoc.Generic
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
@@ -92,14 +100,15 @@ import System.Environment (getProgName)
import System.Exit (exitWith, ExitCode(..))
import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
-import Data.List ( find, isPrefixOf, intercalate )
+import Data.List ( find, stripPrefix, intercalate )
import qualified Data.Map as M
import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
- unEscapeString, parseURIReference )
+ unEscapeString, parseURIReference, isAllowedInURI )
import qualified Data.Set as Set
import System.Directory
+import System.FilePath (joinPath, splitDirectories)
import Text.Pandoc.MIME (getMimeType)
-import System.FilePath ( (</>), takeExtension, dropExtension )
+import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
@@ -108,6 +117,7 @@ import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
+import System.IO.Temp
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import qualified Data.ByteString as BS
@@ -115,6 +125,7 @@ import qualified Data.ByteString.Char8 as B8
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
+import qualified Data.Text as T (toUpper, pack, unpack)
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
@@ -174,9 +185,9 @@ substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst@(x:xs) =
- if target `isPrefixOf` lst
- then replacement ++ substitute target replacement (drop (length target) lst)
- else x : substitute target replacement xs
+ case stripPrefix target lst of
+ Just lst' -> replacement ++ substitute target replacement lst'
+ Nothing -> x : substitute target replacement xs
ordNub :: (Ord a) => [a] -> [a]
ordNub l = go Set.empty l
@@ -334,10 +345,10 @@ isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False
-- | Extract the leading and trailing spaces from inside an inline element
--- and place them outside the element.
+-- and place them outside the element.
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
-extractSpaces f is =
+extractSpaces f is =
let contents = B.unMany is
left = case viewl contents of
(Space :< _) -> B.space
@@ -350,72 +361,160 @@ extractSpaces f is =
-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
-- empty elements, etc.
-normalize :: (Eq a, Data a) => a -> a
-normalize = topDown removeEmptyBlocks .
- topDown consolidateInlines .
- bottomUp (removeEmptyInlines . removeTrailingInlineSpaces)
-
-removeEmptyBlocks :: [Block] -> [Block]
-removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs
-removeEmptyBlocks [] = []
-
-removeEmptyInlines :: [Inline] -> [Inline]
-removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs
-removeEmptyInlines (x : xs) = x : removeEmptyInlines xs
-removeEmptyInlines [] = []
-
-removeTrailingInlineSpaces :: [Inline] -> [Inline]
-removeTrailingInlineSpaces = reverse . removeLeadingInlineSpaces . reverse
-
-removeLeadingInlineSpaces :: [Inline] -> [Inline]
-removeLeadingInlineSpaces = dropWhile isSpaceOrEmpty
-
-consolidateInlines :: [Inline] -> [Inline]
-consolidateInlines (Str x : ys) =
+normalize :: Pandoc -> Pandoc
+normalize (Pandoc (Meta meta) blocks) =
+ Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
+ where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
+ go (MetaBlocks xs) = MetaBlocks $ normalizeBlocks xs
+ go (MetaList ms) = MetaList $ map go ms
+ go (MetaMap m) = MetaMap $ M.map go m
+ go x = x
+
+normalizeBlocks :: [Block] -> [Block]
+normalizeBlocks (Null : xs) = normalizeBlocks xs
+normalizeBlocks (Div attr bs : xs) =
+ Div attr (normalizeBlocks bs) : normalizeBlocks xs
+normalizeBlocks (BlockQuote bs : xs) =
+ case normalizeBlocks bs of
+ [] -> normalizeBlocks xs
+ bs' -> BlockQuote bs' : normalizeBlocks xs
+normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
+normalizeBlocks (BulletList items : xs) =
+ BulletList (map normalizeBlocks items) : normalizeBlocks xs
+normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
+normalizeBlocks (OrderedList attr items : xs) =
+ OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
+normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
+normalizeBlocks (DefinitionList items : xs) =
+ DefinitionList (map go items) : normalizeBlocks xs
+ where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
+normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
+normalizeBlocks (RawBlock f x : xs) =
+ case normalizeBlocks xs of
+ (RawBlock f' x' : rest) | f' == f ->
+ RawBlock f (x ++ ('\n':x')) : rest
+ rest -> RawBlock f x : rest
+normalizeBlocks (Para ils : xs) =
+ case normalizeInlines ils of
+ [] -> normalizeBlocks xs
+ ils' -> Para ils' : normalizeBlocks xs
+normalizeBlocks (Plain ils : xs) =
+ case normalizeInlines ils of
+ [] -> normalizeBlocks xs
+ ils' -> Plain ils' : normalizeBlocks xs
+normalizeBlocks (Header lev attr ils : xs) =
+ Header lev attr (normalizeInlines ils) : normalizeBlocks xs
+normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
+ Table (normalizeInlines capt) aligns widths
+ (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
+ : normalizeBlocks xs
+normalizeBlocks (x:xs) = x : normalizeBlocks xs
+normalizeBlocks [] = []
+
+normalizeInlines :: [Inline] -> [Inline]
+normalizeInlines (Str x : ys) =
case concat (x : map fromStr strs) of
- "" -> consolidateInlines rest
- n -> Str n : consolidateInlines rest
+ "" -> rest
+ n -> Str n : rest
where
- (strs, rest) = span isStr ys
+ (strs, rest) = span isStr $ normalizeInlines ys
isStr (Str _) = True
isStr _ = False
fromStr (Str z) = z
- fromStr _ = error "consolidateInlines - fromStr - not a Str"
-consolidateInlines (Space : ys) = Space : rest
+ fromStr _ = error "normalizeInlines - fromStr - not a Str"
+normalizeInlines (Space : ys) =
+ if null rest
+ then []
+ else Space : rest
where isSp Space = True
isSp _ = False
- rest = consolidateInlines $ dropWhile isSp ys
-consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
- Emph (xs ++ ys) : zs
-consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
- Strong (xs ++ ys) : zs
-consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $
- Subscript (xs ++ ys) : zs
-consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $
- Superscript (xs ++ ys) : zs
-consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $
- SmallCaps (xs ++ ys) : zs
-consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $
- Strikeout (xs ++ ys) : zs
-consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' =
- consolidateInlines $ RawInline f (x ++ y) : zs
-consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
- consolidateInlines $ Code a1 (x ++ y) : zs
-consolidateInlines (x : xs) = x : consolidateInlines xs
-consolidateInlines [] = []
+ rest = dropWhile isSp $ normalizeInlines ys
+normalizeInlines (Emph xs : zs) =
+ case normalizeInlines zs of
+ (Emph ys : rest) -> normalizeInlines $
+ Emph (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Emph xs' : rest
+normalizeInlines (Strong xs : zs) =
+ case normalizeInlines zs of
+ (Strong ys : rest) -> normalizeInlines $
+ Strong (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Strong xs' : rest
+normalizeInlines (Subscript xs : zs) =
+ case normalizeInlines zs of
+ (Subscript ys : rest) -> normalizeInlines $
+ Subscript (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Subscript xs' : rest
+normalizeInlines (Superscript xs : zs) =
+ case normalizeInlines zs of
+ (Superscript ys : rest) -> normalizeInlines $
+ Superscript (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Superscript xs' : rest
+normalizeInlines (SmallCaps xs : zs) =
+ case normalizeInlines zs of
+ (SmallCaps ys : rest) -> normalizeInlines $
+ SmallCaps (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> SmallCaps xs' : rest
+normalizeInlines (Strikeout xs : zs) =
+ case normalizeInlines zs of
+ (Strikeout ys : rest) -> normalizeInlines $
+ Strikeout (normalizeInlines $ xs ++ ys) : rest
+ rest -> case normalizeInlines xs of
+ [] -> rest
+ xs' -> Strikeout xs' : rest
+normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
+normalizeInlines (RawInline f xs : zs) =
+ case normalizeInlines zs of
+ (RawInline f' ys : rest) | f == f' -> normalizeInlines $
+ RawInline f (xs ++ ys) : rest
+ rest -> RawInline f xs : rest
+normalizeInlines (Code _ "" : ys) = normalizeInlines ys
+normalizeInlines (Code attr xs : zs) =
+ case normalizeInlines zs of
+ (Code attr' ys : rest) | attr == attr' -> normalizeInlines $
+ Code attr (xs ++ ys) : rest
+ rest -> Code attr xs : rest
+-- allow empty spans, they may carry identifiers etc.
+-- normalizeInlines (Span _ [] : ys) = normalizeInlines ys
+normalizeInlines (Span attr xs : zs) =
+ case normalizeInlines zs of
+ (Span attr' ys : rest) | attr == attr' -> normalizeInlines $
+ Span attr (normalizeInlines $ xs ++ ys) : rest
+ rest -> Span attr (normalizeInlines xs) : rest
+normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) :
+ normalizeInlines ys
+normalizeInlines (Quoted qt ils : ys) =
+ Quoted qt (normalizeInlines ils) : normalizeInlines ys
+normalizeInlines (Link ils t : ys) =
+ Link (normalizeInlines ils) t : normalizeInlines ys
+normalizeInlines (Image ils t : ys) =
+ Image (normalizeInlines ils) t : normalizeInlines ys
+normalizeInlines (Cite cs ils : ys) =
+ Cite cs (normalizeInlines ils) : normalizeInlines ys
+normalizeInlines (x : xs) = x : normalizeInlines xs
+normalizeInlines [] = []
+
+-- | Extract inlines, removing formatting.
+removeFormatting :: Walkable Inline a => a -> [Inline]
+removeFormatting = query go . walk deNote
+ where go :: Inline -> [Inline]
+ go (Str xs) = [Str xs]
+ go Space = [Space]
+ go (Code _ x) = [Str x]
+ go (Math _ x) = [Str x]
+ go LineBreak = [Space]
+ go _ = []
+ deNote (Note _) = Str ""
+ deNote x = x
-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
@@ -432,6 +531,17 @@ stringify = query go . walk deNote
deNote (Note _) = Str ""
deNote x = x
+-- | Bring all regular text in a pandoc structure to uppercase.
+--
+-- This function correctly handles cases where a lowercase character doesn't
+-- match to a single uppercase character – e.g. “Straße” would be converted
+-- to “STRASSE”, not “STRAßE”.
+capitalize :: Walkable Inline a => a -> a
+capitalize = walk go
+ where go :: Inline -> Inline
+ go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
+ go x = x
+
-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
@@ -463,20 +573,22 @@ compactify' items =
_ -> items
_ -> items
--- | Like @compactify'@, but akts on items of definition lists.
+-- | Like @compactify'@, but acts on items of definition lists.
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
compactify'DL items =
let defs = concatMap snd items
- defBlocks = reverse $ concatMap B.toList defs
- in case defBlocks of
- (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
- then let (t,ds) = last items
- lastDef = B.toList $ last ds
- ds' = init ds ++
- [B.fromList $ init lastDef ++ [Plain x]]
- in init items ++ [(t, ds')]
- else items
- _ -> items
+ in case reverse (concatMap B.toList defs) of
+ (Para x:xs)
+ | not (any isPara xs) ->
+ let (t,ds) = last items
+ lastDef = B.toList $ last ds
+ ds' = init ds ++
+ if null lastDef
+ then [B.fromList lastDef]
+ else [B.fromList $ init lastDef ++ [Plain x]]
+ in init items ++ [(t, ds')]
+ | otherwise -> items
+ _ -> items
isPara :: Block -> Bool
isPara (Para _) = True
@@ -669,28 +781,38 @@ readDataFileUTF8 userDir fname =
-- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe String))
-fetchItem sourceURL s
- | isURI s = openURL s
- | otherwise =
- case sourceURL >>= parseURIReference of
- Just u -> case parseURIReference s of
- Just s' -> openURL $ show $
- s' `nonStrictRelativeTo` u
- Nothing -> openURL $ show u ++ "/" ++ s
- Nothing -> E.try readLocalFile
+fetchItem sourceURL s =
+ case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of
+ (_, s') | isURI s' -> openURL s'
+ (Just u, s') -> -- try fetching from relative path at source
+ case parseURIReference s' of
+ Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
+ Nothing -> openURL s' -- will throw error
+ (Nothing, _) -> E.try readLocalFile -- get from local file system
where readLocalFile = do
- let mime = case takeExtension s of
- ".gz" -> getMimeType $ dropExtension s
- x -> getMimeType x
- cont <- BS.readFile s
+ cont <- BS.readFile fp
return (cont, mime)
+ dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
+ fp = unEscapeString $ dropFragmentAndQuery s
+ mime = case takeExtension fp of
+ ".gz" -> getMimeType $ dropExtension fp
+ x -> getMimeType x
+ ensureEscaped = escapeURIString isAllowedInURI
+
+-- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
+fetchItem' :: MediaBag -> Maybe String -> String
+ -> IO (Either E.SomeException (BS.ByteString, Maybe String))
+fetchItem' media sourceURL s = do
+ case lookupMedia s media of
+ Nothing -> fetchItem sourceURL s
+ Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
openURL u
- | "data:" `isPrefixOf` u =
- let mime = takeWhile (/=',') $ drop 5 u
- contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u
+ | Just u' <- stripPrefix "data:" u =
+ let mime = takeWhile (/=',') u'
+ contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'
in return $ Right (decodeLenient contents, Just mime)
#ifdef HTTP_CLIENT
| otherwise = withSocketsDo $ E.try $ do
@@ -734,6 +856,29 @@ warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+-- | Remove intermediate "." and ".." directories from a path.
+--
+-- @
+-- collapseFilePath "./foo" == "foo"
+-- collapseFilePath "/bar/../baz" == "/baz"
+-- collapseFilePath "/../baz" == "/../baz"
+-- collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
+-- collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
+-- collapseFilePath "parent/foo/.." == "parent"
+-- collapseFilePath "/parent/foo/../../bar" == "/bar"
+-- @
+collapseFilePath :: FilePath -> FilePath
+collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories
+ where
+ go rs "." = rs
+ go r@(p:rs) ".." = case p of
+ ".." -> ("..":r)
+ "/" -> ("..":r)
+ _ -> rs
+ go _ "/" = ["/"]
+ go rs x = x:rs
+
+
--
-- Safe read
--
@@ -743,3 +888,15 @@ safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
_ -> fail $ "Could not read `" ++ s ++ "'"
+
+--
+-- Temp directory
+--
+
+withTempDir :: String -> (FilePath -> IO a) -> IO a
+withTempDir =
+#ifdef _WINDOWS
+ withTempDirectory "."
+#else
+ withSystemTempDirectory
+#endif
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 19112d8f5..e5b8c5167 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -43,16 +43,19 @@ import Text.Pandoc.Shared
import Text.Pandoc.Writers.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, space)
-import Data.List ( isPrefixOf, intersperse, intercalate )
+import Data.Maybe (fromMaybe)
+import Data.List ( stripPrefix, intersperse, intercalate )
import Text.Pandoc.Pretty
import Control.Monad.State
import qualified Data.Map as M
import Data.Aeson (Value(String), fromJSON, toJSON, Result(..))
import qualified Data.Text as T
+import Control.Applicative ((<*), (*>))
data WriterState = WriterState { defListMarker :: String
, orderedListLevel :: Int
, bulletListLevel :: Int
+ , intraword :: Bool
}
-- | Convert Pandoc to AsciiDoc.
@@ -62,6 +65,7 @@ writeAsciiDoc opts document =
defListMarker = "::"
, orderedListLevel = 1
, bulletListLevel = 1
+ , intraword = False
}
-- | Return asciidoc representation of document.
@@ -123,7 +127,7 @@ blockToAsciiDoc _ Null = return empty
blockToAsciiDoc opts (Plain inlines) = do
contents <- inlineListToAsciiDoc opts inlines
return $ contents <> cr
-blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) =
+blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do
blockToAsciiDoc opts (Para [Image alt (src,tit)])
blockToAsciiDoc opts (Para inlines) = do
contents <- inlineListToAsciiDoc opts inlines
@@ -142,10 +146,10 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do
let len = offset contents
-- ident seem to be empty most of the time and asciidoc will generate them automatically
-- so lets make them not show up when null
- let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
+ let identifier = if (null ident) then empty else ("[[" <> text ident <> "]]")
let setext = writerSetextHeaders opts
- return $
- (if setext
+ return $
+ (if setext
then
identifier $$ contents $$
(case level of
@@ -155,7 +159,7 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do
4 -> text $ replicate len '+'
_ -> empty) <> blankline
else
- identifier $$ text (replicate level '=') <> space <> contents <> blankline)
+ identifier $$ text (replicate level '=') <> space <> contents <> blankline)
blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $
flush (attrs <> dashes <> space <> attrs <> cr <> text str <>
cr <> dashes) <> blankline
@@ -317,17 +321,51 @@ blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks
-- | Convert list of Pandoc inline elements to asciidoc.
inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc
-inlineListToAsciiDoc opts lst =
- mapM (inlineToAsciiDoc opts) lst >>= return . cat
+inlineListToAsciiDoc opts lst = do
+ oldIntraword <- gets intraword
+ setIntraword False
+ result <- go lst
+ setIntraword oldIntraword
+ return result
+ where go [] = return empty
+ go (y:x:xs)
+ | not (isSpacy y) = do
+ y' <- if isSpacy x
+ then inlineToAsciiDoc opts y
+ else withIntraword $ inlineToAsciiDoc opts y
+ x' <- withIntraword $ inlineToAsciiDoc opts x
+ xs' <- go xs
+ return (y' <> x' <> xs')
+ | x /= Space && x /= LineBreak = do
+ y' <- withIntraword $ inlineToAsciiDoc opts y
+ xs' <- go (x:xs)
+ return (y' <> xs')
+ go (x:xs) = do
+ x' <- inlineToAsciiDoc opts x
+ xs' <- go xs
+ return (x' <> xs')
+ isSpacy Space = True
+ isSpacy LineBreak = True
+ isSpacy _ = False
+
+setIntraword :: Bool -> State WriterState ()
+setIntraword b = modify $ \st -> st{ intraword = b }
+
+withIntraword :: State WriterState a -> State WriterState a
+withIntraword p = setIntraword True *> p <* setIntraword False
-- | Convert Pandoc inline element to asciidoc.
inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc
inlineToAsciiDoc opts (Emph lst) = do
contents <- inlineListToAsciiDoc opts lst
- return $ "_" <> contents <> "_"
+ isIntraword <- gets intraword
+ let marker = if isIntraword then "__" else "_"
+ return $ marker <> contents <> marker
inlineToAsciiDoc opts (Strong lst) = do
contents <- inlineListToAsciiDoc opts lst
- return $ "*" <> contents <> "*"
+ isIntraword <- gets intraword
+ let marker = if isIntraword then "**" else "*"
+ return $ marker <> contents <> marker
inlineToAsciiDoc opts (Strikeout lst) = do
contents <- inlineListToAsciiDoc opts lst
return $ "[line-through]*" <> contents <> "*"
@@ -338,12 +376,10 @@ inlineToAsciiDoc opts (Subscript lst) = do
contents <- inlineListToAsciiDoc opts lst
return $ "~" <> contents <> "~"
inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst
-inlineToAsciiDoc opts (Quoted SingleQuote lst) = do
- contents <- inlineListToAsciiDoc opts lst
- return $ "`" <> contents <> "'"
-inlineToAsciiDoc opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToAsciiDoc opts lst
- return $ "``" <> contents <> "''"
+inlineToAsciiDoc opts (Quoted SingleQuote lst) =
+ inlineListToAsciiDoc opts (Str "`" : lst ++ [Str "'"])
+inlineToAsciiDoc opts (Quoted DoubleQuote lst) =
+ inlineListToAsciiDoc opts (Str "``" : lst ++ [Str "''"])
inlineToAsciiDoc _ (Code _ str) = return $
text "`" <> text (escapeStringUsing (backslashEscapes "`") str) <> "`"
inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str
@@ -366,7 +402,7 @@ inlineToAsciiDoc opts (Link txt (src, _tit)) = do
let prefix = if isRelative
then text "link:"
else empty
- let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
let useAuto = case txt of
[Str s] | escapeURI s == srcSuffix -> True
_ -> False
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 88f590c43..914d61850 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings #-}
+{-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings,
+ ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
@@ -35,12 +36,14 @@ import Text.Pandoc.Options
import Data.List ( intersperse )
import Data.Char ( toLower )
import Scripting.Lua (LuaState, StackValue, callfunc)
+import Text.Pandoc.Writers.Shared
import qualified Scripting.Lua as Lua
import Text.Pandoc.UTF8 (fromString, toString)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as C8
import Data.Monoid
import qualified Data.Map as M
+import Text.Pandoc.Templates
attrToMap :: Attr -> M.Map ByteString ByteString
attrToMap (id',classes,keyvals) = M.fromList
@@ -128,9 +131,23 @@ instance StackValue MetaValue where
valuetype (MetaInlines _) = Lua.TSTRING
valuetype (MetaBlocks _) = Lua.TSTRING
+instance StackValue Citation where
+ push lua cit = do
+ Lua.createtable lua 6 0
+ let addValue ((k :: String), v) = Lua.push lua k >> Lua.push lua v >>
+ Lua.rawset lua (-3)
+ addValue ("citationId", citationId cit)
+ addValue ("citationPrefix", citationPrefix cit)
+ addValue ("citationSuffix", citationSuffix cit)
+ addValue ("citationMode", show (citationMode cit))
+ addValue ("citationNoteNum", citationNoteNum cit)
+ addValue ("citationHash", citationHash cit)
+ peek = undefined
+ valuetype _ = Lua.TTABLE
+
-- | Convert Pandoc to custom markup.
writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String
-writeCustom luaFile opts doc = do
+writeCustom luaFile opts doc@(Pandoc meta _) = do
luaScript <- C8.unpack `fmap` C8.readFile luaFile
lua <- Lua.newstate
Lua.openlibs lua
@@ -138,8 +155,17 @@ writeCustom luaFile opts doc = do
Lua.call lua 0 0
-- TODO - call hierarchicalize, so we have that info
rendered <- docToCustom lua opts doc
+ context <- metaToJSON opts
+ (fmap toString . blockListToCustom lua)
+ (fmap toString . inlineListToCustom lua)
+ meta
Lua.close lua
- return $ toString rendered
+ let body = toString rendered
+ if writerStandalone opts
+ then do
+ let context' = setField "body" body context
+ return $ renderTemplate' (writerTemplate opts) context'
+ else return body
docToCustom :: LuaState -> WriterOptions -> Pandoc -> IO ByteString
docToCustom lua opts (Pandoc (Meta metamap) blocks) = do
@@ -225,7 +251,7 @@ inlineToCustom lua (Quoted SingleQuote lst) = callfunc lua "SingleQuoted" lst
inlineToCustom lua (Quoted DoubleQuote lst) = callfunc lua "DoubleQuoted" lst
-inlineToCustom lua (Cite _ lst) = callfunc lua "Cite" lst
+inlineToCustom lua (Cite cs lst) = callfunc lua "Cite" lst cs
inlineToCustom lua (Code attr str) =
callfunc lua "Code" (fromString str) (attrToMap attr)
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index ba6a92a08..b10317506 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, PatternGuards #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -37,8 +37,9 @@ import Text.Pandoc.Writers.Shared
import Text.Pandoc.Options
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Readers.TeXMath
-import Data.List ( isPrefixOf, intercalate, isSuffixOf )
+import Data.List ( stripPrefix, isPrefixOf, intercalate, isSuffixOf )
import Data.Char ( toLower )
+import Control.Applicative ((<$>))
import Data.Monoid ( Any(..) )
import Text.Pandoc.Highlighting ( languages, languagesByExtension )
import Text.Pandoc.Pretty
@@ -293,14 +294,14 @@ inlineToDocbook _ (Code _ str) =
inTagsSimple "literal" $ text (escapeStringForXML str)
inlineToDocbook opts (Math t str)
| isMathML (writerHTMLMathMethod opts) =
- case texMathToMathML dt str of
- Right r -> inTagsSimple tagtype
- $ text $ Xml.ppcElement conf
- $ fixNS
- $ removeAttr r
- Left _ -> inlinesToDocbook opts
- $ readTeXMath' t str
- | otherwise = inlinesToDocbook opts $ readTeXMath' t str
+ case writeMathML dt <$> readTeX str of
+ Right r -> inTagsSimple tagtype
+ $ text $ Xml.ppcElement conf
+ $ fixNS
+ $ removeAttr r
+ Left _ -> inlinesToDocbook opts
+ $ texMathToInlines t str
+ | otherwise = inlinesToDocbook opts $ texMathToInlines t str
where (dt, tagtype) = case t of
InlineMath -> (DisplayInline,"inlineequation")
DisplayMath -> (DisplayBlock,"informalequation")
@@ -312,19 +313,19 @@ inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x
| otherwise = empty
inlineToDocbook _ LineBreak = text "\n"
inlineToDocbook _ Space = space
-inlineToDocbook opts (Link txt (src, _)) =
- if isPrefixOf "mailto:" src
- then let src' = drop 7 src
- emailLink = inTagsSimple "email" $ text $
- escapeStringForXML $ src'
- in case txt of
- [Str s] | escapeURI s == src' -> emailLink
- _ -> inlinesToDocbook opts txt <+>
- char '(' <> emailLink <> char ')'
- else (if isPrefixOf "#" src
- then inTags False "link" [("linkend", drop 1 src)]
- else inTags False "ulink" [("url", src)]) $
- inlinesToDocbook opts txt
+inlineToDocbook opts (Link txt (src, _))
+ | Just email <- stripPrefix "mailto:" src =
+ let emailLink = inTagsSimple "email" $ text $
+ escapeStringForXML $ email
+ in case txt of
+ [Str s] | escapeURI s == email -> emailLink
+ _ -> inlinesToDocbook opts txt <+>
+ char '(' <> emailLink <> char ')'
+ | otherwise =
+ (if isPrefixOf "#" src
+ then inTags False "link" [("linkend", drop 1 src)]
+ else inTags False "ulink" [("url", src)]) $
+ inlinesToDocbook opts txt
inlineToDocbook _ (Image _ (src, tit)) =
let titleDoc = if null tit
then empty
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 77ee51519..38031b7dc 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -58,7 +58,7 @@ import System.Random (randomRIO)
import Text.Printf (printf)
import qualified Control.Exception as E
import Text.Pandoc.MIME (getMimeType, extensionFromMimeType)
-import Control.Applicative ((<|>))
+import Control.Applicative ((<|>), (<$>))
import Data.Maybe (mapMaybe)
data ListMarker = NoMarker
@@ -481,17 +481,30 @@ writeOpenXML opts (Pandoc meta blocks) = do
_ -> []
let auths = docAuthors meta
let dat = docDate meta
+ let abstract' = case lookupMeta "abstract" meta of
+ Just (MetaBlocks bs) -> bs
+ Just (MetaInlines ils) -> [Plain ils]
+ _ -> []
+ let subtitle' = case lookupMeta "subtitle" meta of
+ Just (MetaBlocks [Plain xs]) -> xs
+ Just (MetaBlocks [Para xs]) -> xs
+ Just (MetaInlines xs) -> xs
+ _ -> []
title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
- authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts
- [Para (intercalate [LineBreak] auths) | not (null auths)]
+ subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
+ authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $
+ map Para auths
date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
+ abstract <- if null abstract'
+ then return []
+ else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract'
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
convertSpace xs = xs
let blocks' = bottomUp convertSpace $ blocks
doc' <- blocksToOpenXML opts blocks'
notes' <- reverse `fmap` gets stFootnotes
- let meta' = title ++ authors ++ date
+ let meta' = title ++ subtitle ++ authors ++ date ++ abstract
return (meta' ++ doc', notes')
-- | Convert a list of Pandoc blocks to OpenXML.
@@ -514,8 +527,11 @@ blockToOpenXML :: WriterOptions -> Block -> WS [Element]
blockToOpenXML _ Null = return []
blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs
blockToOpenXML opts (Header lev (ident,_,_) lst) = do
- contents <- withParaProp (pStyle $ "Heading" ++ show lev) $
- blockToOpenXML opts (Para lst)
+
+ paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $
+ getParaProps False
+ contents <- inlinesToOpenXML opts lst
+
usedIdents <- gets stSectionIds
let bookmarkName = if null ident
then uniqueIdent lst usedIdents
@@ -525,7 +541,7 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do
let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
,("w:name",bookmarkName)] ()
let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
- return $ [bookmarkStart] ++ contents ++ [bookmarkEnd]
+ return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact")
$ blockToOpenXML opts (Para lst)
-- title beginning with fig: indicates that the image is a figure
@@ -751,9 +767,9 @@ inlineToOpenXML opts (Math mathType str) = do
let displayType = if mathType == DisplayMath
then DisplayBlock
else DisplayInline
- case texMathToOMML displayType str of
+ case writeOMML displayType <$> readTeX str of
Right r -> return [r]
- Left _ -> inlinesToOpenXML opts (readTeXMath' mathType str)
+ Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str)
inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
inlineToOpenXML opts (Code attrs str) =
withTextProp (rStyle "VerbatimChar")
@@ -814,7 +830,8 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
case M.lookup src imgs of
Just (_,_,_,elt,_) -> return [elt]
Nothing -> do
- res <- liftIO $ fetchItem (writerSourceURL opts) src
+ res <- liftIO $
+ fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
case res of
Left (_ :: E.SomeException) -> do
liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..."
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
new file mode 100644
index 000000000..26f9b5f62
--- /dev/null
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -0,0 +1,475 @@
+{-
+Copyright (C) 2008-2014 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.DokuWiki
+ Copyright : Copyright (C) 2008-2014 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Clare Macrae <clare.macrae@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to DokuWiki markup.
+
+DokuWiki: <https://www.dokuwiki.org/dokuwiki>
+-}
+
+{-
+ [ ] Implement nested blockquotes (currently only ever does one level)
+ [ ] Implement alignment of text in tables
+ [ ] Implement comments
+ [ ] Work through the Dokuwiki spec, and check I've not missed anything out
+ [ ] Remove dud/duplicate code
+-}
+
+module Text.Pandoc.Writers.DokuWiki ( writeDokuWiki ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Data.List ( intersect, intercalate, isPrefixOf )
+import Network.URI ( isURI )
+import Control.Monad.State
+
+data WriterState = WriterState {
+ stNotes :: Bool -- True if there are notes
+ , stIndent :: String -- Indent after the marker at the beginning of list items
+ , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
+ }
+
+-- | Convert Pandoc to DokuWiki.
+writeDokuWiki :: WriterOptions -> Pandoc -> String
+writeDokuWiki opts document =
+ evalState (pandocToDokuWiki opts $ normalize document)
+ (WriterState { stNotes = False, stIndent = "", stUseTags = False })
+
+-- | Return DokuWiki representation of document.
+pandocToDokuWiki :: WriterOptions -> Pandoc -> State WriterState String
+pandocToDokuWiki opts (Pandoc meta blocks) = do
+ metadata <- metaToJSON opts
+ (fmap trimr . blockListToDokuWiki opts)
+ (inlineListToDokuWiki opts)
+ meta
+ body <- blockListToDokuWiki opts blocks
+ notesExist <- get >>= return . stNotes
+ let notes = if notesExist
+ then "" -- TODO Was "\n<references />" Check whether I can really remove this:
+ -- if it is definitely to do with footnotes, can remove this whole bit
+ else ""
+ let main = body ++ notes
+ let context = defField "body" main
+ $ defField "toc" (writerTableOfContents opts)
+ $ metadata
+ if writerStandalone opts
+ then return $ renderTemplate' (writerTemplate opts) context
+ else return main
+
+-- | Escape special characters for DokuWiki.
+escapeString :: String -> String
+escapeString = substitute "__" "%%__%%" .
+ substitute "**" "%%**%%" .
+ substitute "//" "%%//%%"
+
+-- | Convert Pandoc block element to DokuWiki.
+blockToDokuWiki :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState String
+
+blockToDokuWiki _ Null = return ""
+
+blockToDokuWiki opts (Div _attrs bs) = do
+ contents <- blockListToDokuWiki opts bs
+ return $ contents ++ "\n"
+
+blockToDokuWiki opts (Plain inlines) =
+ inlineListToDokuWiki opts inlines
+
+-- title beginning with fig: indicates that the image is a figure
+-- dokuwiki doesn't support captions - so combine together alt and caption into alt
+blockToDokuWiki opts (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
+ capt <- if null txt
+ then return ""
+ else (" " ++) `fmap` inlineListToDokuWiki opts txt
+ let opt = if null txt
+ then ""
+ else "|" ++ if null tit then capt else tit ++ capt
+ return $ "{{:" ++ src ++ opt ++ "}}\n"
+
+blockToDokuWiki opts (Para inlines) = do
+ indent <- gets stIndent
+ useTags <- gets stUseTags
+ contents <- inlineListToDokuWiki opts inlines
+ return $ if useTags
+ then "<HTML><p></HTML>" ++ contents ++ "<HTML></p></HTML>"
+ else contents ++ if null indent then "\n" else ""
+
+blockToDokuWiki _ (RawBlock f str)
+ | f == Format "dokuwiki" = return str
+ -- See https://www.dokuwiki.org/wiki:syntax
+ -- use uppercase HTML tag for block-level content:
+ | f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>"
+ | otherwise = return ""
+
+blockToDokuWiki _ HorizontalRule = return "\n----\n"
+
+blockToDokuWiki opts (Header level _ inlines) = do
+ -- emphasis, links etc. not allowed in headers, apparently,
+ -- so we remove formatting:
+ contents <- inlineListToDokuWiki opts $ removeFormatting inlines
+ let eqs = replicate ( 7 - level ) '='
+ return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
+
+blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do
+ let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp",
+ "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm",
+ "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran",
+ "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5",
+ "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc",
+ "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql",
+ "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
+ "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
+ "visualfoxpro", "winbatch", "xml", "xpp", "z80"]
+ let (beg, end) = if null at
+ then ("<code" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</code>")
+ else ("<source lang=\"" ++ head at ++ "\">", "</source>")
+ return $ beg ++ str ++ end
+
+blockToDokuWiki opts (BlockQuote blocks) = do
+ contents <- blockListToDokuWiki opts blocks
+ if isSimpleBlockQuote blocks
+ then return $ "> " ++ contents
+ else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>"
+
+blockToDokuWiki opts (Table capt aligns _ headers rows') = do
+ let alignStrings = map alignmentToString aligns
+ captionDoc <- if null capt
+ then return ""
+ else do
+ c <- inlineListToDokuWiki opts capt
+ return $ "" ++ c ++ "\n"
+ head' <- if all null headers
+ then return ""
+ else do
+ hs <- tableHeaderToDokuWiki opts alignStrings 0 headers
+ return $ hs ++ "\n"
+ body' <- zipWithM (tableRowToDokuWiki opts alignStrings) [1..] rows'
+ return $ captionDoc ++ head' ++
+ unlines body'
+
+blockToDokuWiki opts x@(BulletList items) = do
+ oldUseTags <- get >>= return . stUseTags
+ indent <- get >>= return . stIndent
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ modify $ \s -> s { stUseTags = True }
+ contents <- mapM (listItemToDokuWiki opts) items
+ modify $ \s -> s { stUseTags = oldUseTags }
+ return $ "<HTML><ul></HTML>\n" ++ vcat contents ++ "<HTML></ul></HTML>\n"
+ else do
+ modify $ \s -> s { stIndent = stIndent s ++ " " }
+ contents <- mapM (listItemToDokuWiki opts) items
+ modify $ \s -> s { stIndent = indent }
+ return $ vcat contents ++ if null indent then "\n" else ""
+
+blockToDokuWiki opts x@(OrderedList attribs items) = do
+ oldUseTags <- get >>= return . stUseTags
+ indent <- get >>= return . stIndent
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ modify $ \s -> s { stUseTags = True }
+ contents <- mapM (orderedListItemToDokuWiki opts) items
+ modify $ \s -> s { stUseTags = oldUseTags }
+ return $ "<HTML><ol" ++ listAttribsToString attribs ++ "></HTML>\n" ++ vcat contents ++ "<HTML></ol></HTML>\n"
+ else do
+ modify $ \s -> s { stIndent = stIndent s ++ " " }
+ contents <- mapM (orderedListItemToDokuWiki opts) items
+ modify $ \s -> s { stIndent = indent }
+ return $ vcat contents ++ if null indent then "\n" else ""
+
+-- TODO Need to decide how to make definition lists work on dokuwiki - I don't think there
+-- is a specific representation of them.
+-- TODO This creates double '; ; ' if there is a bullet or ordered list inside a definition list
+blockToDokuWiki opts x@(DefinitionList items) = do
+ oldUseTags <- get >>= return . stUseTags
+ indent <- get >>= return . stIndent
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ modify $ \s -> s { stUseTags = True }
+ contents <- mapM (definitionListItemToDokuWiki opts) items
+ modify $ \s -> s { stUseTags = oldUseTags }
+ return $ "<HTML><dl></HTML>\n" ++ vcat contents ++ "<HTML></dl></HTML>\n"
+ else do
+ modify $ \s -> s { stIndent = stIndent s ++ " " }
+ contents <- mapM (definitionListItemToDokuWiki opts) items
+ modify $ \s -> s { stIndent = indent }
+ return $ vcat contents ++ if null indent then "\n" else ""
+
+-- Auxiliary functions for lists:
+
+-- | Convert ordered list attributes to HTML attribute string
+listAttribsToString :: ListAttributes -> String
+listAttribsToString (startnum, numstyle, _) =
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ in (if startnum /= 1
+ then " start=\"" ++ show startnum ++ "\""
+ else "") ++
+ (if numstyle /= DefaultStyle
+ then " style=\"list-style-type: " ++ numstyle' ++ ";\""
+ else "")
+
+-- | Convert bullet list item (list of blocks) to DokuWiki.
+listItemToDokuWiki :: WriterOptions -> [Block] -> State WriterState String
+listItemToDokuWiki opts items = do
+ contents <- blockListToDokuWiki opts items
+ useTags <- get >>= return . stUseTags
+ if useTags
+ then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>"
+ else do
+ indent <- get >>= return . stIndent
+ return $ indent ++ "* " ++ contents
+
+-- | Convert ordered list item (list of blocks) to DokuWiki.
+-- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki
+orderedListItemToDokuWiki :: WriterOptions -> [Block] -> State WriterState String
+orderedListItemToDokuWiki opts items = do
+ contents <- blockListToDokuWiki opts items
+ useTags <- get >>= return . stUseTags
+ if useTags
+ then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>"
+ else do
+ indent <- get >>= return . stIndent
+ return $ indent ++ "- " ++ contents
+
+-- | Convert definition list item (label, list of blocks) to DokuWiki.
+definitionListItemToDokuWiki :: WriterOptions
+ -> ([Inline],[[Block]])
+ -> State WriterState String
+definitionListItemToDokuWiki opts (label, items) = do
+ labelText <- inlineListToDokuWiki opts label
+ contents <- mapM (blockListToDokuWiki opts) items
+ useTags <- get >>= return . stUseTags
+ if useTags
+ then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++
+ (intercalate "\n" $ map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents)
+ else do
+ indent <- get >>= return . stIndent
+ return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents
+
+-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
+isSimpleList :: Block -> Bool
+isSimpleList x =
+ case x of
+ BulletList items -> all isSimpleListItem items
+ OrderedList (num, sty, _) items -> all isSimpleListItem items &&
+ num == 1 && sty `elem` [DefaultStyle, Decimal]
+ DefinitionList items -> all isSimpleListItem $ concatMap snd items
+ _ -> False
+
+-- | True if list item can be handled with the simple wiki syntax. False if
+-- HTML tags will be needed.
+isSimpleListItem :: [Block] -> Bool
+isSimpleListItem [] = True
+isSimpleListItem [x] =
+ case x of
+ Plain _ -> True
+ Para _ -> True
+ BulletList _ -> isSimpleList x
+ OrderedList _ _ -> isSimpleList x
+ DefinitionList _ -> isSimpleList x
+ _ -> False
+isSimpleListItem [x, y] | isPlainOrPara x =
+ case y of
+ BulletList _ -> isSimpleList y
+ OrderedList _ _ -> isSimpleList y
+ DefinitionList _ -> isSimpleList y
+ _ -> False
+isSimpleListItem _ = False
+
+isPlainOrPara :: Block -> Bool
+isPlainOrPara (Plain _) = True
+isPlainOrPara (Para _) = True
+isPlainOrPara _ = False
+
+isSimpleBlockQuote :: [Block] -> Bool
+isSimpleBlockQuote [BlockQuote bs] = isSimpleBlockQuote bs
+isSimpleBlockQuote [b] = isPlainOrPara b
+isSimpleBlockQuote _ = False
+
+-- | Concatenates strings with line breaks between them.
+vcat :: [String] -> String
+vcat = intercalate "\n"
+
+-- Auxiliary functions for tables:
+
+-- TODO Eliminate copy-and-pasted code in tableHeaderToDokuWiki and tableRowToDokuWiki
+tableHeaderToDokuWiki :: WriterOptions
+ -> [String]
+ -> Int
+ -> [[Block]]
+ -> State WriterState String
+tableHeaderToDokuWiki opts alignStrings rownum cols' = do
+ let celltype = if rownum == 0 then "" else ""
+ cols'' <- sequence $ zipWith
+ (\alignment item -> tableItemToDokuWiki opts celltype alignment item)
+ alignStrings cols'
+ return $ "^ " ++ "" ++ joinHeaders cols'' ++ " ^"
+
+tableRowToDokuWiki :: WriterOptions
+ -> [String]
+ -> Int
+ -> [[Block]]
+ -> State WriterState String
+tableRowToDokuWiki opts alignStrings rownum cols' = do
+ let celltype = if rownum == 0 then "" else ""
+ cols'' <- sequence $ zipWith
+ (\alignment item -> tableItemToDokuWiki opts celltype alignment item)
+ alignStrings cols'
+ return $ "| " ++ "" ++ joinColumns cols'' ++ " |"
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> ""
+ AlignRight -> ""
+ AlignCenter -> ""
+ AlignDefault -> ""
+
+tableItemToDokuWiki :: WriterOptions
+ -> String
+ -> String
+ -> [Block]
+ -> State WriterState String
+-- TODO Fix celltype and align' defined but not used
+tableItemToDokuWiki opts _celltype _align' item = do
+ let mkcell x = "" ++ x ++ ""
+ contents <- blockListToDokuWiki opts item
+ return $ mkcell contents
+
+-- | Concatenates columns together.
+joinColumns :: [String] -> String
+joinColumns = intercalate " | "
+
+-- | Concatenates headers together.
+joinHeaders :: [String] -> String
+joinHeaders = intercalate " ^ "
+
+-- | Convert list of Pandoc block elements to DokuWiki.
+blockListToDokuWiki :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState String
+blockListToDokuWiki opts blocks =
+ mapM (blockToDokuWiki opts) blocks >>= return . vcat
+
+-- | Convert list of Pandoc inline elements to DokuWiki.
+inlineListToDokuWiki :: WriterOptions -> [Inline] -> State WriterState String
+inlineListToDokuWiki opts lst = mapM (inlineToDokuWiki opts) lst >>= return . concat
+
+-- | Convert Pandoc inline element to DokuWiki.
+inlineToDokuWiki :: WriterOptions -> Inline -> State WriterState String
+
+inlineToDokuWiki opts (Span _attrs ils) = do
+ contents <- inlineListToDokuWiki opts ils
+ return contents
+
+inlineToDokuWiki opts (Emph lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "//" ++ contents ++ "//"
+
+inlineToDokuWiki opts (Strong lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "**" ++ contents ++ "**"
+
+inlineToDokuWiki opts (Strikeout lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "<del>" ++ contents ++ "</del>"
+
+inlineToDokuWiki opts (Superscript lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "<sup>" ++ contents ++ "</sup>"
+
+inlineToDokuWiki opts (Subscript lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "<sub>" ++ contents ++ "</sub>"
+
+inlineToDokuWiki opts (SmallCaps lst) = inlineListToDokuWiki opts lst
+
+inlineToDokuWiki opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "\8216" ++ contents ++ "\8217"
+
+inlineToDokuWiki opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToDokuWiki opts lst
+ return $ "\8220" ++ contents ++ "\8221"
+
+inlineToDokuWiki opts (Cite _ lst) = inlineListToDokuWiki opts lst
+
+inlineToDokuWiki _ (Code _ str) =
+ -- In dokuwiki, text surrounded by '' is really just a font statement, i.e. <tt>,
+ -- and so other formatting can be present inside.
+ -- However, in pandoc, and markdown, inlined code doesn't contain formatting.
+ -- So I have opted for using %% to disable all formatting inside inline code blocks.
+ -- This gives the best results when converting from other formats to dokuwiki, even if
+ -- the resultand code is a little ugly, for short strings that don't contain formatting
+ -- characters.
+ -- It does mean that if pandoc could ever read dokuwiki, and so round-trip the format,
+ -- any formatting inside inlined code blocks would be lost, or presented incorrectly.
+ return $ "''%%" ++ str ++ "%%''"
+
+inlineToDokuWiki _ (Str str) = return $ escapeString str
+
+inlineToDokuWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>"
+ -- note: str should NOT be escaped
+
+inlineToDokuWiki _ (RawInline f str)
+ | f == Format "dokuwiki" = return str
+ | f == Format "html" = return $ "<html>" ++ str ++ "</html>"
+ | otherwise = return ""
+
+inlineToDokuWiki _ (LineBreak) = return "\\\\ "
+
+inlineToDokuWiki _ Space = return " "
+
+inlineToDokuWiki opts (Link txt (src, _)) = do
+ label <- inlineListToDokuWiki opts txt
+ case txt of
+ [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">"
+ | escapeURI s == src -> return src
+ _ -> if isURI src
+ then return $ "[[" ++ src ++ "|" ++ label ++ "]]"
+ else return $ "[[" ++ src' ++ "|" ++ label ++ "]]"
+ where src' = case src of
+ '/':xs -> xs -- with leading / it's a
+ _ -> src -- link to a help page
+inlineToDokuWiki opts (Image alt (source, tit)) = do
+ alt' <- inlineListToDokuWiki opts alt
+ let txt = if (null tit)
+ then if null alt
+ then ""
+ else "|" ++ alt'
+ else "|" ++ tit
+ return $ "{{:" ++ source ++ txt ++ "}}"
+
+inlineToDokuWiki opts (Note contents) = do
+ contents' <- blockListToDokuWiki opts contents
+ modify (\s -> s { stNotes = True })
+ return $ "((" ++ contents' ++ "))"
+ -- note - may not work for notes with multiple blocks
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index b6687c330..2aab7701f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-}
+{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-}
{-
Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
@@ -29,10 +29,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to EPUB.
-}
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
-import Data.IORef
+import Data.IORef ( IORef, newIORef, readIORef, modifyIORef )
import qualified Data.Map as M
import Data.Maybe ( fromMaybe )
-import Data.List ( isInfixOf, intercalate )
+import Data.List ( isPrefixOf, isInfixOf, intercalate )
import System.Environment ( getEnv )
import Text.Printf (printf)
import System.FilePath ( (</>), takeExtension, takeFileName )
@@ -40,28 +40,35 @@ import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.SelfContained ( makeSelfContained )
-import Codec.Archive.Zip
+import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive)
import Control.Applicative ((<$>))
-import Data.Time.Clock.POSIX
-import Data.Time
-import System.Locale
-import Text.Pandoc.Shared hiding ( Element )
-import qualified Text.Pandoc.Shared as Shared
+import Data.Time.Clock.POSIX ( getPOSIXTime )
+import Data.Time (getCurrentTime,UTCTime, formatTime)
+import System.Locale ( defaultTimeLocale )
+import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim
+ , normalizeDate, readDataFile, stringify, warn
+ , hierarchicalize, fetchItem' )
+import qualified Text.Pandoc.Shared as S (Element(..))
import Text.Pandoc.Builder (fromList, setMeta)
-import Text.Pandoc.Options
+import Text.Pandoc.Options ( WriterOptions(..)
+ , HTMLMathMethod(..)
+ , EPUBVersion(..)
+ , ObfuscationMethod(NoObfuscation) )
import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Control.Monad.State
-import Text.XML.Light hiding (ppTopElement)
-import Text.Pandoc.UUID
-import Text.Pandoc.Writers.HTML
-import Text.Pandoc.Writers.Markdown ( writePlain )
+import Text.Pandoc.Walk (walk, walkM)
+import Control.Monad.State (modify, get, execState, State, put, evalState)
+import Control.Monad (foldM, when, mplus, liftM)
+import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
+ , strContent, lookupAttr, Node(..), QName(..), parseXML
+ , onlyElems, node, ppElement)
+import Text.Pandoc.UUID (getRandomUUID)
+import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml)
import Data.Char ( toLower, isDigit, isAlphaNum )
import Network.URI ( unEscapeString )
import Text.Pandoc.MIME (getMimeType)
import qualified Control.Exception as E
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
-import Text.HTML.TagSoup
+import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
-- A Chapter includes a list of blocks and maybe a section
-- number offset. Note, some chapters are unnumbered. The section
@@ -87,6 +94,7 @@ data EPUBMetadata = EPUBMetadata{
, epubRights :: Maybe String
, epubCoverImage :: Maybe String
, epubStylesheet :: Maybe Stylesheet
+ , epubPageDirection :: ProgressionDirection
} deriving Show
data Stylesheet = StylesheetPath FilePath
@@ -115,6 +123,8 @@ data Title = Title{
, titleType :: Maybe String
} deriving Show
+data ProgressionDirection = LTR | RTL | Default deriving Show
+
dcName :: String -> QName
dcName n = QName n Nothing (Just "dc")
@@ -124,20 +134,15 @@ dcNode = node . dcName
opfName :: String -> QName
opfName n = QName n Nothing (Just "opf")
-plainify :: [Inline] -> String
-plainify t =
- trimr $ writePlain def{ writerStandalone = False }
- $ Pandoc nullMeta [Plain $ walk removeNote t]
-
-removeNote :: Inline -> Inline
-removeNote (Note _) = Str ""
-removeNote x = x
-
toId :: FilePath -> String
toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
then x
else '_') . takeFileName
+removeNote :: Inline -> Inline
+removeNote (Note _) = Str ""
+removeNote x = x
+
getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata
getEPUBMetadata opts meta = do
let md = metadataFromMeta opts meta
@@ -172,7 +177,7 @@ getEPUBMetadata opts meta = do
if any (\c -> creatorRole c == Just "aut") $ epubCreator m
then return m
else do
- let authors' = map plainify $ docAuthors meta
+ let authors' = map stringify $ docAuthors meta
let toAuthor name = Creator{ creatorText = name
, creatorRole = Just "aut"
, creatorFileAs = Nothing }
@@ -221,8 +226,8 @@ addMetadataFromXML _ md = md
metaValueToString :: MetaValue -> String
metaValueToString (MetaString s) = s
-metaValueToString (MetaInlines ils) = plainify ils
-metaValueToString (MetaBlocks bs) = plainify $ query (:[]) bs
+metaValueToString (MetaInlines ils) = stringify ils
+metaValueToString (MetaBlocks bs) = stringify bs
metaValueToString (MetaBool b) = show b
metaValueToString _ = ""
@@ -294,6 +299,7 @@ metadataFromMeta opts meta = EPUBMetadata{
, epubRights = rights
, epubCoverImage = coverImage
, epubStylesheet = stylesheet
+ , epubPageDirection = pageDirection
}
where identifiers = getIdentifier meta
titles = getTitle meta
@@ -316,6 +322,14 @@ metadataFromMeta opts meta = EPUBMetadata{
stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus`
((StylesheetPath . metaValueToString) <$>
lookupMeta "stylesheet" meta)
+ pageDirection = maybe Default stringToPageDirection
+ (lookupMeta "page-progression-direction" meta)
+ stringToPageDirection (metaValueToString -> s) =
+ case s of
+ "ltr" -> LTR
+ "rtl" -> RTL
+ _ -> Default
+
-- | Produce an EPUB file from a Pandoc document.
writeEPUB :: WriterOptions -- ^ Writer options
@@ -339,7 +353,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
if epub3
then MathML Nothing
else writerHTMLMathMethod opts
- , writerWrapText = False }
+ , writerWrapText = True }
metadata <- getEPUBMetadata opts' meta
-- cover page
@@ -366,7 +380,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
walkM (transformBlock opts' mediaRef)
pics <- readIORef mediaRef
let readPicEntry entries (oldsrc, newsrc) = do
- res <- fetchItem (writerSourceURL opts') oldsrc
+ res <- fetchItem' (writerMediaBag opts')
+ (writerSourceURL opts') oldsrc
case res of
Left _ -> do
warn $ "Could not find media `" ++ oldsrc ++ "', skipping..."
@@ -379,6 +394,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
fontEntries <- mapM mkFontEntry $ writerEpubFonts opts'
+ -- set page progression direction
+ let progressionDirection = case epubPageDirection metadata of
+ LTR -> "ltr"
+ RTL -> "rtl"
+ Default -> "default"
+
-- body pages
-- add level 1 header to beginning if none there
@@ -468,7 +489,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
[] -> case epubTitle metadata of
[] -> "UNTITLED"
(x:_) -> titleText x
- x -> plainify x
+ x -> stringify x
let uuid = case epubIdentifier metadata of
(x:_) -> identifierText x -- use first identifier as UUID
[] -> error "epubIdentifier is null" -- shouldn't happen
@@ -498,7 +519,8 @@ writeEPUB opts doc@(Pandoc meta _) = do
(pictureNode x)]) ++
map pictureNode picEntries ++
map fontNode fontEntries
- , unode "spine" ! [("toc","ncx")] $
+ , unode "spine" ! [("toc","ncx")
+ ,("page-progression-direction", progressionDirection)] $
case epubCoverImage metadata of
Nothing -> []
Just _ -> [ unode "itemref" !
@@ -529,25 +551,25 @@ writeEPUB opts doc@(Pandoc meta _) = do
let tocLevel = writerTOCDepth opts
let navPointNode :: (Int -> String -> String -> [Element] -> Element)
- -> Shared.Element -> State Int Element
- navPointNode formatter (Sec _ nums (ident,_,_) ils children) = do
+ -> S.Element -> State Int Element
+ navPointNode formatter (S.Sec _ nums (ident,_,_) ils children) = do
n <- get
modify (+1)
let showNums :: [Int] -> String
showNums = intercalate "." . map show
- let tit' = plainify ils
+ let tit' = stringify ils
let tit = if writerNumberSections opts && not (null nums)
then showNums nums ++ " " ++ tit'
else tit'
let src = case lookup ident reftable of
Just x -> x
Nothing -> error (ident ++ " not found in reftable")
- let isSec (Sec lev _ _ _ _) = lev <= tocLevel
+ let isSec (S.Sec lev _ _ _ _) = lev <= tocLevel
isSec _ = False
let subsecs = filter isSec children
subs <- mapM (navPointNode formatter) subsecs
return $ formatter n tit src subs
- navPointNode _ (Blk _) = error "navPointNode encountered Blk"
+ navPointNode _ (S.Blk _) = error "navPointNode encountered Blk"
let navMapFormatter :: Int -> String -> String -> [Element] -> Element
navMapFormatter n tit src subs = unode "navPoint" !
@@ -558,7 +580,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
] ++ subs
let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $
- [ unode "navLabel" $ unode "text" (plainify $ docTitle meta)
+ [ unode "navLabel" $ unode "text" (stringify $ docTitle meta)
, unode "content" ! [("src","title_page.xhtml")] $ () ]
let tocData = UTF8.fromStringLazy $ ppTopElement $
@@ -587,7 +609,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
navXhtmlFormatter n tit src subs = unode "li" !
[("id", "toc-li-" ++ show n)] $
(unode "a" ! [("href",src)]
- $ (unode "span" tit))
+ $ tit)
: case subs of
[] -> []
(_:_) -> [unode "ol" ! [("class","toc")] $ subs]
@@ -751,7 +773,7 @@ transformTag :: WriterOptions
-> Tag String
-> IO (Tag String)
transformTag opts mediaRef tag@(TagOpen name attr)
- | name == "video" || name == "source" || name == "img" = do
+ | name `elem` ["video", "source", "img", "audio"] = do
let src = fromAttrib "src" tag
let poster = fromAttrib "poster" tag
let oldsrc = maybe src (</> src) $ writerSourceURL opts
@@ -784,7 +806,7 @@ transformBlock opts mediaRef (RawBlock fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
tags' <- mapM (transformTag opts mediaRef) tags
- return $ RawBlock fmt (renderTags tags')
+ return $ RawBlock fmt (renderTags' tags')
transformBlock _ _ b = return b
transformInline :: WriterOptions
@@ -798,8 +820,13 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
- raw <- makeSelfContained Nothing $ writeHtmlInline opts x
+ raw <- makeSelfContained opts $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
+transformInline opts mediaRef (RawInline fmt raw)
+ | fmt == Format "html" = do
+ let tags = parseTags raw
+ tags' <- mapM (transformTag opts mediaRef) tags
+ return $ RawInline fmt (renderTags' tags')
transformInline _ _ x = return x
writeHtmlInline :: WriterOptions -> Inline -> String
@@ -825,11 +852,11 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity .
unEntity (x:xs) = x : unEntity xs
mediaTypeOf :: FilePath -> Maybe String
-mediaTypeOf x = case getMimeType x of
- Just y@('i':'m':'a':'g':'e':_) -> Just y
- Just y@('v':'i':'d':'e':'o':_) -> Just y
- Just y@('a':'u':'d':'i':'o':_) -> Just y
- _ -> Nothing
+mediaTypeOf x =
+ let mediaPrefixes = ["image", "video", "audio"] in
+ case getMimeType x of
+ Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y
+ _ -> Nothing
data IdentState = IdentState{
chapterNumber :: Int,
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 803617f95..233b8b32b 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE PatternGuards #-}
+
{-
Copyright (c) 2011-2012, Sergey Astanin
All rights reserved.
@@ -28,8 +30,8 @@ module Text.Pandoc.Writers.FB2 (writeFB2) where
import Control.Monad.State (StateT, evalStateT, get, modify)
import Control.Monad.State (liftM, liftM2, liftIO)
import Data.ByteString.Base64 (encode)
-import Data.Char (toUpper, toLower, isSpace, isAscii, isControl)
-import Data.List (intersperse, intercalate, isPrefixOf)
+import Data.Char (toLower, isSpace, isAscii, isControl)
+import Data.List (intersperse, intercalate, isPrefixOf, stripPrefix)
import Data.Either (lefts, rights)
import Network.Browser (browse, request, setAllowRedirects, setOutHandler)
import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody)
@@ -44,8 +46,7 @@ import qualified Text.XML.Light.Cursor as XC
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
-import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock)
-import Text.Pandoc.Walk
+import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize)
-- | Data to be written at the end of the document:
-- (foot)notes, URLs, references, images.
@@ -253,22 +254,21 @@ readDataURI :: String -- ^ URI
-> Maybe (String,String,Bool,String)
-- ^ Maybe (mime,charset,isBase64,data)
readDataURI uri =
- let prefix = "data:"
- in if not (prefix `isPrefixOf` uri)
- then Nothing
- else
- let rest = drop (length prefix) uri
- meta = takeWhile (/= ',') rest -- without trailing ','
- uridata = drop (length meta + 1) rest
- parts = split (== ';') meta
- (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts
- in Just (mime,cs,enc,uridata)
+ case stripPrefix "data:" uri of
+ Nothing -> Nothing
+ Just rest ->
+ let meta = takeWhile (/= ',') rest -- without trailing ','
+ uridata = drop (length meta + 1) rest
+ parts = split (== ';') meta
+ (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts
+ in Just (mime,cs,enc,uridata)
+
where
upd str m@(mime,cs,enc)
- | isMimeType str = (str,cs,enc)
- | "charset=" `isPrefixOf` str = (mime,drop (length "charset=") str,enc)
- | str == "base64" = (mime,cs,True)
- | otherwise = m
+ | isMimeType str = (str,cs,enc)
+ | Just str' <- stripPrefix "charset=" str = (mime,str',enc)
+ | str == "base64" = (mime,cs,True)
+ | otherwise = m
-- Without parameters like ;charset=...; see RFC 2045, 5.1
isMimeType :: String -> Bool
@@ -296,7 +296,6 @@ fetchURL url = do
let content_type = lookupHeader HdrContentType (getHeaders r)
content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r
return $ liftM2 (,) content_type content
- where
toBS :: String -> B.ByteString
toBS = B.pack . map (toEnum . fromEnum)
@@ -421,10 +420,6 @@ indent = indentBlock
indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
in intercalate [LineBreak] $ map ((Str spacer):) lns
-capitalize :: Inline -> Inline
-capitalize (Str xs) = Str $ map toUpper xs
-capitalize x = x
-
-- | Convert a Pandoc's Inline element to FictionBook XML representation.
toXml :: Inline -> FBM [Content]
toXml (Str s) = return [txt s]
@@ -434,7 +429,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss
toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
toXml (Superscript ss) = list `liftM` wrap "sup" ss
toXml (Subscript ss) = list `liftM` wrap "sub" ss
-toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss
+toXml (SmallCaps ss) = cMapM toXml $ capitalize ss
toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific
inner <- cMapM toXml ss
return $ [txt "‘"] ++ inner ++ [txt "’"]
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 9a26cf2ac..36ce2ba21 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -63,6 +63,7 @@ import Text.XML.Light.Output
import System.FilePath (takeExtension)
import Data.Monoid
import Data.Aeson (Value)
+import Control.Applicative ((<$>))
data WriterState = WriterState
{ stNotes :: [Html] -- ^ List of notes
@@ -235,6 +236,9 @@ showSecNum = concat . intersperse "." . map show
-- | Converts an Element to a list item for a table of contents,
-- retrieving the appropriate identifier from state.
elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html)
+-- Don't include the empty headers created in slide shows
+-- shows when an hrule is used to separate slides without a new title:
+elementToListItem _ (Sec _ _ _ [Str "\0"] _) = return Nothing
elementToListItem opts (Sec lev num (id',classes,_) headerText subsecs)
| lev <= writerTOCDepth opts = do
let num' = zipWith (+) num (writerNumberOffset opts ++ repeat 0)
@@ -429,9 +433,11 @@ blockToHtml opts (Div attr@(_,classes,_) bs) = do
let contents' = nl opts >> contents >> nl opts
return $
if "notes" `elem` classes
- then case writerSlideVariant opts of
- RevealJsSlides -> addAttrs opts attr $ H5.aside $ contents'
- NoSlides -> addAttrs opts attr $ H.div $ contents'
+ then let opts' = opts{ writerIncremental = False } in
+ -- we don't want incremental output inside speaker notes
+ case writerSlideVariant opts of
+ RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents'
+ NoSlides -> addAttrs opts' attr $ H.div $ contents'
_ -> mempty
else addAttrs opts attr $ H.div $ contents'
blockToHtml _ (RawBlock f str)
@@ -698,18 +704,18 @@ inlineToHtml opts inline =
else DisplayBlock
let conf = useShortEmptyTags (const False)
defaultConfigPP
- case texMathToMathML dt str of
- Right r -> return $ preEscapedString $
- ppcElement conf r
- Left _ -> inlineListToHtml opts
- (readTeXMath' t str) >>= return .
- (H.span ! A.class_ "math")
+ case writeMathML dt <$> readTeX str of
+ Right r -> return $ preEscapedString $
+ ppcElement conf r
+ Left _ -> inlineListToHtml opts
+ (texMathToInlines t str) >>=
+ return . (H.span ! A.class_ "math")
MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $
case t of
InlineMath -> "\\(" ++ str ++ "\\)"
DisplayMath -> "\\[" ++ str ++ "\\]"
PlainMath -> do
- x <- inlineListToHtml opts (readTeXMath' t str)
+ x <- inlineListToHtml opts (texMathToInlines t str)
let m = H.span ! A.class_ "math" $ x
let brtag = if writerHtml5 opts then H5.br else H.br
return $ case t of
@@ -723,10 +729,6 @@ inlineToHtml opts inline =
_ -> return mempty
| f == Format "html" -> return $ preEscapedString str
| otherwise -> return mempty
- (Link [Str str] (s,_)) | "mailto:" `isPrefixOf` s &&
- s == escapeURI ("mailto" ++ str) ->
- -- autolink
- return $ obfuscateLink opts str s
(Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do
linkText <- inlineListToHtml opts txt
return $ obfuscateLink opts (renderHtml linkText) s
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index 1c82839d0..14f398da9 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Options
import Data.List ( intersperse, transpose )
import Text.Pandoc.Pretty
import Control.Monad.State
-import Text.Pandoc.Readers.TeXMath (readTeXMath')
+import Text.Pandoc.Readers.TeXMath (texMathToInlines)
import Network.URI (isURI)
import Data.Default
@@ -319,7 +319,7 @@ inlineToHaddock opts (Math mt str) = do
let adjust x = case mt of
DisplayMath -> cr <> x <> cr
InlineMath -> x
- adjust `fmap` (inlineListToHaddock opts $ readTeXMath' mt str)
+ adjust `fmap` (inlineListToHaddock opts $ texMathToInlines mt str)
inlineToHaddock _ (RawInline f str)
| f == "haddock" = return $ text str
| otherwise = return empty
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
index 19d486b25..ae20efd4b 100644
--- a/src/Text/Pandoc/Writers/ICML.hs
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -42,7 +42,7 @@ type WS a = State WriterState a
defaultWriterState :: WriterState
defaultWriterState = WriterState{
- blockStyles = Set.empty
+ blockStyles = Set.empty
, inlineStyles = Set.empty
, links = []
, listDepth = 1
@@ -267,7 +267,7 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs
$ inTags False "BorderColor" [("type","enumeration")] (text "Black")
$$ (inTags False "Destination" [("type","object")]
$ text $ "HyperlinkURLDestination/"++(escapeStringForXML url))
-
+
-- | Convert a list of Pandoc blocks to ICML.
blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc
@@ -352,7 +352,7 @@ listItemsToICML opts listType style attribs (first:rest) = do
-- | Convert a list of blocks to ICML list items.
listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc
listItemToICML opts style isFirst attribs item =
- let makeNumbStart (Just (beginsWith, numbStl, _)) =
+ let makeNumbStart (Just (beginsWith, numbStl, _)) =
let doN DefaultStyle = []
doN LowerRoman = [lowerRomanName]
doN UpperRoman = [upperRomanName]
@@ -467,7 +467,7 @@ parStyle opts style lst =
-- | Wrap a Doc in an ICML Character Style.
charStyle :: Style -> Doc -> WS Doc
-charStyle style content =
+charStyle style content =
let (stlStr, attrs) = styleToStrAttr style
doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content
in do
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index f2f7438c4..d140932a7 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings, ScopedTypeVariables,
+ PatternGuards #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -37,7 +38,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Templates
import Text.Printf ( printf )
import Network.URI ( isURI, unEscapeString )
-import Data.List ( (\\), isSuffixOf, isInfixOf,
+import Data.List ( (\\), isSuffixOf, isInfixOf, stripPrefix,
isPrefixOf, intercalate, intersperse )
import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
import Data.Maybe ( fromMaybe )
@@ -220,6 +221,7 @@ stringToLaTeX ctx (x:xs) = do
'>' -> "\\textgreater{}" ++ rest
'[' -> "{[}" ++ rest -- to avoid interpretation as
']' -> "{]}" ++ rest -- optional arguments
+ '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest
'\160' -> "~" ++ rest
'\x2026' -> "\\ldots{}" ++ rest
'\x2018' | ligatures -> "`" ++ rest
@@ -470,19 +472,18 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
captionText <- inlineListToLaTeX caption
let capt = if isEmpty captionText
then empty
- else text "\\addlinespace"
- $$ text "\\caption" <> braces captionText
+ else text "\\caption" <> braces captionText <> "\\\\"
rows' <- mapM (tableRowToLaTeX False aligns widths) rows
let colDescriptors = text $ concat $ map toColDescriptor aligns
modify $ \s -> s{ stTable = True }
return $ "\\begin{longtable}[c]" <>
braces ("@{}" <> colDescriptors <> "@{}")
-- the @{} removes extra space at beginning and end
+ $$ capt
$$ "\\toprule\\addlinespace"
$$ headers
$$ vcat rows'
$$ "\\bottomrule"
- $$ capt
$$ "\\end{longtable}"
toColDescriptor :: Alignment -> String
@@ -742,7 +743,7 @@ inlineToLaTeX (Quoted qt lst) = do
else char '\x2018' <> inner <> char '\x2019'
inlineToLaTeX (Str str) = liftM text $ stringToLaTeX TextString str
inlineToLaTeX (Math InlineMath str) =
- return $ char '$' <> text str <> char '$'
+ return $ "\\(" <> text str <> "\\)"
inlineToLaTeX (Math DisplayMath str) =
return $ "\\[" <> text str <> "\\]"
inlineToLaTeX (RawInline f str)
@@ -757,10 +758,17 @@ inlineToLaTeX (Link txt ('#':ident, _)) = do
return $ text "\\hyperref" <> brackets (text lab) <> braces contents
inlineToLaTeX (Link txt (src, _)) =
case txt of
- [Str x] | x == src -> -- autolink
+ [Str x] | escapeURI x == src -> -- autolink
do modify $ \s -> s{ stUrl = True }
- src' <- stringToLaTeX URLString x
+ src' <- stringToLaTeX URLString src
return $ text $ "\\url{" ++ src' ++ "}"
+ [Str x] | Just rest <- stripPrefix "mailto:" src,
+ escapeURI x == rest -> -- email autolink
+ do modify $ \s -> s{ stUrl = True }
+ src' <- stringToLaTeX URLString src
+ contents <- inlineListToLaTeX txt
+ return $ "\\href" <> braces (text src') <>
+ braces ("\\nolinkurl" <> braces contents)
_ -> do contents <- inlineListToLaTeX txt
src' <- stringToLaTeX URLString src
return $ text ("\\href{" ++ src' ++ "}{") <>
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 41eb3e5be..6b2c4c200 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -36,7 +36,8 @@ import Text.Pandoc.Writers.Shared
import Text.Pandoc.Options
import Text.Pandoc.Readers.TeXMath
import Text.Printf ( printf )
-import Data.List ( isPrefixOf, intersperse, intercalate )
+import Data.List ( stripPrefix, intersperse, intercalate )
+import Data.Maybe (fromMaybe)
import Text.Pandoc.Pretty
import Text.Pandoc.Builder (deleteMeta)
import Control.Monad.State
@@ -331,9 +332,9 @@ inlineToMan _ (Code _ str) =
return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
inlineToMan _ (Str str) = return $ text $ escapeString str
inlineToMan opts (Math InlineMath str) =
- inlineListToMan opts $ readTeXMath' InlineMath str
+ inlineListToMan opts $ texMathToInlines InlineMath str
inlineToMan opts (Math DisplayMath str) = do
- contents <- inlineListToMan opts $ readTeXMath' DisplayMath str
+ contents <- inlineListToMan opts $ texMathToInlines DisplayMath str
return $ cr <> text ".RS" $$ contents $$ text ".RE"
inlineToMan _ (RawInline f str)
| f == Format "man" = return $ text str
@@ -343,7 +344,7 @@ inlineToMan _ (LineBreak) = return $
inlineToMan _ Space = return space
inlineToMan opts (Link txt (src, _)) = do
linktext <- inlineListToMan opts txt
- let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
return $ case txt of
[Str s]
| escapeURI s == srcSuffix ->
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index a67271a5d..95d4db29b 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -37,16 +37,17 @@ import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Shared
import Text.Pandoc.Writers.Shared
import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (blankline, char, space)
-import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy )
+import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
+import Data.Maybe (fromMaybe)
+import Data.List ( group, stripPrefix, find, intersperse, transpose, sortBy )
import Data.Char ( isSpace, isPunctuation )
import Data.Ord ( comparing )
import Text.Pandoc.Pretty
import Control.Monad.State
import qualified Data.Set as Set
import Text.Pandoc.Writers.HTML (writeHtmlString)
-import Text.Pandoc.Readers.TeXMath (readTeXMath')
-import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..))
+import Text.Pandoc.Readers.TeXMath (texMathToInlines)
+import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
import Network.URI (isURI)
import Data.Default
import Data.Yaml (Value(Object,String,Array,Bool,Number))
@@ -77,26 +78,15 @@ writePlain :: WriterOptions -> Pandoc -> String
writePlain opts document =
evalState (pandocToMarkdown opts{
writerExtensions = Set.delete Ext_escaped_line_breaks $
+ Set.delete Ext_pipe_tables $
+ Set.delete Ext_raw_html $
+ Set.delete Ext_markdown_in_html_blocks $
+ Set.delete Ext_raw_tex $
+ Set.delete Ext_footnotes $
+ Set.delete Ext_tex_math_dollars $
+ Set.delete Ext_citations $
writerExtensions opts }
- document') def{ stPlain = True }
- where document' = plainify document
-
-plainify :: Pandoc -> Pandoc
-plainify = walk go
- where go :: Inline -> Inline
- go (Emph xs) = SmallCaps xs
- go (Strong xs) = SmallCaps xs
- go (Strikeout xs) = SmallCaps xs
- go (Superscript xs) = SmallCaps xs
- go (Subscript xs) = SmallCaps xs
- go (SmallCaps xs) = SmallCaps xs
- go (Code _ s) = Str s
- go (Math _ s) = Str s
- go (RawInline _ _) = Str ""
- go (Link xs _) = SmallCaps xs
- go (Image xs _) = SmallCaps $ [Str "["] ++ xs ++ [Str "]"]
- go (Cite _ cits) = SmallCaps cits
- go x = x
+ document) def{ stPlain = True }
pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc
pandocTitleBlock tit auths dat =
@@ -187,7 +177,7 @@ pandocToMarkdown opts (Pandoc meta blocks) = do
then tableOfContents opts headerBlocks
else empty
-- Strip off final 'references' header if markdown citations enabled
- let blocks' = if not isPlain && isEnabled Ext_citations opts
+ let blocks' = if isEnabled Ext_citations opts
then case reverse blocks of
(Div (_,["references"],_) _):xs -> reverse xs
_ -> blocks
@@ -308,12 +298,12 @@ blockToMarkdown :: WriterOptions -- ^ Options
-> State WriterState Doc
blockToMarkdown _ Null = return empty
blockToMarkdown opts (Div attrs ils) = do
- isPlain <- gets stPlain
contents <- blockListToMarkdown opts ils
- return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts)
- then contents <> blankline
- else tagWithAttrs "div" attrs <> blankline <>
+ return $ if isEnabled Ext_raw_html opts &&
+ isEnabled Ext_markdown_in_html_blocks opts
+ then tagWithAttrs "div" attrs <> blankline <>
contents <> blankline <> "</div>" <> blankline
+ else contents <> blankline
blockToMarkdown opts (Plain inlines) = do
contents <- inlineListToMarkdown opts inlines
-- escape if para starts with ordered list marker
@@ -337,21 +327,22 @@ blockToMarkdown opts (Para inlines) =
(<> blankline) `fmap` blockToMarkdown opts (Plain inlines)
blockToMarkdown opts (RawBlock f str)
| f == "html" = do
- st <- get
- if stPlain st
- then return empty
- else return $ if isEnabled Ext_markdown_attribute opts
+ plain <- gets stPlain
+ return $ if plain
+ then empty
+ else if isEnabled Ext_markdown_attribute opts
then text (addMarkdownAttribute str) <> text "\n"
else text str <> text "\n"
| f `elem` ["latex", "tex", "markdown"] = do
- st <- get
- if stPlain st
- then return empty
- else return $ text str <> text "\n"
+ plain <- gets stPlain
+ return $ if plain
+ then empty
+ else text str <> text "\n"
blockToMarkdown _ (RawBlock _ _) = return empty
-blockToMarkdown _ HorizontalRule =
- return $ blankline <> text "* * * * *" <> blankline
+blockToMarkdown opts HorizontalRule = do
+ return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline
blockToMarkdown opts (Header level attr inlines) = do
+ plain <- gets stPlain
-- we calculate the id that would be used by auto_identifiers
-- so we know whether to print an explicit identifier
ids <- gets stIds
@@ -366,19 +357,23 @@ blockToMarkdown opts (Header level attr inlines) = do
_ | isEnabled Ext_header_attributes opts ->
space <> attrsToMarkdown attr
| otherwise -> empty
- contents <- inlineListToMarkdown opts inlines
- st <- get
+ contents <- inlineListToMarkdown opts $
+ if level == 1 && plain
+ then capitalize inlines
+ else inlines
let setext = writerSetextHeaders opts
return $ nowrap
$ case level of
- 1 | setext ->
+ 1 | plain -> blanklines 3 <> contents <> blanklines 2
+ | setext ->
contents <> attr' <> cr <> text (replicate (offset contents) '=') <>
blankline
- 2 | setext ->
+ 2 | plain -> blanklines 2 <> contents <> blankline
+ | setext ->
contents <> attr' <> cr <> text (replicate (offset contents) '-') <>
blankline
-- ghc interprets '#' characters in column 1 as linenum specifiers.
- _ | stPlain st || isEnabled Ext_literate_haskell opts ->
+ _ | plain || isEnabled Ext_literate_haskell opts ->
contents <> blankline
_ -> text (replicate level '#') <> space <> contents <> attr' <> blankline
blockToMarkdown opts (CodeBlock (_,classes,_) str)
@@ -405,17 +400,15 @@ blockToMarkdown opts (CodeBlock attribs str) = return $
attrs = if isEnabled Ext_fenced_code_attributes opts
then nowrap $ " " <> attrsToMarkdown attribs
else case attribs of
- (_,[cls],_) -> " " <> text cls
- _ -> empty
+ (_,(cls:_),_) -> " " <> text cls
+ _ -> empty
blockToMarkdown opts (BlockQuote blocks) = do
- st <- get
+ plain <- gets stPlain
-- if we're writing literate haskell, put a space before the bird tracks
-- so they won't be interpreted as lhs...
let leader = if isEnabled Ext_literate_haskell opts
then " > "
- else if stPlain st
- then " "
- else "> "
+ else if plain then " " else "> "
contents <- blockListToMarkdown opts blocks
return $ (prefixed leader contents) <> blankline
blockToMarkdown opts t@(Table caption aligns widths headers rows) = do
@@ -471,7 +464,7 @@ addMarkdownAttribute :: String -> String
addMarkdownAttribute s =
case span isTagText $ reverse $ parseTags s of
(xs,(TagOpen t attrs:rest)) ->
- renderTags $ reverse rest ++ (TagOpen t attrs' : reverse xs)
+ renderTags' $ reverse rest ++ (TagOpen t attrs' : reverse xs)
where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs,
x /= "markdown"]
_ -> s
@@ -609,8 +602,19 @@ definitionListItemToMarkdown opts (label, defs) = do
let sps = case writerTabStop opts - 3 of
n | n > 0 -> text $ replicate n ' '
_ -> text " "
- let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs'
- return $ nowrap labelText <> cr <> contents <> cr
+ if isEnabled Ext_compact_definition_lists opts
+ then do
+ let contents = vcat $ map (\d -> hang tabStop (leader <> sps)
+ $ vcat d <> cr) defs'
+ return $ nowrap labelText <> cr <> contents <> cr
+ else do
+ let contents = vcat $ map (\d -> hang tabStop (leader <> sps)
+ $ vcat d <> cr) defs'
+ let isTight = case defs of
+ ((Plain _ : _): _) -> True
+ _ -> False
+ return $ blankline <> nowrap labelText <>
+ (if isTight then cr else blankline) <> contents <> blankline
else do
return $ nowrap labelText <> text " " <> cr <>
vsep (map vsep defs') <> blankline
@@ -625,15 +629,21 @@ blockListToMarkdown opts blocks =
-- code block will be treated as a list continuation paragraph
where fixBlocks (b : CodeBlock attr x : rest)
| (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr)
- && isListBlock b =
- b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x :
- fixBlocks rest
+ && isListBlock b = b : commentSep : CodeBlock attr x :
+ fixBlocks rest
+ fixBlocks (b1@(BulletList _) : b2@(BulletList _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (b1@(OrderedList _ _) : b2@(OrderedList _ _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
+ fixBlocks (b1@(DefinitionList _) : b2@(DefinitionList _) : bs) =
+ b1 : commentSep : fixBlocks (b2:bs)
fixBlocks (x : xs) = x : fixBlocks xs
fixBlocks [] = []
isListBlock (BulletList _) = True
isListBlock (OrderedList _ _) = True
isListBlock (DefinitionList _) = True
isListBlock _ = False
+ commentSep = RawBlock "html" "<!-- -->\n"
-- | Get reference for target; if none exists, create unique one and return.
-- Prefer label if possible; otherwise, generate a unique key.
@@ -657,7 +667,11 @@ getReference label (src, tit) = do
-- | Convert list of Pandoc inline elements to markdown.
inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
inlineListToMarkdown opts lst =
- mapM (inlineToMarkdown opts) lst >>= return . cat
+ mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat
+ where avoidBadWraps [] = []
+ avoidBadWraps (Space:Str (c:cs):xs)
+ | c `elem` "-*+>" = Str (' ':c:cs) : avoidBadWraps xs
+ avoidBadWraps (x:xs) = x : avoidBadWraps xs
escapeSpaces :: Inline -> Inline
escapeSpaces (Str s) = Str $ substitute " " "\\ " s
@@ -667,52 +681,67 @@ escapeSpaces x = x
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
inlineToMarkdown opts (Span attrs ils) = do
- st <- get
contents <- inlineListToMarkdown opts ils
- return $ if stPlain st
- then contents
- else tagWithAttrs "span" attrs <> contents <> text "</span>"
+ return $ if isEnabled Ext_raw_html opts
+ then tagWithAttrs "span" attrs <> contents <> text "</span>"
+ else contents
inlineToMarkdown opts (Emph lst) = do
+ plain <- gets stPlain
contents <- inlineListToMarkdown opts lst
- return $ "*" <> contents <> "*"
+ return $ if plain
+ then "_" <> contents <> "_"
+ else "*" <> contents <> "*"
inlineToMarkdown opts (Strong lst) = do
- contents <- inlineListToMarkdown opts lst
- return $ "**" <> contents <> "**"
+ plain <- gets stPlain
+ if plain
+ then inlineListToMarkdown opts $ capitalize lst
+ else do
+ contents <- inlineListToMarkdown opts lst
+ return $ "**" <> contents <> "**"
inlineToMarkdown opts (Strikeout lst) = do
contents <- inlineListToMarkdown opts lst
return $ if isEnabled Ext_strikeout opts
then "~~" <> contents <> "~~"
else "<s>" <> contents <> "</s>"
inlineToMarkdown opts (Superscript lst) = do
- let lst' = walk escapeSpaces lst
- contents <- inlineListToMarkdown opts lst'
+ contents <- inlineListToMarkdown opts $ walk escapeSpaces lst
return $ if isEnabled Ext_superscript opts
then "^" <> contents <> "^"
else "<sup>" <> contents <> "</sup>"
inlineToMarkdown opts (Subscript lst) = do
- let lst' = walk escapeSpaces lst
- contents <- inlineListToMarkdown opts lst'
+ contents <- inlineListToMarkdown opts $ walk escapeSpaces lst
return $ if isEnabled Ext_subscript opts
then "~" <> contents <> "~"
else "<sub>" <> contents <> "</sub>"
-inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst
+inlineToMarkdown opts (SmallCaps lst) = do
+ plain <- gets stPlain
+ if plain
+ then inlineListToMarkdown opts $ capitalize lst
+ else do
+ contents <- inlineListToMarkdown opts lst
+ return $ tagWithAttrs "span"
+ ("",[],[("style","font-variant:small-caps;")])
+ <> contents <> text "</span>"
inlineToMarkdown opts (Quoted SingleQuote lst) = do
contents <- inlineListToMarkdown opts lst
return $ "‘" <> contents <> "’"
inlineToMarkdown opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMarkdown opts lst
return $ "“" <> contents <> "”"
-inlineToMarkdown opts (Code attr str) =
+inlineToMarkdown opts (Code attr str) = do
let tickGroups = filter (\s -> '`' `elem` s) $ group str
- longest = if null tickGroups
+ let longest = if null tickGroups
then 0
else maximum $ map length tickGroups
- marker = replicate (longest + 1) '`'
- spacer = if (longest == 0) then "" else " "
- attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
+ let marker = replicate (longest + 1) '`'
+ let spacer = if (longest == 0) then "" else " "
+ let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
then attrsToMarkdown attr
else empty
- in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs
+ plain <- gets stPlain
+ if plain
+ then return $ text str
+ else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs
inlineToMarkdown _ (Str str) = do
st <- get
if stPlain st
@@ -725,7 +754,11 @@ inlineToMarkdown opts (Math InlineMath str)
return $ "\\(" <> text str <> "\\)"
| isEnabled Ext_tex_math_double_backslash opts =
return $ "\\\\(" <> text str <> "\\\\)"
- | otherwise = inlineListToMarkdown opts $ readTeXMath' InlineMath str
+ | otherwise = do
+ plain <- gets stPlain
+ inlineListToMarkdown opts $
+ (if plain then makeMathPlainer else id) $
+ texMathToInlines InlineMath str
inlineToMarkdown opts (Math DisplayMath str)
| isEnabled Ext_tex_math_dollars opts =
return $ "$$" <> text str <> "$$"
@@ -734,16 +767,23 @@ inlineToMarkdown opts (Math DisplayMath str)
| isEnabled Ext_tex_math_double_backslash opts =
return $ "\\\\[" <> text str <> "\\\\]"
| otherwise = (\x -> cr <> x <> cr) `fmap`
- inlineListToMarkdown opts (readTeXMath' DisplayMath str)
-inlineToMarkdown opts (RawInline f str)
- | f == "html" || f == "markdown" ||
- (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) =
- return $ text str
-inlineToMarkdown _ (RawInline _ _) = return empty
-inlineToMarkdown opts (LineBreak)
- | isEnabled Ext_hard_line_breaks opts = return cr
- | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr
- | otherwise = return $ " " <> cr
+ inlineListToMarkdown opts (texMathToInlines DisplayMath str)
+inlineToMarkdown opts (RawInline f str) = do
+ plain <- gets stPlain
+ if not plain &&
+ ( f == "markdown" ||
+ (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) ||
+ (isEnabled Ext_raw_html opts && f == "html") )
+ then return $ text str
+ else return empty
+inlineToMarkdown opts (LineBreak) = do
+ plain <- gets stPlain
+ if plain || isEnabled Ext_hard_line_breaks opts
+ then return cr
+ else return $
+ if isEnabled Ext_escaped_line_breaks opts
+ then "\\" <> cr
+ else " " <> cr
inlineToMarkdown _ Space = return space
inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst
inlineToMarkdown opts (Cite (c:cs) lst)
@@ -776,11 +816,12 @@ inlineToMarkdown opts (Cite (c:cs) lst)
modekey SuppressAuthor = "-"
modekey _ = ""
inlineToMarkdown opts (Link txt (src, tit)) = do
+ plain <- gets stPlain
linktext <- inlineListToMarkdown opts txt
let linktitle = if null tit
then empty
else text $ " \"" ++ tit ++ "\""
- let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
let useAuto = isURI src &&
case txt of
[Str s] | escapeURI s == srcSuffix -> True
@@ -789,22 +830,29 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
ref <- if useRefLinks then getReference txt (src, tit) else return []
reftext <- inlineListToMarkdown opts ref
return $ if useAuto
- then "<" <> text srcSuffix <> ">"
+ then if plain
+ then text srcSuffix
+ else "<" <> text srcSuffix <> ">"
else if useRefLinks
then let first = "[" <> linktext <> "]"
second = if txt == ref
then "[]"
else "[" <> reftext <> "]"
in first <> second
- else "[" <> linktext <> "](" <>
- text src <> linktitle <> ")"
+ else if plain
+ then linktext
+ else "[" <> linktext <> "](" <>
+ text src <> linktitle <> ")"
inlineToMarkdown opts (Image alternate (source, tit)) = do
+ plain <- gets stPlain
let txt = if null alternate || alternate == [Str source]
-- to prevent autolinks
then [Str ""]
else alternate
linkPart <- inlineToMarkdown opts (Link txt (source, tit))
- return $ "!" <> linkPart
+ return $ if plain
+ then "[" <> linkPart <> "]"
+ else "!" <> linkPart
inlineToMarkdown opts (Note contents) = do
modify (\st -> st{ stNotes = contents : stNotes st })
st <- get
@@ -812,3 +860,9 @@ inlineToMarkdown opts (Note contents) = do
if isEnabled Ext_footnotes opts
then return $ "[^" <> ref <> "]"
else return $ "[" <> ref <> "]"
+
+makeMathPlainer :: [Inline] -> [Inline]
+makeMathPlainer = walk go
+ where
+ go (Emph xs) = Span nullAttr xs
+ go x = x
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index 3b987ba2b..3f392a5d0 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -37,92 +37,99 @@ import Text.Pandoc.Writers.Shared
import Text.Pandoc.Pretty (render)
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.XML ( escapeStringForXML )
-import Data.List ( intersect, intercalate, intersperse )
+import Data.List ( intersect, intercalate )
import Network.URI ( isURI )
+import Control.Monad.Reader
import Control.Monad.State
data WriterState = WriterState {
stNotes :: Bool -- True if there are notes
- , stListLevel :: [Char] -- String at beginning of list items, e.g. "**"
- , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
}
+data WriterReader = WriterReader {
+ options :: WriterOptions -- Writer options
+ , listLevel :: String -- String at beginning of list items, e.g. "**"
+ , useTags :: Bool -- True if we should use HTML tags because we're in a complex list
+ }
+
+type MediaWikiWriter = ReaderT WriterReader (State WriterState)
+
-- | Convert Pandoc to MediaWiki.
writeMediaWiki :: WriterOptions -> Pandoc -> String
writeMediaWiki opts document =
- evalState (pandocToMediaWiki opts document)
- WriterState { stNotes = False, stListLevel = [], stUseTags = False }
+ let initialState = WriterState { stNotes = False }
+ env = WriterReader { options = opts, listLevel = [], useTags = False }
+ in evalState (runReaderT (pandocToMediaWiki document) env) initialState
-- | Return MediaWiki representation of document.
-pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String
-pandocToMediaWiki opts (Pandoc meta blocks) = do
+pandocToMediaWiki :: Pandoc -> MediaWikiWriter String
+pandocToMediaWiki (Pandoc meta blocks) = do
+ opts <- asks options
metadata <- metaToJSON opts
- (fmap trimr . blockListToMediaWiki opts)
- (inlineListToMediaWiki opts)
+ (fmap trimr . blockListToMediaWiki)
+ inlineListToMediaWiki
meta
- body <- blockListToMediaWiki opts blocks
- notesExist <- get >>= return . stNotes
+ body <- blockListToMediaWiki blocks
+ notesExist <- gets stNotes
let notes = if notesExist
then "\n<references />"
else ""
let main = body ++ notes
let context = defField "body" main
- $ defField "toc" (writerTableOfContents opts)
- $ metadata
- if writerStandalone opts
- then return $ renderTemplate' (writerTemplate opts) context
- else return main
+ $ defField "toc" (writerTableOfContents opts) metadata
+ return $ if writerStandalone opts
+ then renderTemplate' (writerTemplate opts) context
+ else main
-- | Escape special characters for MediaWiki.
escapeString :: String -> String
escapeString = escapeStringForXML
-- | Convert Pandoc block element to MediaWiki.
-blockToMediaWiki :: WriterOptions -- ^ Options
- -> Block -- ^ Block element
- -> State WriterState String
+blockToMediaWiki :: Block -- ^ Block element
+ -> MediaWikiWriter String
-blockToMediaWiki _ Null = return ""
+blockToMediaWiki Null = return ""
-blockToMediaWiki opts (Div attrs bs) = do
- contents <- blockListToMediaWiki opts bs
+blockToMediaWiki (Div attrs bs) = do
+ contents <- blockListToMediaWiki bs
return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++
contents ++ "\n\n" ++ "</div>"
-blockToMediaWiki opts (Plain inlines) =
- inlineListToMediaWiki opts inlines
+blockToMediaWiki (Plain inlines) =
+ inlineListToMediaWiki inlines
-- title beginning with fig: indicates that the image is a figure
-blockToMediaWiki opts (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
+blockToMediaWiki (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
capt <- if null txt
then return ""
- else ("|caption " ++) `fmap` inlineListToMediaWiki opts txt
+ else ("|caption " ++) `fmap` inlineListToMediaWiki txt
let opt = if null txt
then ""
else "|alt=" ++ if null tit then capt else tit ++ capt
return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n"
-blockToMediaWiki opts (Para inlines) = do
- useTags <- get >>= return . stUseTags
- listLevel <- get >>= return . stListLevel
- contents <- inlineListToMediaWiki opts inlines
- return $ if useTags
+blockToMediaWiki (Para inlines) = do
+ tags <- asks useTags
+ lev <- asks listLevel
+ contents <- inlineListToMediaWiki inlines
+ return $ if tags
then "<p>" ++ contents ++ "</p>"
- else contents ++ if null listLevel then "\n" else ""
+ else contents ++ if null lev then "\n" else ""
-blockToMediaWiki _ (RawBlock f str)
+blockToMediaWiki (RawBlock f str)
| f == Format "mediawiki" = return str
| f == Format "html" = return str
| otherwise = return ""
-blockToMediaWiki _ HorizontalRule = return "\n-----\n"
+blockToMediaWiki HorizontalRule = return "\n-----\n"
-blockToMediaWiki opts (Header level _ inlines) = do
- contents <- inlineListToMediaWiki opts inlines
+blockToMediaWiki (Header level _ inlines) = do
+ contents <- inlineListToMediaWiki inlines
let eqs = replicate level '='
return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
-blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do
+blockToMediaWiki (CodeBlock (_,classes,_) str) = do
let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp",
"autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm",
"cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran",
@@ -132,75 +139,64 @@ blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do
"python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
"smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
"visualfoxpro", "winbatch", "xml", "xpp", "z80"]
- let (beg, end) = if null at
- then ("<pre" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</pre>")
- else ("<source lang=\"" ++ head at ++ "\">", "</source>")
- return $ beg ++ escapeString str ++ end
-
-blockToMediaWiki opts (BlockQuote blocks) = do
- contents <- blockListToMediaWiki opts blocks
+ return $
+ if null at
+ then "<pre" ++ (if null classes
+ then ">"
+ else " class=\"" ++ unwords classes ++ "\">") ++
+ escapeString str ++ "</pre>"
+ else "<source lang=\"" ++ head at ++ "\">" ++ str ++ "</source>"
+ -- note: no escape!
+
+blockToMediaWiki (BlockQuote blocks) = do
+ contents <- blockListToMediaWiki blocks
return $ "<blockquote>" ++ contents ++ "</blockquote>"
-blockToMediaWiki opts (Table capt aligns widths headers rows') = do
+blockToMediaWiki (Table capt aligns widths headers rows') = do
caption <- if null capt
then return ""
else do
- c <- inlineListToMediaWiki opts capt
+ c <- inlineListToMediaWiki capt
return $ "|+ " ++ trimr c ++ "\n"
let headless = all null headers
let allrows = if headless then rows' else headers:rows'
- tableBody <- (concat . intersperse "|-\n") `fmap`
- mapM (tableRowToMediaWiki opts headless aligns widths)
+ tableBody <- intercalate "|-\n" `fmap`
+ mapM (tableRowToMediaWiki headless aligns widths)
(zip [1..] allrows)
return $ "{|\n" ++ caption ++ tableBody ++ "|}\n"
-blockToMediaWiki opts x@(BulletList items) = do
- oldUseTags <- get >>= return . stUseTags
- listLevel <- get >>= return . stListLevel
- let useTags = oldUseTags || not (isSimpleList x)
- if useTags
+blockToMediaWiki x@(BulletList items) = do
+ tags <- fmap (|| not (isSimpleList x)) $ asks useTags
+ if tags
then do
- modify $ \s -> s { stUseTags = True }
- contents <- mapM (listItemToMediaWiki opts) items
- modify $ \s -> s { stUseTags = oldUseTags }
+ contents <- local (\ s -> s { useTags = True }) $ mapM listItemToMediaWiki items
return $ "<ul>\n" ++ vcat contents ++ "</ul>\n"
else do
- modify $ \s -> s { stListLevel = stListLevel s ++ "*" }
- contents <- mapM (listItemToMediaWiki opts) items
- modify $ \s -> s { stListLevel = init (stListLevel s) }
- return $ vcat contents ++ if null listLevel then "\n" else ""
-
-blockToMediaWiki opts x@(OrderedList attribs items) = do
- oldUseTags <- get >>= return . stUseTags
- listLevel <- get >>= return . stListLevel
- let useTags = oldUseTags || not (isSimpleList x)
- if useTags
+ lev <- asks listLevel
+ contents <- local (\s -> s { listLevel = listLevel s ++ "*" }) $ mapM listItemToMediaWiki items
+ return $ vcat contents ++ if null lev then "\n" else ""
+
+blockToMediaWiki x@(OrderedList attribs items) = do
+ tags <- fmap (|| not (isSimpleList x)) $ asks useTags
+ if tags
then do
- modify $ \s -> s { stUseTags = True }
- contents <- mapM (listItemToMediaWiki opts) items
- modify $ \s -> s { stUseTags = oldUseTags }
+ contents <- local (\s -> s { useTags = True }) $ mapM listItemToMediaWiki items
return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ "</ol>\n"
else do
- modify $ \s -> s { stListLevel = stListLevel s ++ "#" }
- contents <- mapM (listItemToMediaWiki opts) items
- modify $ \s -> s { stListLevel = init (stListLevel s) }
- return $ vcat contents ++ if null listLevel then "\n" else ""
-
-blockToMediaWiki opts x@(DefinitionList items) = do
- oldUseTags <- get >>= return . stUseTags
- listLevel <- get >>= return . stListLevel
- let useTags = oldUseTags || not (isSimpleList x)
- if useTags
+ lev <- asks listLevel
+ contents <- local (\s -> s { listLevel = listLevel s ++ "#" }) $ mapM listItemToMediaWiki items
+ return $ vcat contents ++ if null lev then "\n" else ""
+
+blockToMediaWiki x@(DefinitionList items) = do
+ tags <- fmap (|| not (isSimpleList x)) $ asks useTags
+ if tags
then do
- modify $ \s -> s { stUseTags = True }
- contents <- mapM (definitionListItemToMediaWiki opts) items
- modify $ \s -> s { stUseTags = oldUseTags }
+ contents <- local (\s -> s { useTags = True }) $ mapM definitionListItemToMediaWiki items
return $ "<dl>\n" ++ vcat contents ++ "</dl>\n"
else do
- modify $ \s -> s { stListLevel = stListLevel s ++ ";" }
- contents <- mapM (definitionListItemToMediaWiki opts) items
- modify $ \s -> s { stListLevel = init (stListLevel s) }
- return $ vcat contents ++ if null listLevel then "\n" else ""
+ lev <- asks listLevel
+ contents <- local (\s -> s { listLevel = listLevel s ++ ";" }) $ mapM definitionListItemToMediaWiki items
+ return $ vcat contents ++ if null lev then "\n" else ""
-- Auxiliary functions for lists:
@@ -216,31 +212,30 @@ listAttribsToString (startnum, numstyle, _) =
else "")
-- | Convert bullet or ordered list item (list of blocks) to MediaWiki.
-listItemToMediaWiki :: WriterOptions -> [Block] -> State WriterState String
-listItemToMediaWiki opts items = do
- contents <- blockListToMediaWiki opts items
- useTags <- get >>= return . stUseTags
- if useTags
+listItemToMediaWiki :: [Block] -> MediaWikiWriter String
+listItemToMediaWiki items = do
+ contents <- blockListToMediaWiki items
+ tags <- asks useTags
+ if tags
then return $ "<li>" ++ contents ++ "</li>"
else do
- marker <- get >>= return . stListLevel
+ marker <- asks listLevel
return $ marker ++ " " ++ contents
-- | Convert definition list item (label, list of blocks) to MediaWiki.
-definitionListItemToMediaWiki :: WriterOptions
- -> ([Inline],[[Block]])
- -> State WriterState String
-definitionListItemToMediaWiki opts (label, items) = do
- labelText <- inlineListToMediaWiki opts label
- contents <- mapM (blockListToMediaWiki opts) items
- useTags <- get >>= return . stUseTags
- if useTags
+definitionListItemToMediaWiki :: ([Inline],[[Block]])
+ -> MediaWikiWriter String
+definitionListItemToMediaWiki (label, items) = do
+ labelText <- inlineListToMediaWiki label
+ contents <- mapM blockListToMediaWiki items
+ tags <- asks useTags
+ if tags
then return $ "<dt>" ++ labelText ++ "</dt>\n" ++
- (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents)
+ intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents)
else do
- marker <- get >>= return . stListLevel
+ marker <- asks listLevel
return $ marker ++ " " ++ labelText ++ "\n" ++
- (intercalate "\n" $ map (\d -> init marker ++ ": " ++ d) contents)
+ intercalate "\n" (map (\d -> init marker ++ ": " ++ d) contents)
-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
isSimpleList :: Block -> Bool
@@ -283,25 +278,22 @@ vcat = intercalate "\n"
-- Auxiliary functions for tables:
-tableRowToMediaWiki :: WriterOptions
- -> Bool
+tableRowToMediaWiki :: Bool
-> [Alignment]
-> [Double]
-> (Int, [[Block]])
- -> State WriterState String
-tableRowToMediaWiki opts headless alignments widths (rownum, cells) = do
- cells' <- mapM (\cellData ->
- tableCellToMediaWiki opts headless rownum cellData)
+ -> MediaWikiWriter String
+tableRowToMediaWiki headless alignments widths (rownum, cells) = do
+ cells' <- mapM (tableCellToMediaWiki headless rownum)
$ zip3 alignments widths cells
return $ unlines cells'
-tableCellToMediaWiki :: WriterOptions
- -> Bool
+tableCellToMediaWiki :: Bool
-> Int
-> (Alignment, Double, [Block])
- -> State WriterState String
-tableCellToMediaWiki opts headless rownum (alignment, width, bs) = do
- contents <- blockListToMediaWiki opts bs
+ -> MediaWikiWriter String
+tableCellToMediaWiki headless rownum (alignment, width, bs) = do
+ contents <- blockListToMediaWiki bs
let marker = if rownum == 1 && not headless then "!" else "|"
let percent w = show (truncate (100*w) :: Integer) ++ "%"
let attrs = ["align=" ++ show (alignmentToString alignment) |
@@ -313,7 +305,7 @@ tableCellToMediaWiki opts headless rownum (alignment, width, bs) = do
else unwords attrs ++ "|"
return $ marker ++ attr ++ trimr contents
-alignmentToString :: Alignment -> [Char]
+alignmentToString :: Alignment -> String
alignmentToString alignment = case alignment of
AlignLeft -> "left"
AlignRight -> "right"
@@ -321,94 +313,94 @@ alignmentToString alignment = case alignment of
AlignDefault -> "left"
-- | Convert list of Pandoc block elements to MediaWiki.
-blockListToMediaWiki :: WriterOptions -- ^ Options
- -> [Block] -- ^ List of block elements
- -> State WriterState String
-blockListToMediaWiki opts blocks =
- mapM (blockToMediaWiki opts) blocks >>= return . vcat
+blockListToMediaWiki :: [Block] -- ^ List of block elements
+ -> MediaWikiWriter String
+blockListToMediaWiki blocks =
+ fmap vcat $ mapM blockToMediaWiki blocks
-- | Convert list of Pandoc inline elements to MediaWiki.
-inlineListToMediaWiki :: WriterOptions -> [Inline] -> State WriterState String
-inlineListToMediaWiki opts lst =
- mapM (inlineToMediaWiki opts) lst >>= return . concat
+inlineListToMediaWiki :: [Inline] -> MediaWikiWriter String
+inlineListToMediaWiki lst =
+ fmap concat $ mapM inlineToMediaWiki lst
-- | Convert Pandoc inline element to MediaWiki.
-inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String
+inlineToMediaWiki :: Inline -> MediaWikiWriter String
-inlineToMediaWiki opts (Span attrs ils) = do
- contents <- inlineListToMediaWiki opts ils
+inlineToMediaWiki (Span attrs ils) = do
+ contents <- inlineListToMediaWiki ils
return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "</span>"
-inlineToMediaWiki opts (Emph lst) = do
- contents <- inlineListToMediaWiki opts lst
+inlineToMediaWiki (Emph lst) = do
+ contents <- inlineListToMediaWiki lst
return $ "''" ++ contents ++ "''"
-inlineToMediaWiki opts (Strong lst) = do
- contents <- inlineListToMediaWiki opts lst
+inlineToMediaWiki (Strong lst) = do
+ contents <- inlineListToMediaWiki lst
return $ "'''" ++ contents ++ "'''"
-inlineToMediaWiki opts (Strikeout lst) = do
- contents <- inlineListToMediaWiki opts lst
+inlineToMediaWiki (Strikeout lst) = do
+ contents <- inlineListToMediaWiki lst
return $ "<s>" ++ contents ++ "</s>"
-inlineToMediaWiki opts (Superscript lst) = do
- contents <- inlineListToMediaWiki opts lst
+inlineToMediaWiki (Superscript lst) = do
+ contents <- inlineListToMediaWiki lst
return $ "<sup>" ++ contents ++ "</sup>"
-inlineToMediaWiki opts (Subscript lst) = do
- contents <- inlineListToMediaWiki opts lst
+inlineToMediaWiki (Subscript lst) = do
+ contents <- inlineListToMediaWiki lst
return $ "<sub>" ++ contents ++ "</sub>"
-inlineToMediaWiki opts (SmallCaps lst) = inlineListToMediaWiki opts lst
+inlineToMediaWiki (SmallCaps lst) = inlineListToMediaWiki lst
-inlineToMediaWiki opts (Quoted SingleQuote lst) = do
- contents <- inlineListToMediaWiki opts lst
+inlineToMediaWiki (Quoted SingleQuote lst) = do
+ contents <- inlineListToMediaWiki lst
return $ "\8216" ++ contents ++ "\8217"
-inlineToMediaWiki opts (Quoted DoubleQuote lst) = do
- contents <- inlineListToMediaWiki opts lst
+inlineToMediaWiki (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMediaWiki lst
return $ "\8220" ++ contents ++ "\8221"
-inlineToMediaWiki opts (Cite _ lst) = inlineListToMediaWiki opts lst
+inlineToMediaWiki (Cite _ lst) = inlineListToMediaWiki lst
-inlineToMediaWiki _ (Code _ str) =
- return $ "<code>" ++ (escapeString str) ++ "</code>"
+inlineToMediaWiki (Code _ str) =
+ return $ "<code>" ++ escapeString str ++ "</code>"
-inlineToMediaWiki _ (Str str) = return $ escapeString str
+inlineToMediaWiki (Str str) = return $ escapeString str
-inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>"
- -- note: str should NOT be escaped
+inlineToMediaWiki (Math _ str) = return $ "<math>" ++ str ++ "</math>"
+ -- note: str should NOT be escaped
-inlineToMediaWiki _ (RawInline f str)
+inlineToMediaWiki (RawInline f str)
| f == Format "mediawiki" = return str
| f == Format "html" = return str
| otherwise = return ""
-inlineToMediaWiki _ (LineBreak) = return "<br />"
+inlineToMediaWiki (LineBreak) = return "<br />"
-inlineToMediaWiki _ Space = return " "
+inlineToMediaWiki Space = return " "
-inlineToMediaWiki opts (Link txt (src, _)) = do
- label <- inlineListToMediaWiki opts txt
+inlineToMediaWiki (Link txt (src, _)) = do
+ label <- inlineListToMediaWiki txt
case txt of
[Str s] | escapeURI s == src -> return src
- _ -> if isURI src
- then return $ "[" ++ src ++ " " ++ label ++ "]"
- else return $ "[[" ++ src' ++ "|" ++ label ++ "]]"
+ _ -> return $ if isURI src
+ then "[" ++ src ++ " " ++ label ++ "]"
+ else "[[" ++ src' ++ "|" ++ label ++ "]]"
where src' = case src of
'/':xs -> xs -- with leading / it's a
_ -> src -- link to a help page
-inlineToMediaWiki opts (Image alt (source, tit)) = do
- alt' <- inlineListToMediaWiki opts alt
- let txt = if (null tit)
+
+inlineToMediaWiki (Image alt (source, tit)) = do
+ alt' <- inlineListToMediaWiki alt
+ let txt = if null tit
then if null alt
then ""
- else "|" ++ alt'
- else "|" ++ tit
+ else '|' : alt'
+ else '|' : tit
return $ "[[Image:" ++ source ++ txt ++ "]]"
-inlineToMediaWiki opts (Note contents) = do
- contents' <- blockListToMediaWiki opts contents
+inlineToMediaWiki (Note contents) = do
+ contents' <- blockListToMediaWiki contents
modify (\s -> s { stNotes = True })
return $ "<ref>" ++ contents' ++ "</ref>"
-- note - may not work for notes with multiple blocks
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 15f7c8be8..feaa0167c 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -37,8 +37,9 @@ import Text.TeXMath
import qualified Data.ByteString.Lazy as B
import Text.Pandoc.UTF8 ( fromStringLazy )
import Codec.Archive.Zip
+import Control.Applicative ((<$>))
import Text.Pandoc.Options ( WriterOptions(..) )
-import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem, warn )
+import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem', warn )
import Text.Pandoc.ImageSize ( imageSize, sizeInPoints )
import Text.Pandoc.MIME ( getMimeType )
import Text.Pandoc.Definition
@@ -131,7 +132,7 @@ writeODT opts doc@(Pandoc meta _) = do
transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
transformPicMath opts entriesRef (Image lab (src,_)) = do
- res <- fetchItem (writerSourceURL opts) src
+ res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
case res of
Left (_ :: E.SomeException) -> do
warn $ "Could not find image `" ++ src ++ "', skipping..."
@@ -150,7 +151,7 @@ transformPicMath opts entriesRef (Image lab (src,_)) = do
transformPicMath _ entriesRef (Math t math) = do
entries <- readIORef entriesRef
let dt = if t == InlineMath then DisplayInline else DisplayBlock
- case texMathToMathML dt math of
+ case writeMathML dt <$> readTeX math of
Left _ -> return $ Math t math
Right r -> do
let conf = useShortEmptyTags (const False) defaultConfigPP
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index b6da2694c..773d142f4 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -380,7 +380,7 @@ inlineToOpenDocument o ils
| SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l
| Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l
| Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s
- | Math t s <- ils = inlinesToOpenDocument o (readTeXMath' t s)
+ | Math t s <- ils = inlinesToOpenDocument o (texMathToInlines t s)
| Cite _ l <- ils = inlinesToOpenDocument o l
| RawInline f s <- ils = if f == Format "opendocument"
then return $ text s
@@ -504,7 +504,7 @@ paraStyle parent attrs = do
tight = if t then [ ("fo:margin-top" , "0in" )
, ("fo:margin-bottom" , "0in" )]
else []
- indent = if (i /= 0 || b)
+ indent = if (i /= 0 || b)
then [ ("fo:margin-left" , indentVal)
, ("fo:margin-right" , "0in" )
, ("fo:text-indent" , "0in" )
@@ -534,7 +534,7 @@ paraTableStyles t s (a:xs)
[ ("fo:text-align", x)
, ("style:justify-single-word", "false")]
-data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre
+data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre
deriving ( Eq,Ord )
textStyleAttr :: TextStyle -> [(String,String)]
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 87046537c..414883b29 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -238,6 +238,8 @@ inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat
-- | Convert Pandoc inline element to Org.
inlineToOrg :: Inline -> State WriterState Doc
+inlineToOrg (Span (uid, [], []) []) =
+ return $ "<<" <> text uid <> ">>"
inlineToOrg (Span _ lst) =
inlineListToOrg lst
inlineToOrg (Emph lst) = do
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 31c97349b..57ebfc360 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -37,7 +37,8 @@ import Text.Pandoc.Shared
import Text.Pandoc.Writers.Shared
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Builder (deleteMeta)
-import Data.List ( isPrefixOf, intersperse, transpose )
+import Data.Maybe (fromMaybe)
+import Data.List ( isPrefixOf, stripPrefix, intersperse, transpose )
import Network.URI (isURI)
import Text.Pandoc.Pretty
import Control.Monad.State
@@ -174,7 +175,7 @@ blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
let alt = ":alt: " <> if null tit then capt else text tit
return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline
blockToRST (Para inlines)
- | LineBreak `elem` inlines = do -- use line block if LineBreaks
+ | LineBreak `elem` inlines = do -- use line block if LineBreaks
lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines
return $ (vcat $ map (text "| " <>) lns) <> blankline
| otherwise = do
@@ -401,7 +402,7 @@ inlineToRST (Link [Str str] (src, _))
if "mailto:" `isPrefixOf` src
then src == escapeURI ("mailto:" ++ str)
else src == escapeURI str = do
- let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let srcSuffix = fromMaybe src (stripPrefix "mailto:" src)
return $ text srcSuffix
inlineToRST (Link [Image alt (imgsrc,imgtit)] (src, _tit)) = do
label <- registerImage alt (imgsrc,imgtit) (Just src)
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index e0428aaa8..43405ce3c 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -36,50 +36,64 @@ import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Templates (renderTemplate')
import Text.Pandoc.Walk
import Data.List ( isSuffixOf, intercalate )
-import Data.Char ( ord, chr, isDigit, toLower )
-import System.FilePath ( takeExtension )
+import Data.Char ( ord, chr, isDigit )
import qualified Data.ByteString as B
+import qualified Data.Map as M
import Text.Printf ( printf )
-import Network.URI ( isURI, unEscapeString )
-import qualified Control.Exception as E
+import Text.Pandoc.ImageSize
--- | Convert Image inlines into a raw RTF embedded image, read from a file.
+-- | Convert Image inlines into a raw RTF embedded image, read from a file,
+-- or a MediaBag, or the internet.
-- If file not found or filetype not jpeg or png, leave the inline unchanged.
-rtfEmbedImage :: Inline -> IO Inline
-rtfEmbedImage x@(Image _ (src,_)) = do
- let ext = map toLower (takeExtension src)
- if ext `elem` [".jpg",".jpeg",".png"] && not (isURI src)
- then do
- let src' = unEscapeString src
- imgdata <- E.catch (B.readFile src')
- (\e -> let _ = (e :: E.SomeException) in return B.empty)
- let bytes = map (printf "%02x") $ B.unpack imgdata
- let filetype = case ext of
- ".jpg" -> "\\jpegblip"
- ".jpeg" -> "\\jpegblip"
- ".png" -> "\\pngblip"
- _ -> error "Unknown file type"
- let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}"
- return $ if B.null imgdata
- then x
- else RawInline (Format "rtf") raw
- else return x
-rtfEmbedImage x = return x
+rtfEmbedImage :: WriterOptions -> Inline -> IO Inline
+rtfEmbedImage opts x@(Image _ (src,_)) = do
+ result <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
+ case result of
+ Right (imgdata, Just mime)
+ | mime == "image/jpeg" || mime == "image/png" -> do
+ let bytes = map (printf "%02x") $ B.unpack imgdata
+ let filetype = case mime of
+ "image/jpeg" -> "\\jpegblip"
+ "image/png" -> "\\pngblip"
+ _ -> error "Unknown file type"
+ let sizeSpec = case imageSize imgdata of
+ Nothing -> ""
+ Just sz -> "\\picw" ++ show xpx ++
+ "\\pich" ++ show ypx ++
+ "\\picwgoal" ++ show (xpt * 20)
+ ++ "\\pichgoal" ++ show (ypt * 20)
+ -- twip = 1/1440in = 1/20pt
+ where (xpx, ypx) = sizeInPixels sz
+ (xpt, ypt) = sizeInPoints sz
+ let raw = "{\\pict" ++ filetype ++ sizeSpec ++ " " ++
+ concat bytes ++ "}"
+ return $ if B.null imgdata
+ then x
+ else RawInline (Format "rtf") raw
+ _ -> return x
+rtfEmbedImage _ x = return x
-- | Convert Pandoc to a string in rich text format, with
-- images embedded as encoded binary data.
writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String
writeRTFWithEmbeddedImages options doc =
- writeRTF options `fmap` walkM rtfEmbedImage doc
+ writeRTF options `fmap` walkM (rtfEmbedImage options) doc
-- | Convert Pandoc to a string in rich text format.
writeRTF :: WriterOptions -> Pandoc -> String
-writeRTF options (Pandoc meta blocks) =
+writeRTF options (Pandoc meta@(Meta metamap) blocks) =
let spacer = not $ all null $ docTitle meta : docDate meta : docAuthors meta
+ toPlain (MetaBlocks [Para ils]) = MetaInlines ils
+ toPlain x = x
+ -- adjust title, author, date so we don't get para inside para
+ meta' = Meta $ M.adjust toPlain "title"
+ . M.adjust toPlain "author"
+ . M.adjust toPlain "date"
+ $ metamap
Just metadata = metaToJSON options
(Just . concatMap (blockToRTF 0 AlignDefault))
(Just . inlineListToRTF)
- meta
+ meta'
body = concatMap (blockToRTF 0 AlignDefault) blocks
isTOCHeader (Header lev _ _) = lev <= writerTOCDepth options
isTOCHeader _ = False
@@ -324,7 +338,7 @@ inlineToRTF (Quoted DoubleQuote lst) =
"\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
inlineToRTF (Str str) = stringToRTF str
-inlineToRTF (Math t str) = inlineListToRTF $ readTeXMath' t str
+inlineToRTF (Math t str) = inlineListToRTF $ texMathToInlines t str
inlineToRTF (Cite _ lst) = inlineListToRTF lst
inlineToRTF (RawInline f str)
| f == Format "rtf" = str