aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc.hs56
-rw-r--r--src/Text/Pandoc/Compat/Locale.hs9
-rw-r--r--src/Text/Pandoc/Error.hs64
-rw-r--r--src/Text/Pandoc/ImageSize.hs68
-rw-r--r--src/Text/Pandoc/MIME.hs3
-rw-r--r--src/Text/Pandoc/MediaBag.hs16
-rw-r--r--src/Text/Pandoc/Options.hs36
-rw-r--r--src/Text/Pandoc/PDF.hs50
-rw-r--r--src/Text/Pandoc/Parsing.hs119
-rw-r--r--src/Text/Pandoc/Pretty.hs23
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs119
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs85
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs70
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs10
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs258
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs106
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs26
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs37
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs65
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs12
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs318
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs784
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs20
-rw-r--r--src/Text/Pandoc/Readers/Native.hs40
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs48
-rw-r--r--src/Text/Pandoc/Readers/Org.hs598
-rw-r--r--src/Text/Pandoc/Readers/RST.hs118
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs527
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs33
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs8
-rw-r--r--src/Text/Pandoc/SelfContained.hs23
-rw-r--r--src/Text/Pandoc/Shared.hs42
-rw-r--r--src/Text/Pandoc/Templates.hs5
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs6
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs178
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs24
-rw-r--r--src/Text/Pandoc/Writers/Custom.hs16
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs3
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs440
-rw-r--r--src/Text/Pandoc/Writers/DokuWiki.hs16
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs136
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs2
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs215
-rw-r--r--src/Text/Pandoc/Writers/ICML.hs14
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs84
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs118
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs8
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs16
-rw-r--r--src/Text/Pandoc/Writers/OPML.hs3
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs15
-rw-r--r--src/Text/Pandoc/Writers/RST.hs82
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs4
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs6
53 files changed, 3592 insertions, 1590 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index fd849316b..dd361f8d7 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -37,10 +37,12 @@ inline links:
> module Main where
> import Text.Pandoc
+> import Text.Pandoc.Error (handleError)
>
> markdownToRST :: String -> String
-> markdownToRST =
-> (writeRST def {writerReferenceLinks = True}) . readMarkdown def
+> markdownToRST = handleError .
+> writeRST def {writerReferenceLinks = True} .
+> readMarkdown def
>
> main = getContents >>= putStrLn . markdownToRST
@@ -66,6 +68,7 @@ module Text.Pandoc
, mkStringReader
, readDocx
, readMarkdown
+ , readCommonMark
, readMediaWiki
, readRST
, readOrg
@@ -77,6 +80,7 @@ module Text.Pandoc
, readHaddock
, readNative
, readJSON
+ , readTWiki
, readTxt2Tags
, readTxt2TagsNoMacros
, readEPUB
@@ -108,6 +112,7 @@ module Text.Pandoc
, writeOrg
, writeAsciiDoc
, writeHaddock
+ , writeCommonMark
, writeCustom
-- * Rendering templates and default templates
, module Text.Pandoc.Templates
@@ -123,6 +128,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.JSON
import Text.Pandoc.Readers.Markdown
+import Text.Pandoc.Readers.CommonMark
import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.Org
@@ -133,6 +139,7 @@ import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.Textile
import Text.Pandoc.Readers.Native
import Text.Pandoc.Readers.Haddock
+import Text.Pandoc.Readers.TWiki
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Readers.Txt2Tags
import Text.Pandoc.Readers.EPUB
@@ -159,11 +166,13 @@ import Text.Pandoc.Writers.Textile
import Text.Pandoc.Writers.Org
import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.Haddock
+import Text.Pandoc.Writers.CommonMark
import Text.Pandoc.Writers.Custom
import Text.Pandoc.Templates
import Text.Pandoc.Options
-import Text.Pandoc.Shared (safeRead, warn)
+import Text.Pandoc.Shared (safeRead, warn, mapLeft)
import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.Error
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.List (intercalate)
@@ -199,32 +208,35 @@ parseFormatSpec = parse formatSpec ""
'-' -> Set.delete ext
_ -> Set.insert ext
--- auxiliary function for readers:
-markdown :: ReaderOptions -> String -> IO Pandoc
-markdown o s = do
- let (doc, warnings) = readMarkdownWithWarnings o s
- mapM_ warn warnings
- return doc
-data Reader = StringReader (ReaderOptions -> String -> IO Pandoc)
- | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag))
+data Reader = StringReader (ReaderOptions -> String -> IO (Either PandocError Pandoc))
+ | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Either PandocError (Pandoc,MediaBag)))
-mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader
+mkStringReader :: (ReaderOptions -> String -> (Either PandocError Pandoc)) -> Reader
mkStringReader r = StringReader (\o s -> return $ r o s)
-mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader
+mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader
+mkStringReaderWithWarnings r = StringReader $ \o s -> do
+ case r o s of
+ Left err -> return $ Left err
+ Right (doc, warnings) -> do
+ mapM_ warn warnings
+ return (Right doc)
+
+mkBSReader :: (ReaderOptions -> BL.ByteString -> (Either PandocError (Pandoc, MediaBag))) -> Reader
mkBSReader r = ByteStringReader (\o s -> return $ r o s)
-- | Association list of formats and readers.
readers :: [(String, Reader)]
readers = [ ("native" , StringReader $ \_ s -> return $ readNative s)
,("json" , mkStringReader readJSON )
- ,("markdown" , StringReader markdown)
- ,("markdown_strict" , StringReader markdown)
- ,("markdown_phpextra" , StringReader markdown)
- ,("markdown_github" , StringReader markdown)
- ,("markdown_mmd", StringReader markdown)
- ,("rst" , mkStringReader readRST )
+ ,("markdown" , mkStringReaderWithWarnings readMarkdownWithWarnings)
+ ,("markdown_strict" , mkStringReaderWithWarnings readMarkdownWithWarnings)
+ ,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings)
+ ,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings)
+ ,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings)
+ ,("commonmark" , mkStringReader readCommonMark)
+ ,("rst" , mkStringReaderWithWarnings readRSTWithWarnings )
,("mediawiki" , mkStringReader readMediaWiki)
,("docbook" , mkStringReader readDocBook)
,("opml" , mkStringReader readOPML)
@@ -233,6 +245,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s)
,("html" , mkStringReader readHtml)
,("latex" , mkStringReader readLaTeX)
,("haddock" , mkStringReader readHaddock)
+ ,("twiki" , mkStringReader readTWiki)
,("docx" , mkBSReader readDocx)
,("t2t" , mkStringReader readTxt2TagsNoMacros)
,("epub" , mkBSReader readEPUB)
@@ -294,6 +307,7 @@ writers = [
,("org" , PureStringWriter writeOrg)
,("asciidoc" , PureStringWriter writeAsciiDoc)
,("haddock" , PureStringWriter writeHaddock)
+ ,("commonmark" , PureStringWriter writeCommonMark)
]
getDefaultExtensions :: String -> Set Extension
@@ -355,8 +369,8 @@ class ToJSONFilter a => ToJsonFilter a
where toJsonFilter :: a -> IO ()
toJsonFilter = toJSONFilter
-readJSON :: ReaderOptions -> String -> Pandoc
-readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
+readJSON :: ReaderOptions -> String -> Either PandocError Pandoc
+readJSON _ = mapLeft ParseFailure . eitherDecode' . UTF8.fromStringLazy
writeJSON :: WriterOptions -> Pandoc -> String
writeJSON _ = UTF8.toStringLazy . encode
diff --git a/src/Text/Pandoc/Compat/Locale.hs b/src/Text/Pandoc/Compat/Locale.hs
new file mode 100644
index 000000000..ac791136c
--- /dev/null
+++ b/src/Text/Pandoc/Compat/Locale.hs
@@ -0,0 +1,9 @@
+{-# LANGUAGE CPP #-}
+module Text.Pandoc.Compat.Locale ( defaultTimeLocale )
+where
+
+#if MIN_VERSION_time(1,5,0)
+import Data.Time.Format ( defaultTimeLocale )
+#else
+import System.Locale ( defaultTimeLocale )
+#endif
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
new file mode 100644
index 000000000..73d1e8f08
--- /dev/null
+++ b/src/Text/Pandoc/Error.hs
@@ -0,0 +1,64 @@
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Error
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This module provides a standard way to deal with possible errors encounted
+during parsing.
+
+-}
+module Text.Pandoc.Error (PandocError(..), handleError) where
+
+import Text.Parsec.Error
+import Text.Parsec.Pos hiding (Line)
+import Text.Pandoc.Compat.Except
+
+type Input = String
+
+data PandocError = -- | Generic parse failure
+ ParseFailure String
+ -- | Error thrown by a Parsec parser
+ | ParsecError Input ParseError
+ deriving (Show)
+
+
+instance Error PandocError where
+ strMsg = ParseFailure
+
+
+-- | An unsafe method to handle `PandocError`s.
+handleError :: Either PandocError a -> a
+handleError (Right r) = r
+handleError (Left err) =
+ case err of
+ ParseFailure string -> error string
+ ParsecError input err' ->
+ let errPos = errorPos err'
+ errLine = sourceLine errPos
+ errColumn = sourceColumn errPos
+ theline = (lines input ++ [""]) !! (errLine - 1)
+ in error $ "\nError at " ++ show err' ++ "\n" ++
+ theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++
+ "^"
+
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 68b34dcf3..8f0a991ba 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
{-
Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
@@ -38,8 +39,11 @@ import Control.Monad
import Data.Bits
import Data.Binary
import Data.Binary.Get
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, hush)
import qualified Data.Map as M
+import Text.Pandoc.Compat.Except
+import Control.Monad.Trans
+import Data.Maybe (fromMaybe)
-- quick and dirty functions to get image sizes
-- algorithms borrowed from wwwis.pl
@@ -64,7 +68,7 @@ imageType img = case B.take 4 img of
"%!PS"
| (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
-> return Eps
- _ -> fail "Unknown image type"
+ _ -> (hush . Left) "Unknown image type"
imageSize :: ByteString -> Maybe ImageSize
imageSize img = do
@@ -114,7 +118,7 @@ pngSize img = do
([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return
((shift w1 24) + (shift w2 16) + (shift w3 8) + w4,
(shift h1 24) + (shift h2 16) + (shift h3 8) + h4)
- _ -> fail "PNG parse error"
+ _ -> (hush . Left) "PNG parse error"
let (dpix, dpiy) = findpHYs rest''
return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy }
@@ -143,7 +147,7 @@ gifSize img = do
dpiX = 72,
dpiY = 72
}
- _ -> fail "GIF parse error"
+ _ -> (hush . Left) "GIF parse error"
jpegSize :: ByteString -> Maybe ImageSize
jpegSize img = do
@@ -174,36 +178,37 @@ findJfifSize bs = do
Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do
case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of
[h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2)
- _ -> fail "JPEG parse error"
+ _ -> (hush . Left) "JPEG parse error"
Just (_,bs'') -> do
case map fromIntegral $ unpack $ B.take 2 bs'' of
[c1,c2] -> do
let len = shift c1 8 + c2
-- skip variables
findJfifSize $ B.drop len bs''
- _ -> fail "JPEG parse error"
- Nothing -> fail "Did not find length record"
+ _ -> (hush . Left) "JPEG parse error"
+ Nothing -> (hush . Left) "Did not find length record"
exifSize :: ByteString -> Maybe ImageSize
-exifSize bs = runGet (Just <$> exifHeader bl) bl
+exifSize bs = hush . runGet header $ bl
where bl = BL.fromChunks [bs]
+ header = runExceptT $ exifHeader bl
-- NOTE: It would be nicer to do
-- runGet ((Just <$> exifHeader) <|> return Nothing)
-- which would prevent pandoc from raising an error when an exif header can't
-- be parsed. But we only get an Alternative instance for Get in binary 0.6,
-- and binary 0.5 ships with ghc 7.6.
-exifHeader :: BL.ByteString -> Get ImageSize
+exifHeader :: BL.ByteString -> ExceptT String Get ImageSize
exifHeader hdr = do
- _app1DataSize <- getWord16be
- exifHdr <- getWord32be
- unless (exifHdr == 0x45786966) $ fail "Did not find exif header"
- zeros <- getWord16be
- unless (zeros == 0) $ fail "Expected zeros after exif header"
+ _app1DataSize <- lift getWord16be
+ exifHdr <- lift getWord32be
+ unless (exifHdr == 0x45786966) $ throwError "Did not find exif header"
+ zeros <- lift getWord16be
+ unless (zeros == 0) $ throwError "Expected zeros after exif header"
-- beginning of tiff header -- we read whole thing to use
-- in getting data from offsets:
let tiffHeader = BL.drop 8 hdr
- byteAlign <- getWord16be
+ byteAlign <- lift getWord16be
let bigEndian = byteAlign == 0x4d4d
let (getWord16, getWord32, getWord64) =
if bigEndian
@@ -213,17 +218,17 @@ exifHeader hdr = do
num <- getWord32
den <- getWord32
return $ fromIntegral num / fromIntegral den
- tagmark <- getWord16
- unless (tagmark == 0x002a) $ fail "Failed alignment sanity check"
- ifdOffset <- getWord32
- skip (fromIntegral ifdOffset - 8) -- skip to IDF
- numentries <- getWord16
- let ifdEntry = do
- tag <- getWord16 >>= \t ->
- maybe (return UnknownTagType) return
- (M.lookup t tagTypeTable)
- dataFormat <- getWord16
- numComponents <- getWord32
+ tagmark <- lift getWord16
+ unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check"
+ ifdOffset <- lift getWord32
+ lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF
+ numentries <- lift getWord16
+ let ifdEntry :: ExceptT String Get (TagType, DataFormat)
+ ifdEntry = do
+ tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable
+ <$> lift getWord16
+ dataFormat <- lift getWord16
+ numComponents <- lift getWord32
(fmt, bytesPerComponent) <-
case dataFormat of
1 -> return (UnsignedByte . runGet getWord8, 1)
@@ -238,9 +243,10 @@ exifHeader hdr = do
10 -> return (SignedRational . runGet getRational, 8)
11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4)
12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8)
- _ -> fail $ "Unknown data format " ++ show dataFormat
+ _ -> throwError $ "Unknown data format " ++ show dataFormat
let totalBytes = fromIntegral $ numComponents * bytesPerComponent
- payload <- if totalBytes <= 4 -- data is right here
+ payload <- lift $
+ if totalBytes <= 4 -- data is right here
then fmt <$>
(getLazyByteString (fromIntegral totalBytes) <*
skip (4 - totalBytes))
@@ -252,9 +258,9 @@ exifHeader hdr = do
entries <- sequence $ replicate (fromIntegral numentries) ifdEntry
subentries <- case lookup ExifOffset entries of
Just (UnsignedLong offset) -> do
- pos <- bytesRead
- skip (fromIntegral offset - (fromIntegral pos - 8))
- numsubentries <- getWord16
+ pos <- lift bytesRead
+ lift $ skip (fromIntegral offset - (fromIntegral pos - 8))
+ numsubentries <- lift getWord16
sequence $
replicate (fromIntegral numsubentries) ifdEntry
_ -> return []
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 3b3b3b5b3..2fdba93e0 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -328,7 +328,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("oth","application/vnd.oasis.opendocument.text-web")
,("otp","application/vnd.oasis.opendocument.presentation-template")
,("ots","application/vnd.oasis.opendocument.spreadsheet-template")
- ,("otf","application/x-font-opentype")
+ ,("otf","application/vnd.ms-opentype")
,("ott","application/vnd.oasis.opendocument.text-template")
,("oza","application/x-oz-application")
,("p","text/x-pascal")
@@ -477,6 +477,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("vrml","model/vrml")
,("vs","text/plain")
,("vsd","application/vnd.visio")
+ ,("vtt","text/vtt")
,("wad","application/x-doom")
,("wav","audio/x-wav")
,("wax","audio/x-ms-wax")
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index 5921b56cf..1246cdc8f 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}
{-
Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
@@ -46,13 +46,15 @@ import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Maybe (fromMaybe)
import System.IO (stderr)
+import Data.Data (Data)
+import Data.Typeable (Typeable)
-- | A container for a collection of binary resources, with names and
-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty'
-- can be used for an empty 'MediaBag', and '<>' can be used to append
-- two 'MediaBag's.
-newtype MediaBag = MediaBag (M.Map String (MimeType, BL.ByteString))
- deriving (Monoid)
+newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString))
+ deriving (Monoid, Data, Typeable)
instance Show MediaBag where
show bag = "MediaBag " ++ show (mediaDirectory bag)
@@ -65,7 +67,7 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource
-> MediaBag
-> MediaBag
insertMedia fp mbMime contents (MediaBag mediamap) =
- MediaBag (M.insert fp (mime, contents) mediamap)
+ MediaBag (M.insert (splitPath fp) (mime, contents) mediamap)
where mime = fromMaybe fallback mbMime
fallback = case takeExtension fp of
".gz" -> getMimeTypeDef $ dropExtension fp
@@ -75,14 +77,14 @@ insertMedia fp mbMime contents (MediaBag mediamap) =
lookupMedia :: FilePath
-> MediaBag
-> Maybe (MimeType, BL.ByteString)
-lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap
+lookupMedia fp (MediaBag mediamap) = M.lookup (splitPath fp) mediamap
-- | Get a list of the file paths stored in a 'MediaBag', with
-- their corresponding mime types and the lengths in bytes of the contents.
mediaDirectory :: MediaBag -> [(String, MimeType, Int)]
mediaDirectory (MediaBag mediamap) =
M.foldWithKey (\fp (mime,contents) ->
- ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
+ (((joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap
-- | Extract contents of MediaBag to a given directory. Print informational
-- messages if 'verbose' is true.
@@ -93,7 +95,7 @@ extractMediaBag :: Bool
extractMediaBag verbose dir (MediaBag mediamap) = do
sequence_ $ M.foldWithKey
(\fp (_ ,contents) ->
- ((writeMedia verbose dir (fp, contents)):)) [] mediamap
+ ((writeMedia verbose dir (joinPath fp, contents)):)) [] mediamap
writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO ()
writeMedia verbose dir (subpath, bs) = do
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 84ccbbdc9..1776d14e5 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE DeriveDataTypeable #-}
{-
Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
@@ -51,6 +52,8 @@ import Data.Default
import Text.Pandoc.Highlighting (Style, pygments)
import Text.Pandoc.MediaBag (MediaBag)
import Data.Monoid
+import Data.Data (Data)
+import Data.Typeable (Typeable)
-- | Individually selectable syntax extensions.
data Extension =
@@ -74,7 +77,7 @@ data Extension =
| Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only)
| Ext_fenced_code_blocks -- ^ Parse fenced code blocks
| Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
- | Ext_backtick_code_blocks -- ^ Github style ``` code blocks
+ | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks
| Ext_inline_code_attributes -- ^ Allow attributes on inline code
| Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
| Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
@@ -109,7 +112,8 @@ data Extension =
| Ext_implicit_header_references -- ^ Implicit reference links for headers
| Ext_line_blocks -- ^ RST style line blocks
| Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
- deriving (Show, Read, Enum, Eq, Ord, Bounded)
+ | Ext_shortcut_reference_links -- ^ Shortcut reference links
+ deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable)
pandocExtensions :: Set Extension
pandocExtensions = Set.fromList
@@ -151,6 +155,7 @@ pandocExtensions = Set.fromList
, Ext_header_attributes
, Ext_implicit_header_references
, Ext_line_blocks
+ , Ext_shortcut_reference_links
]
phpMarkdownExtraExtensions :: Set Extension
@@ -164,6 +169,7 @@ phpMarkdownExtraExtensions = Set.fromList
, Ext_intraword_underscores
, Ext_header_attributes
, Ext_abbreviations
+ , Ext_shortcut_reference_links
]
githubMarkdownExtensions :: Set Extension
@@ -180,6 +186,7 @@ githubMarkdownExtensions = Set.fromList
, Ext_strikeout
, Ext_hard_line_breaks
, Ext_lists_without_preceding_blankline
+ , Ext_shortcut_reference_links
]
multimarkdownExtensions :: Set Extension
@@ -202,7 +209,9 @@ multimarkdownExtensions = Set.fromList
strictExtensions :: Set Extension
strictExtensions = Set.fromList
- [ Ext_raw_html ]
+ [ Ext_raw_html
+ , Ext_shortcut_reference_links
+ ]
data ReaderOptions = ReaderOptions{
readerExtensions :: Set Extension -- ^ Syntax extensions
@@ -220,7 +229,7 @@ data ReaderOptions = ReaderOptions{
, readerDefaultImageExtension :: String -- ^ Default extension for images
, readerTrace :: Bool -- ^ Print debugging info
, readerTrackChanges :: TrackChanges
-} deriving (Show, Read)
+} deriving (Show, Read, Data, Typeable)
instance Default ReaderOptions
where def = ReaderOptions{
@@ -242,7 +251,7 @@ instance Default ReaderOptions
-- Writer options
--
-data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read)
+data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable)
data HTMLMathMethod = PlainMath
| LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
@@ -251,18 +260,19 @@ data HTMLMathMethod = PlainMath
| WebTeX String -- url of TeX->image script.
| MathML (Maybe String) -- url of MathMLinHTML.js
| MathJax String -- url of MathJax.js
- deriving (Show, Read, Eq)
+ | KaTeX String String -- url of stylesheet and katex.js
+ deriving (Show, Read, Eq, Data, Typeable)
data CiteMethod = Citeproc -- use citeproc to render them
| Natbib -- output natbib cite commands
| Biblatex -- output biblatex cite commands
- deriving (Show, Read, Eq)
+ deriving (Show, Read, Eq, Data, Typeable)
-- | Methods for obfuscating email addresses in HTML.
data ObfuscationMethod = NoObfuscation
| ReferenceObfuscation
| JavascriptObfuscation
- deriving (Show, Read, Eq)
+ deriving (Show, Read, Eq, Data, Typeable)
-- | Varieties of HTML slide shows.
data HTMLSlideVariant = S5Slides
@@ -271,13 +281,13 @@ data HTMLSlideVariant = S5Slides
| DZSlides
| RevealJsSlides
| NoSlides
- deriving (Show, Read, Eq)
+ deriving (Show, Read, Eq, Data, Typeable)
-- | Options for accepting or rejecting MS Word track-changes.
data TrackChanges = AcceptChanges
| RejectChanges
| AllChanges
- deriving (Show, Read, Eq)
+ deriving (Show, Read, Eq, Data, Typeable)
-- | Options for writers
data WriterOptions = WriterOptions
@@ -322,7 +332,9 @@ data WriterOptions = WriterOptions
, writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified
, writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified
, writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader
- } deriving Show
+ , writerVerbose :: Bool -- ^ Verbose debugging output
+ , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine
+ } deriving (Show, Data, Typeable)
instance Default WriterOptions where
def = WriterOptions { writerStandalone = False
@@ -365,6 +377,8 @@ instance Default WriterOptions where
, writerReferenceODT = Nothing
, writerReferenceDocx = Nothing
, writerMediaBag = mempty
+ , writerVerbose = False
+ , writerLaTeXArgs = []
}
-- | Returns True if the given extension is enabled.
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index d5f7c609d..2d602a0df 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -36,10 +36,11 @@ import qualified Data.ByteString.Lazy.Char8 as BC
import qualified Data.ByteString as BS
import System.Exit (ExitCode (..))
import System.FilePath
+import System.IO (stderr, stdout)
import System.Directory
import Data.Digest.Pure.SHA (showDigest, sha1)
import System.Environment
-import Control.Monad (unless, (<=<))
+import Control.Monad (unless, when, (<=<))
import qualified Control.Exception as E
import Control.Applicative ((<$))
import Data.List (isInfixOf)
@@ -70,7 +71,8 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex)
makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
doc' <- handleImages opts tmpdir doc
let source = writer opts doc'
- tex2pdf' tmpdir program source
+ args = writerLaTeXArgs opts
+ tex2pdf' (writerVerbose opts) args tmpdir program source
handleImages :: WriterOptions
-> FilePath -- ^ temp dir to store images
@@ -106,8 +108,7 @@ convertImages tmpdir (Image ils (src, tit)) = do
img <- convertImage tmpdir src
newPath <-
case img of
- Left e -> src <$
- warn ("Unable to convert image `" ++ src ++ "':\n" ++ e)
+ Left e -> src <$ warn e
Right fp -> return fp
return (Image ils (newPath, tit))
convertImages _ x = return x
@@ -121,7 +122,8 @@ convertImage tmpdir fname =
Just "application/pdf" -> doNothing
_ -> JP.readImage fname >>= \res ->
case res of
- Left msg -> return $ Left msg
+ Left _ -> return $ Left $ "Unable to convert `" ++
+ fname ++ "' for use with pdflatex."
Right img ->
E.catch (Right fileOut <$ JP.savePngImage fileOut img) $
\(e :: E.SomeException) -> return (Left (show e))
@@ -130,22 +132,25 @@ convertImage tmpdir fname =
mime = getMimeType fname
doNothing = return (Right fname)
-tex2pdf' :: FilePath -- ^ temp directory for output
+tex2pdf' :: Bool -- ^ Verbose output
+ -> [String] -- ^ Arguments to the latex-engine
+ -> FilePath -- ^ temp directory for output
-> String -- ^ tex program
-> String -- ^ tex source
-> IO (Either ByteString ByteString)
-tex2pdf' tmpDir program source = do
+tex2pdf' verbose args tmpDir program source = do
let numruns = if "\\tableofcontents" `isInfixOf` source
then 3 -- to get page numbers
else 2 -- 1 run won't give you PDF bookmarks
- (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source
+ (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source
case (exit, mbPdf) of
(ExitFailure _, _) -> do
let logmsg = extractMsg log'
let extramsg =
case logmsg of
- x | "! Package inputenc Error" `BC.isPrefixOf` x ->
- "\nTry running pandoc with --latex-engine=xelatex."
+ x | ("! Package inputenc Error" `BC.isPrefixOf` x
+ && program /= "xelatex")
+ -> "\nTry running pandoc with --latex-engine=xelatex."
_ -> ""
return $ Left $ logmsg <> extramsg
(ExitSuccess, Nothing) -> return $ Left ""
@@ -170,9 +175,9 @@ extractMsg log' = do
-- Run a TeX program on an input bytestring and return (exit code,
-- contents of stdout, contents of produced PDF if any). Rerun
-- a fixed number of times to resolve references.
-runTeXProgram :: String -> Int -> FilePath -> String
+runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String
-> IO (ExitCode, ByteString, Maybe ByteString)
-runTeXProgram program runsLeft tmpDir source = do
+runTeXProgram verbose program args runNumber numRuns tmpDir source = do
let file = tmpDir </> "input.tex"
exists <- doesFileExist file
unless exists $ UTF8.writeFile file source
@@ -185,16 +190,31 @@ runTeXProgram program runsLeft tmpDir source = do
let file' = file
#endif
let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
- "-output-directory", tmpDir', file']
+ "-output-directory", tmpDir', file'] ++ args
env' <- getEnvironment
let sep = searchPathSeparator:[]
let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++)
$ lookup "TEXINPUTS" env'
let env'' = ("TEXINPUTS", texinputs) :
[(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
+ when (verbose && runNumber == 1) $ do
+ putStrLn $ "[makePDF] Command line:"
+ putStrLn $ program ++ " " ++ unwords (map show programArgs)
+ putStr "\n"
+ putStrLn $ "[makePDF] Environment:"
+ mapM_ print env''
+ putStr "\n"
+ putStrLn $ "[makePDF] Contents of " ++ file' ++ ":"
+ B.readFile file' >>= B.putStr
+ putStr "\n"
(exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty
- if runsLeft > 1
- then runTeXProgram program (runsLeft - 1) tmpDir source
+ when verbose $ do
+ putStrLn $ "[makePDF] Run #" ++ show runNumber
+ B.hPutStr stdout out
+ B.hPutStr stderr err
+ putStr "\n"
+ if runNumber <= numRuns
+ then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source
else do
let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
pdfExists <- doesFileExist pdfFile
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d1fba1e21..33120e55d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -65,6 +65,8 @@ module Text.Pandoc.Parsing ( anyLine,
widthsFromIndices,
gridTableWith,
readWith,
+ returnWarnings,
+ returnState,
readWithM,
testStringWith,
guardEnabled,
@@ -103,11 +105,8 @@ module Text.Pandoc.Parsing ( anyLine,
applyMacros',
Parser,
ParserT,
- F(..),
- runF,
- askF,
- asksF,
token,
+ generalize,
-- * Re-exports from Text.Pandoc.Parsec
Stream,
runParser,
@@ -162,6 +161,8 @@ module Text.Pandoc.Parsing ( anyLine,
setSourceColumn,
setSourceLine,
newPos,
+ addWarning,
+ (<+?>)
)
where
@@ -186,30 +187,16 @@ import Data.Default
import qualified Data.Set as Set
import Control.Monad.Reader
import Control.Monad.Identity
-import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative)
+import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$))
import Data.Monoid
import Data.Maybe (catMaybes)
+import Text.Pandoc.Error
+
type Parser t s = Parsec t s
type ParserT = ParsecT
-newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
-
-runF :: F a -> ParserState -> a
-runF = runReader . unF
-
-askF :: F ParserState
-askF = F ask
-
-asksF :: (ParserState -> a) -> F a
-asksF f = F $ asks f
-
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = liftM mconcat . sequence
-
-- | Parse any line of text
anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char]
anyLine = do
@@ -312,12 +299,14 @@ stringAnyCase (x:xs) = do
return (firstChar:rest)
-- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: Stream s m t => ParserT s st m a -> s -> ParserT s st m a
+parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a
parseFromString parser str = do
oldPos <- getPosition
oldInput <- getInput
setInput str
result <- parser
+ spaces
+ eof
setInput oldInput
setPosition oldPos
return result
@@ -452,7 +441,7 @@ uri = try $ do
let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit)
let entity = () <$ characterReference
let punct = skipMany1 (char ',')
- <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<'))
+ <|> () <$ (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
let uriChunk = skipMany1 wordChar
<|> percentEscaped
<|> entity
@@ -472,7 +461,12 @@ mathInlineWith op cl = try $ do
string op
notFollowedBy space
words' <- many1Till (count 1 (noneOf " \t\n\\")
- <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
+ <|> (char '\\' >>
+ -- This next clause is needed because \text{..} can
+ -- contain $, \(\), etc.
+ (try (string "text" >>
+ (("\\text" ++) <$> inBalancedBraces 0 ""))
+ <|> (\c -> ['\\',c]) <$> anyChar))
<|> do (blankline <* notFollowedBy' blankline) <|>
(oneOf " \t" <* skipMany (oneOf " \t"))
notFollowedBy (char '$')
@@ -480,6 +474,23 @@ mathInlineWith op cl = try $ do
) (try $ string cl)
notFollowedBy digit -- to prevent capture of $5
return $ concat words'
+ where
+ inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String
+ inBalancedBraces 0 "" = do
+ c <- anyChar
+ if c == '{'
+ then inBalancedBraces 1 "{"
+ else mzero
+ inBalancedBraces 0 s = return $ reverse s
+ inBalancedBraces numOpen ('\\':xs) = do
+ c <- anyChar
+ inBalancedBraces numOpen (c:'\\':xs)
+ inBalancedBraces numOpen xs = do
+ c <- anyChar
+ case c of
+ '}' -> inBalancedBraces (numOpen - 1) (c:xs)
+ '{' -> inBalancedBraces (numOpen + 1) (c:xs)
+ _ -> inBalancedBraces numOpen (c:xs)
mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
mathDisplayWith op cl = try $ do
@@ -837,27 +848,30 @@ readWithM :: (Monad m, Functor m)
=> ParserT [Char] st m a -- ^ parser
-> st -- ^ initial state
-> String -- ^ input
- -> m a
+ -> m (Either PandocError a)
readWithM parser state input =
- handleError <$> (runParserT parser state "source" input)
- where
- handleError (Left err') =
- let errPos = errorPos err'
- errLine = sourceLine errPos
- errColumn = sourceColumn errPos
- theline = (lines input ++ [""]) !! (errLine - 1)
- in error $ "\nError at " ++ show err' ++ "\n" ++
- theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++
- "^"
- handleError (Right result) = result
+ mapLeft (ParsecError input) <$> runParserT parser state "source" input
+
-- | Parse a string with a given parser and state
readWith :: Parser [Char] st a
-> st
-> String
- -> a
+ -> Either PandocError a
readWith p t inp = runIdentity $ readWithM p t inp
+returnWarnings :: (Stream s m c)
+ => ParserT s ParserState m a
+ -> ParserT s ParserState m (a, [String])
+returnWarnings p = do
+ doc <- p
+ warnings <- stateWarnings <$> getState
+ return (doc, warnings)
+
+-- | Return the final internal state with the result of a parser
+returnState :: (Stream s m c) => ParsecT s st m a -> ParsecT s st m (a, st)
+returnState p = (,) <$> p <*> getState
+
-- | Parse a string with @parser@ (for testing).
testStringWith :: (Show a, Stream [Char] Identity Char)
=> ParserT [Char] ParserState Identity a
@@ -879,7 +893,6 @@ data ParserState = ParserState
stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
stateMeta :: Meta, -- ^ Document metadata
- stateMeta' :: F Meta, -- ^ Document metadata
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links)
stateIdentifiers :: [String], -- ^ List of header identifiers used
@@ -888,14 +901,14 @@ data ParserState = ParserState
stateHasChapters :: Bool, -- ^ True if \chapter encountered
stateMacros :: [Macro], -- ^ List of macros defined so far
stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role
- stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles
+ stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles
-- Triple represents: 1) Base role, 2) Optional format (only for :raw:
- -- roles), 3) Source language annotation for code (could be used to
- -- annotate role classes too).
+ -- roles), 3) Additional classes (rest of Attr is unused)).
stateCaption :: Maybe Inlines, -- ^ Caption in current environment
stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context
- stateWarnings :: [String] -- ^ Warnings generated by the parser
+ stateWarnings :: [String], -- ^ Warnings generated by the parser
+ stateInFootnote :: Bool -- ^ True if in a footnote block.
}
instance Default ParserState where
@@ -977,7 +990,6 @@ defaultParserState =
stateNotes = [],
stateNotes' = [],
stateMeta = nullMeta,
- stateMeta' = return nullMeta,
stateHeaderTable = [],
stateHeaders = M.empty,
stateIdentifiers = [],
@@ -990,7 +1002,8 @@ defaultParserState =
stateCaption = Nothing,
stateInHtmlBlock = Nothing,
stateMarkdownAttribute = False,
- stateWarnings = []}
+ stateWarnings = [],
+ stateInFootnote = False }
-- | Succeed only if the extension is enabled.
guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
@@ -1029,7 +1042,7 @@ data QuoteContext
type NoteTable = [(String, String)]
-type NoteTable' = [(String, F Blocks)] -- used in markdown reader
+type NoteTable' = [(String, Blocks)] -- used in markdown reader
newtype Key = Key String deriving (Show, Read, Eq, Ord)
@@ -1178,7 +1191,7 @@ citeKey = try $ do
guard =<< notAfterString
suppress_author <- option False (char '-' *> return True)
char '@'
- firstChar <- letter <|> char '_'
+ firstChar <- alphaNum <|> char '_'
let regchar = satisfy (\c -> isAlphaNum c || c == '_')
let internal p = try $ p <* lookAhead regchar
rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
@@ -1223,3 +1236,17 @@ applyMacros' target = do
then do macros <- extractMacros <$> getState
return $ applyMacros macros target
else return target
+
+-- | Append a warning to the log.
+addWarning :: (Stream s m c) => Maybe SourcePos -> String -> ParserT s ParserState m ()
+addWarning mbpos msg =
+ updateState $ \st -> st{
+ stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) :
+ stateWarnings st }
+
+generalize :: (Monad m) => Parser s st a -> ParserT s st m a
+generalize m = mkPT (\ s -> (return $ (return . runIdentity) <$> runIdentity (runParsecT m s)))
+
+infixr 5 <+?>
+(<+?>) :: (Monoid a, Monad m) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
+a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 1e72c2040..9a97dfc21 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -250,6 +250,11 @@ renderDoc :: (IsString a, Monoid a)
=> Doc -> DocState a
renderDoc = renderList . toList . unDoc
+data IsBlock = IsBlock Int [String]
+
+-- This would be nicer with a pattern synonym
+-- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..)
+
renderList :: (IsString a, Monoid a)
=> [D] -> DocState a
renderList [] = return ()
@@ -286,6 +291,9 @@ renderList (BlankLines num : xs) = do
| otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n")
renderList xs
+renderList (CarriageReturn : BlankLines m : xs) =
+ renderList (BlankLines m : xs)
+
renderList (CarriageReturn : xs) = do
st <- get
if newlines st > 0 || null xs
@@ -320,11 +328,11 @@ renderList (BreakingSpace : xs) = do
outp 1 " "
renderList xs'
-renderList (b1@Block{} : b2@Block{} : xs) =
- renderList (mergeBlocks False b1 b2 : xs)
+renderList (Block i1 s1 : Block i2 s2 : xs) =
+ renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs)
-renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) =
- renderList (mergeBlocks True b1 b2 : xs)
+renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) =
+ renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs)
renderList (Block width lns : xs) = do
st <- get
@@ -336,15 +344,14 @@ renderList (Block width lns : xs) = do
modify $ \s -> s{ prefix = oldPref }
renderList xs
-mergeBlocks :: Bool -> D -> D -> D
-mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) =
+mergeBlocks :: Bool -> IsBlock -> IsBlock -> D
+mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) =
Block (w1 + w2 + if addSpace then 1 else 0) $
zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties)
where empties = replicate (abs $ length lns1 - length lns2) ""
pad n s = s ++ replicate (n - realLength s) ' '
sp "" = ""
sp xs = if addSpace then (' ' : xs) else xs
-mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!"
blockToDoc :: Int -> [String] -> Doc
blockToDoc _ lns = text $ intercalate "\n" lns
@@ -531,4 +538,4 @@ charWidth c =
-- | Get real length of string, taking into account combining and double-wide
-- characters.
realLength :: String -> Int
-realLength = sum . map charWidth
+realLength = foldr (\a b -> charWidth a + b) 0
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
new file mode 100644
index 000000000..51a35c8ad
--- /dev/null
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -0,0 +1,119 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.CommonMark
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of CommonMark-formatted plain text to 'Pandoc' document.
+
+CommonMark is a strongly specified variant of Markdown: http://commonmark.org.
+-}
+module Text.Pandoc.Readers.CommonMark (readCommonMark)
+where
+
+import CMark
+import Data.Text (unpack, pack)
+import Data.List (groupBy)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Error
+
+-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
+readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc
+readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack
+ where opts' = if readerSmart opts
+ then [optNormalize, optSmart]
+ else [optNormalize]
+
+nodeToPandoc :: Node -> Pandoc
+nodeToPandoc (Node _ DOCUMENT nodes) =
+ Pandoc nullMeta $ foldr addBlock [] nodes
+nodeToPandoc n = -- shouldn't happen
+ Pandoc nullMeta $ foldr addBlock [] [n]
+
+addBlocks :: [Node] -> [Block]
+addBlocks = foldr addBlock []
+
+addBlock :: Node -> [Block] -> [Block]
+addBlock (Node _ PARAGRAPH nodes) =
+ (Para (addInlines nodes) :)
+addBlock (Node _ HRULE _) =
+ (HorizontalRule :)
+addBlock (Node _ BLOCK_QUOTE nodes) =
+ (BlockQuote (addBlocks nodes) :)
+addBlock (Node _ (HTML t) _) =
+ (RawBlock (Format "html") (unpack t) :)
+addBlock (Node _ (CODE_BLOCK info t) _) =
+ (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
+addBlock (Node _ (HEADER lev) nodes) =
+ (Header lev ("",[],[]) (addInlines nodes) :)
+addBlock (Node _ (LIST listAttrs) nodes) =
+ (constructor (map (setTightness . addBlocks . children) nodes) :)
+ where constructor = case listType listAttrs of
+ BULLET_LIST -> BulletList
+ ORDERED_LIST -> OrderedList
+ (start, DefaultStyle, delim)
+ start = listStart listAttrs
+ setTightness = if listTight listAttrs
+ then map paraToPlain
+ else id
+ paraToPlain (Para xs) = Plain (xs)
+ paraToPlain x = x
+ delim = case listDelim listAttrs of
+ PERIOD_DELIM -> Period
+ PAREN_DELIM -> OneParen
+addBlock (Node _ ITEM _) = id -- handled in LIST
+addBlock _ = id
+
+children :: Node -> [Node]
+children (Node _ _ ns) = ns
+
+addInlines :: [Node] -> [Inline]
+addInlines = foldr addInline []
+
+addInline :: Node -> [Inline] -> [Inline]
+addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
+ where raw = unpack t
+ clumps = groupBy samekind raw
+ samekind ' ' ' ' = True
+ samekind ' ' _ = False
+ samekind _ ' ' = False
+ samekind _ _ = True
+ toinl (' ':_) = Space
+ toinl xs = Str xs
+addInline (Node _ LINEBREAK _) = (LineBreak :)
+addInline (Node _ SOFTBREAK _) = (Space :)
+addInline (Node _ (INLINE_HTML t) _) =
+ (RawInline (Format "html") (unpack t) :)
+addInline (Node _ (CODE t) _) =
+ (Code ("",[],[]) (unpack t) :)
+addInline (Node _ EMPH nodes) =
+ (Emph (addInlines nodes) :)
+addInline (Node _ STRONG nodes) =
+ (Strong (addInlines nodes) :)
+addInline (Node _ (LINK url title) nodes) =
+ (Link (addInlines nodes) (unpack url, unpack title) :)
+addInline (Node _ (IMAGE url title) nodes) =
+ (Image (addInlines nodes) (unpack url, unpack title) :)
+addInline _ = id
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 59ff3e717..98a142840 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -15,6 +15,9 @@ import Control.Applicative ((<$>))
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import Text.TeXMath (readMathML, writeTeX)
+import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.Compat.Except
+import Data.Default
{-
@@ -70,8 +73,8 @@ List of all DocBook tags, with [x] indicating implemented,
[x] book - A book
[x] bookinfo - Meta-information for a Book
[x] bridgehead - A free-floating heading
-[ ] callout - A “called out” description of a marked Area
-[ ] calloutlist - A list of Callouts
+[x] callout - A “called out” description of a marked Area
+[x] calloutlist - A list of Callouts
[x] caption - A caption
[x] caution - A note of caution
[x] chapter - A chapter, as of a book
@@ -81,7 +84,7 @@ List of all DocBook tags, with [x] indicating implemented,
[ ] citerefentry - A citation to a reference page
[ ] citetitle - The title of a cited work
[ ] city - The name of a city in an address
-[ ] classname - The name of a class, in the object-oriented programming sense
+[x] classname - The name of a class, in the object-oriented programming sense
[ ] classsynopsis - The syntax summary for a class definition
[ ] classsynopsisinfo - Information supplementing the contents of
a ClassSynopsis
@@ -169,9 +172,9 @@ List of all DocBook tags, with [x] indicating implemented,
[ ] guibutton - The text on a button in a GUI
[ ] guiicon - Graphic and/or text appearing as a icon in a GUI
[ ] guilabel - The text of a label in a GUI
-[ ] guimenu - The name of a menu in a GUI
-[ ] guimenuitem - The name of a terminal menu item in a GUI
-[ ] guisubmenu - The name of a submenu in a GUI
+[x] guimenu - The name of a menu in a GUI
+[x] guimenuitem - The name of a terminal menu item in a GUI
+[x] guisubmenu - The name of a submenu in a GUI
[ ] hardware - A physical part of a computer system
[ ] highlights - A summary of the main points of the discussed component
[ ] holder - The name of the individual or organization that holds a copyright
@@ -206,10 +209,10 @@ List of all DocBook tags, with [x] indicating implemented,
other dingbat
[ ] itermset - A set of index terms in the meta-information of a document
[ ] jobtitle - The title of an individual in an organization
-[ ] keycap - The text printed on a key on a keyboard
+[x] keycap - The text printed on a key on a keyboard
[ ] keycode - The internal, frequently numeric, identifier for a key
on a keyboard
-[ ] keycombo - A combination of input actions
+[x] keycombo - A combination of input actions
[ ] keysym - The symbolic name of a key on a keyboard
[ ] keyword - One of a set of keywords describing the content of a document
[ ] keywordset - A set of keywords describing the content of a document
@@ -237,7 +240,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] mediaobject - A displayed media object (video, audio, image, etc.)
[ ] mediaobjectco - A media object that contains callouts
[x] member - An element of a simple list
-[ ] menuchoice - A selection or series of selections from a menu
+[x] menuchoice - A selection or series of selections from a menu
[ ] methodname - The name of a method
[ ] methodparam - Parameters to a method
[ ] methodsynopsis - A syntax summary for a method
@@ -471,7 +474,7 @@ List of all DocBook tags, with [x] indicating implemented,
[ ] token - A unit of information
[x] tr - A row in an HTML table
[ ] trademark - A trademark
-[ ] type - The classification of a value
+[x] type - The classification of a value
[x] ulink - A link that addresses its target by means of a URL
(Uniform Resource Locator)
[x] uri - A Uniform Resource Identifier
@@ -497,7 +500,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] ?asciidoc-br? - line break from asciidoc docbook output
-}
-type DB = State DBState
+type DB = ExceptT PandocError (State DBState)
data DBState = DBState{ dbSectionLevel :: Int
, dbQuoteType :: QuoteType
@@ -507,16 +510,18 @@ data DBState = DBState{ dbSectionLevel :: Int
, dbFigureTitle :: Inlines
} deriving Show
-readDocBook :: ReaderOptions -> String -> Pandoc
-readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs)
- where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp')
- DBState{ dbSectionLevel = 0
- , dbQuoteType = DoubleQuote
- , dbMeta = mempty
- , dbAcceptsMeta = False
- , dbBook = False
- , dbFigureTitle = mempty
- }
+instance Default DBState where
+ def = DBState{ dbSectionLevel = 0
+ , dbQuoteType = DoubleQuote
+ , dbMeta = mempty
+ , dbAcceptsMeta = False
+ , dbBook = False
+ , dbFigureTitle = mempty }
+
+
+readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc
+readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs
+ where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp'
inp' = handleInstructions inp
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
@@ -603,7 +608,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags
"important","caution","note","tip","warning","qandadiv",
"question","answer","abstract","itemizedlist","orderedlist",
"variablelist","article","book","table","informaltable",
- "screen","programlisting","example"]
+ "screen","programlisting","example","calloutlist"]
isBlockElement _ = False
-- Trim leading and trailing newline characters
@@ -712,6 +717,7 @@ parseBlock (Elem e) =
"question" -> addToStart (strong (str "Q:") <> str " ") <$> getBlocks e
"answer" -> addToStart (strong (str "A:") <> str " ") <$> getBlocks e
"abstract" -> blockQuote <$> getBlocks e
+ "calloutlist" -> bulletList <$> callouts
"itemizedlist" -> bulletList <$> listitems
"orderedlist" -> do
let listStyle = case attrValue "numeration" e of
@@ -772,11 +778,6 @@ parseBlock (Elem e) =
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
$ trimNl $ strContentRecursive e
- strContentRecursive = strContent . (\e' -> e'{ elContent =
- map elementToStr $ elContent e' })
- elementToStr :: Content -> Content
- elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
- elementToStr x = x
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -785,6 +786,7 @@ parseBlock (Elem e) =
contents <- getBlocks e
return $ blockQuote (contents <> attrib)
listitems = mapM getBlocks $ filterChildren (named "listitem") e
+ callouts = mapM getBlocks $ filterChildren (named "callout") e
deflistitems = mapM parseVarListEntry $ filterChildren
(named "varlistentry") e
parseVarListEntry e' = do
@@ -866,18 +868,29 @@ parseBlock (Elem e) =
parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
sect n = do isbook <- gets dbBook
let n' = if isbook || n == 0 then n + 1 else n
- headerText <- case filterChild (named "title") e of
+ headerText <- case filterChild (named "title") e `mplus`
+ (filterChild (named "info") e >>=
+ filterChild (named "title")) of
Just t -> getInlines t
Nothing -> return mempty
modify $ \st -> st{ dbSectionLevel = n }
b <- getBlocks e
+ let ident = attrValue "id" e
modify $ \st -> st{ dbSectionLevel = n - 1 }
- return $ header n' headerText <> b
+ return $ headerWith (ident,[],[]) n' headerText <> b
metaBlock = acceptingMetadata (getBlocks e) >> return mempty
getInlines :: Element -> DB Inlines
getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e')
+strContentRecursive :: Element -> String
+strContentRecursive = strContent .
+ (\e' -> e'{ elContent = map elementToStr $ elContent e' })
+
+elementToStr :: Content -> Content
+elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
+elementToStr x = x
+
parseInline :: Content -> DB Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
@@ -901,6 +914,7 @@ parseInline (Elem e) =
else doubleQuoted contents
"simplelist" -> simpleList
"segmentedlist" -> segmentedList
+ "classname" -> codeWithLang
"code" -> codeWithLang
"filename" -> codeWithLang
"literal" -> codeWithLang
@@ -920,6 +934,10 @@ parseInline (Elem e) =
"constant" -> codeWithLang
"userinput" -> codeWithLang
"varargs" -> return $ code "(...)"
+ "keycap" -> return (str $ strContent e)
+ "keycombo" -> keycombo <$> (mapM parseInline $ elContent e)
+ "menuchoice" -> menuchoice <$> (mapM parseInline $
+ filter isGuiMenu $ elContent e)
"xref" -> return $ str "?" -- so at least you know something is there
"email" -> return $ link ("mailto:" ++ strContent e) ""
$ str $ strContent e
@@ -959,7 +977,7 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ strContent e
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines
(filterChildren (named "member") e)
segmentedList = do
@@ -974,3 +992,10 @@ parseInline (Elem e) =
then mempty
else strong tit <> linebreak
return $ linebreak <> tit' <> segs
+ keycombo = spanWith ("",["keycombo"],[]) .
+ mconcat . intersperse (str "+")
+ menuchoice = spanWith ("",["menuchoice"],[]) .
+ mconcat . intersperse (text " > ")
+ isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x ||
+ named "guimenuitem" x
+ isGuiMenu _ = False
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 8ebe59569..67a97ae85 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -84,8 +84,7 @@ import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Shared
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
-import Data.Maybe (isJust)
-import Data.List (delete, stripPrefix, (\\), intersect)
+import Data.List (delete, (\\), intersect)
import Data.Monoid
import Text.TeXMath (writeTeX)
import Data.Default (Default)
@@ -97,14 +96,17 @@ import Control.Applicative ((<$>))
import Data.Sequence (ViewL(..), viewl)
import qualified Data.Sequence as Seq (null)
+import Text.Pandoc.Error
+import Text.Pandoc.Compat.Except
+
readDocx :: ReaderOptions
-> B.ByteString
- -> (Pandoc, MediaBag)
+ -> Either PandocError (Pandoc, MediaBag)
readDocx opts bytes =
case archiveToDocx (toArchive bytes) of
- Right docx -> (Pandoc meta blks, mediaBag) where
- (meta, blks, mediaBag) = (docxToOutput opts docx)
- Left _ -> error $ "couldn't parse docx file"
+ Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag))
+ <$> (docxToOutput opts docx)
+ Left _ -> Left (ParseFailure "couldn't parse docx file")
data DState = DState { docxAnchorMap :: M.Map String String
, docxMediaBag :: MediaBag
@@ -123,10 +125,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions
instance Default DEnv where
def = DEnv def False
-type DocxContext = ReaderT DEnv (State DState)
+type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState))
-evalDocxContext :: DocxContext a -> DEnv -> DState -> a
-evalDocxContext ctx env st = evalState (runReaderT ctx env) st
+evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a
+evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx
-- This is empty, but we put it in for future-proofing.
spansToKeep :: [String]
@@ -197,9 +199,6 @@ fixAuthors mv = mv
codeStyles :: [String]
codeStyles = ["VerbatimChar"]
-blockQuoteDivs :: [String]
-blockQuoteDivs = ["Quote", "BlockQuote", "BlockQuotation"]
-
codeDivs :: [String]
codeDivs = ["SourceCode"]
@@ -281,7 +280,13 @@ runToInlines :: Run -> DocxContext Inlines
runToInlines (Run rs runElems)
| Just (s, _) <- rStyle rs
, s `elem` codeStyles =
- return $ code $ concatMap runElemToString runElems
+ let rPr = resolveDependentRunStyle rs
+ codeString = code $ concatMap runElemToString runElems
+ in
+ return $ case rVertAlign rPr of
+ Just SupScrpt -> superscript codeString
+ Just SubScrpt -> subscript codeString
+ _ -> codeString
| otherwise = do
let ils = concatReduce (map runElemToInlines runElems)
return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
@@ -401,7 +406,9 @@ singleParaToPlain blks
singleParaToPlain blks = blks
cellToBlocks :: Cell -> DocxContext Blocks
-cellToBlocks (Cell bps) = concatReduce <$> mapM bodyPartToBlocks bps
+cellToBlocks (Cell bps) = do
+ blks <- concatReduce <$> mapM bodyPartToBlocks bps
+ return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
rowToBlocksList :: Row -> DocxContext [Blocks]
rowToBlocksList (Row cells) = do
@@ -427,9 +434,9 @@ parStyleToTransform pPr
let pPr' = pPr { pStyle = cs, indentation = Nothing}
in
(divWith ("", [c], [])) . (parStyleToTransform pPr')
- | (c:cs) <- pStyle pPr
- , c `elem` blockQuoteDivs =
- let pPr' = pPr { pStyle = cs \\ blockQuoteDivs }
+ | (_:cs) <- pStyle pPr
+ , Just True <- pBlockQuote pPr =
+ let pPr' = pPr { pStyle = cs }
in
blockQuote . (parStyleToTransform pPr')
| (_:cs) <- pStyle pPr =
@@ -460,13 +467,11 @@ bodyPartToBlocks (Paragraph pPr parparts)
$ parStyleToTransform pPr
$ codeBlock
$ concatMap parPartToString parparts
- | (c : cs) <- filter (isJust . isHeaderClass) $ pStyle pPr
- , Just n <- isHeaderClass c = do
+ | Just (style, n) <- pHeading pPr = do
ils <- local (\s-> s{docxInHeaderBlock=True}) $
(concatReduce <$> mapM parPartToInlines parparts)
-
makeHeaderAnchor $
- headerWith ("", delete ("Heading" ++ show n) cs, []) n ils
+ headerWith ("", delete style (pStyle pPr), []) n ils
| otherwise = do
ils <- concatReduce <$> mapM parPartToInlines parparts >>=
(return . fromList . trimLineBreaks . normalizeSpaces . toList)
@@ -535,34 +540,21 @@ rewriteLink' l@(Link ils ('#':target, title)) = do
Nothing -> l
rewriteLink' il = return il
-rewriteLink :: Blocks -> DocxContext Blocks
-rewriteLink ils = case viewl $ unMany ils of
- (x :< xs) -> do
- x' <- walkM rewriteLink' x
- xs' <- rewriteLink $ Many xs
- return $ (singleton x') <> xs'
- EmptyL -> return ils
+rewriteLinks :: [Block] -> DocxContext [Block]
+rewriteLinks = mapM (walkM rewriteLink')
bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag)
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- concatReduce <$> mapM bodyPartToBlocks blkbps
- blks' <- rewriteLink blks
+ blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
mediaBag <- gets docxMediaBag
return $ (meta,
- blocksToDefinitions $ blocksToBullets $ toList blks',
+ blks',
mediaBag)
-docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
+docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag)
docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def
-
-isHeaderClass :: String -> Maybe Int
-isHeaderClass s | Just s' <- stripPrefix "Heading" s =
- case reads s' :: [(Int, String)] of
- [] -> Nothing
- ((n, "") : []) -> Just n
- _ -> Nothing
-isHeaderClass _ = Nothing
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index ea195c14a..c265ad074 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -160,8 +160,14 @@ flatToBullets' num xs@(b : elems)
flatToBullets :: [Block] -> [Block]
flatToBullets elems = flatToBullets' (-1) elems
+singleItemHeaderToHeader :: Block -> Block
+singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h
+singleItemHeaderToHeader blk = blk
+
+
blocksToBullets :: [Block] -> [Block]
blocksToBullets blks =
+ map singleItemHeaderToHeader $
bottomUp removeListDivs $
flatToBullets $ (handleListParagraphs blks)
@@ -221,7 +227,3 @@ removeListDivs = concatMap removeListDivs'
blocksToDefinitions :: [Block] -> [Block]
blocksToDefinitions = blocksToDefinitions' [] []
-
-
-
-
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index e7a6c3ffb..cce80fb48 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, ViewPatterns #-}
+{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-}
{-
Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -65,7 +65,8 @@ import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp)
-import Data.Char (readLitChar, ord, chr)
+import Text.Pandoc.Readers.Docx.Util
+import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envNumbering :: Numbering
@@ -73,6 +74,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
, envMedia :: Media
, envFont :: Maybe Font
, envCharStyles :: CharStyleMap
+ , envParStyles :: ParStyleMap
}
deriving Show
@@ -107,8 +109,6 @@ mapD f xs =
in
concatMapM handler xs
-type NameSpaces = [(String, String)]
-
data Docx = Docx Document
deriving Show
@@ -122,8 +122,12 @@ type Media = [(FilePath, B.ByteString)]
type CharStyle = (String, RunStyle)
+type ParStyle = (String, ParStyleData)
+
type CharStyleMap = M.Map String RunStyle
+type ParStyleMap = M.Map String ParStyleData
+
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
@@ -152,6 +156,9 @@ data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
data ParagraphStyle = ParagraphStyle { pStyle :: [String]
, indentation :: Maybe ParIndentation
, dropCap :: Bool
+ , pHeading :: Maybe (String, Int)
+ , pNumInfo :: Maybe (String, String)
+ , pBlockQuote :: Maybe Bool
}
deriving Show
@@ -159,6 +166,9 @@ defaultParagraphStyle :: ParagraphStyle
defaultParagraphStyle = ParagraphStyle { pStyle = []
, indentation = Nothing
, dropCap = False
+ , pHeading = Nothing
+ , pNumInfo = Nothing
+ , pBlockQuote = Nothing
}
@@ -213,6 +223,12 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
, rStyle :: Maybe CharStyle}
deriving Show
+data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
+ , isBlockQuote :: Maybe Bool
+ , numInfo :: Maybe (String, String)
+ , psStyle :: Maybe ParStyle}
+ deriving Show
+
defaultRunStyle :: RunStyle
defaultRunStyle = RunStyle { isBold = Nothing
, isItalic = Nothing
@@ -232,18 +248,14 @@ type ChangeId = String
type Author = String
type ChangeDate = String
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
-
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = do
let notes = archiveToNotes archive
numbering = archiveToNumbering archive
rels = archiveToRelationships archive
media = archiveToMedia archive
- styles = archiveToStyles archive
- rEnv = ReaderEnv notes numbering rels media Nothing styles
+ (styles, parstyles) = archiveToStyles archive
+ rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles
doc <- runD (archiveToDocument archive) rEnv
return $ Docx doc
@@ -252,7 +264,7 @@ archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+ let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
body <- elemToBody namespaces bodyElem
return $ Document namespaces body
@@ -263,47 +275,69 @@ elemToBody ns element | isElem ns "w" "body" element =
(\bps -> return $ Body bps)
elemToBody _ _ = throwError WrongElem
-archiveToStyles :: Archive -> CharStyleMap
+archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
archiveToStyles zf =
let stylesElem = findEntryByPath "word/styles.xml" zf >>=
(parseXMLDoc . UTF8.toStringLazy . fromEntry)
in
case stylesElem of
- Nothing -> M.empty
+ Nothing -> (M.empty, M.empty)
Just styElem ->
- let namespaces = mapMaybe attrToNSPair (elAttribs styElem)
+ let namespaces = elemToNameSpaces styElem
in
- M.fromList $ buildBasedOnList namespaces styElem Nothing
+ ( M.fromList $ buildBasedOnList namespaces styElem
+ (Nothing :: Maybe CharStyle),
+ M.fromList $ buildBasedOnList namespaces styElem
+ (Nothing :: Maybe ParStyle) )
-isBasedOnStyle :: NameSpaces -> Element -> Maybe CharStyle -> Bool
+isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttr (elemName ns "w" "type") element
+ , styleType == cStyleType parentStyle
, Just basedOnVal <- findChild (elemName ns "w" "basedOn") element >>=
findAttr (elemName ns "w" "val")
- , Just (parentId, _) <- parentStyle = (basedOnVal == parentId)
+ , Just ps <- parentStyle = (basedOnVal == getStyleId ps)
| isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttr (elemName ns "w" "type") element
+ , styleType == cStyleType parentStyle
, Nothing <- findChild (elemName ns "w" "basedOn") element
, Nothing <- parentStyle = True
| otherwise = False
-elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle
-elemToCharStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
- , Just styleId <- findAttr (elemName ns "w" "styleId") element =
- Just (styleId, elemToRunStyle ns element parentStyle)
- | otherwise = Nothing
-
-getStyleChildren :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle]
+class ElemToStyle a where
+ cStyleType :: Maybe a -> String
+ elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
+ getStyleId :: a -> String
+
+instance ElemToStyle CharStyle where
+ cStyleType _ = "character"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ Just (styleId, elemToRunStyle ns element parentStyle)
+ | otherwise = Nothing
+ getStyleId s = fst s
+
+instance ElemToStyle ParStyle where
+ cStyleType _ = "paragraph"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "paragraph" <- findAttr (elemName ns "w" "type") element
+ , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ Just (styleId, elemToParStyleData ns element parentStyle)
+ | otherwise = Nothing
+ getStyleId s = fst s
+
+getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
getStyleChildren ns element parentStyle
| isElem ns "w" "styles" element =
- mapMaybe (\e -> elemToCharStyle ns e parentStyle) $
+ mapMaybe (\e -> elemToStyle ns e parentStyle) $
filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
| otherwise = []
-buildBasedOnList :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle]
+buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
buildBasedOnList ns element rootStyle =
case (getStyleChildren ns element rootStyle) of
[] -> []
@@ -317,10 +351,10 @@ archiveToNotes zf =
enElem = findEntryByPath "word/endnotes.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
fn_namespaces = case fnElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
en_namespaces = case enElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
fn = fnElem >>= (elemToNotes ns "footnote")
@@ -420,7 +454,7 @@ archiveToNumbering' zf = do
Nothing -> Just $ Numbering [] [] []
Just entry -> do
numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
+ let namespaces = elemToNameSpaces numberingElem
numElems = findChildren
(QName "num" (lookup "w" namespaces) (Just "w"))
numberingElem
@@ -449,15 +483,6 @@ elemToNotes _ _ _ = Nothing
---------------------------------------------
---------------------------------------------
-elemName :: NameSpaces -> String -> String -> QName
-elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix))
-
-isElem :: NameSpaces -> String -> String -> Element -> Bool
-isElem ns prefix name element =
- qName (elName element) == name &&
- qURI (elName element) == (lookup prefix ns)
-
-
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
let cols = findChildren (elemName ns "w" "gridCol") element
@@ -510,20 +535,6 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
stringToInteger}
elemToParIndentation _ _ = Nothing
-
-elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-elemToNumInfo ns element | isElem ns "w" "p" element = do
- let pPr = findChild (elemName ns "w" "pPr") element
- numPr = pPr >>= findChild (elemName ns "w" "numPr")
- lvl <- numPr >>=
- findChild (elemName ns "w" "ilvl") >>=
- findAttr (elemName ns "w" "val")
- numId <- numPr >>=
- findChild (elemName ns "w" "numId") >>=
- findAttr (elemName ns "w" "val")
- return (numId, lvl)
-elemToNumInfo _ _ = Nothing
-
testBitMask :: String -> Int -> Bool
testBitMask bitMaskS n =
case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
@@ -542,18 +553,28 @@ elemToBodyPart ns element
return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
- , Just (numId, lvl) <- elemToNumInfo ns element = do
- let parstyle = elemToParagraphStyle ns element
+ , Just (numId, lvl) <- getNumInfo ns element = do
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
parparts <- mapD (elemToParPart ns) (elChildren element)
num <- asks envNumbering
case lookupLevel numId lvl num of
- Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
- Nothing -> throwError WrongElem
+ Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
+ Nothing -> throwError WrongElem
elemToBodyPart ns element
| isElem ns "w" "p" element = do
- let parstyle = elemToParagraphStyle ns element
- parparts <- mapD (elemToParPart ns) (elChildren element)
- return $ Paragraph parstyle parparts
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
+ parparts <- mapD (elemToParPart ns) (elChildren element)
+ case pNumInfo parstyle of
+ Just (numId, lvl) -> do
+ num <- asks envNumbering
+ case lookupLevel numId lvl num of
+ Just levelInfo ->
+ return $ ListItem parstyle numId lvl levelInfo parparts
+ Nothing ->
+ throwError WrongElem
+ Nothing -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChild (elemName ns "w" "tblPr") element
@@ -584,7 +605,7 @@ expandDrawingId s = do
target <- asks (lookupRelationship s . envRelationships)
case target of
Just filepath -> do
- bytes <- asks (lookup (combine "word" filepath) . envMedia)
+ bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
case bytes of
Just bs -> return (filepath, bs)
Nothing -> throwError DocxError
@@ -601,6 +622,16 @@ elemToParPart ns element
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs)
Nothing -> throwError WrongElem
+-- The below is an attempt to deal with images in deprecated vml format.
+elemToParPart ns element
+ | isElem ns "w" "r" element
+ , Just _ <- findChild (elemName ns "w" "pict") element =
+ let drawing = findElement (elemName ns "v" "imagedata") element
+ >>= findAttr (elemName ns "r" "id")
+ in
+ case drawing of
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs)
+ Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "r" element =
elemToRun ns element >>= (\r -> return $ PlainRun r)
@@ -625,17 +656,20 @@ elemToParPart ns element
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
+ , Just relId <- findAttr (elemName ns "r" "id") element = do
runs <- mapD (elemToRun ns) (elChildren element)
- return $ InternalHyperLink anchor runs
+ rels <- asks envRelationships
+ case lookupRelationship relId rels of
+ Just target -> do
+ case findAttr (elemName ns "w" "anchor") element of
+ Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
+ Nothing -> return $ ExternalHyperLink target runs
+ Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttr (elemName ns "r" "id") element = do
+ , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
runs <- mapD (elemToRun ns) (elChildren element)
- rels <- asks envRelationships
- return $ case lookupRelationship relId rels of
- Just target -> ExternalHyperLink target runs
- Nothing -> ExternalHyperLink "" runs
+ return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "m" "oMath" element =
(eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
@@ -684,14 +718,30 @@ elemToRun ns element
return $ Run runStyle runElems
elemToRun _ _ = throwError WrongElem
-elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
-elemToParagraphStyle ns element
+getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a
+getParentStyleValue field style
+ | Just value <- field style = Just value
+ | Just parentStyle <- psStyle style
+ = getParentStyleValue field (snd parentStyle)
+getParentStyleValue _ _ = Nothing
+
+getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] ->
+ Maybe a
+getParStyleField field stylemap styles
+ | x <- mapMaybe (\x -> M.lookup x stylemap) styles
+ , (y:_) <- mapMaybe (getParentStyleValue field) x
+ = Just y
+getParStyleField _ _ _ = Nothing
+
+elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle
+elemToParagraphStyle ns element sty
| Just pPr <- findChild (elemName ns "w" "pPr") element =
- ParagraphStyle
- {pStyle =
+ let style =
mapMaybe
(findAttr (elemName ns "w" "val"))
(findChildren (elemName ns "w" "pStyle") pPr)
+ in ParagraphStyle
+ {pStyle = style
, indentation =
findChild (elemName ns "w" "ind") pPr >>=
elemToParIndentation ns
@@ -703,8 +753,11 @@ elemToParagraphStyle ns element
Just "none" -> False
Just _ -> True
Nothing -> False
+ , pHeading = getParStyleField headingLev sty style
+ , pNumInfo = getParStyleField numInfo sty style
+ , pBlockQuote = getParStyleField isBlockQuote sty style
}
-elemToParagraphStyle _ _ = defaultParagraphStyle
+elemToParagraphStyle _ _ _ = defaultParagraphStyle
checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
checkOnOff ns rPr tag
@@ -758,6 +811,59 @@ elemToRunStyle ns element parentStyle
}
elemToRunStyle _ _ _ = defaultRunStyle
+isNumericNotNull :: String -> Bool
+isNumericNotNull str = (str /= []) && (all isDigit str)
+
+getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
+getHeaderLevel ns element
+ | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ , Just index <- stripPrefix "Heading" styleId
+ , isNumericNotNull index = Just (styleId, read index)
+ | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ , Just index <- findChild (elemName ns "w" "name") element >>=
+ findAttr (elemName ns "w" "val") >>=
+ stripPrefix "heading "
+ , isNumericNotNull index = Just (styleId, read index)
+getHeaderLevel _ _ = Nothing
+
+blockQuoteStyleIds :: [String]
+blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"]
+
+blockQuoteStyleNames :: [String]
+blockQuoteStyleNames = ["Quote", "Block Text"]
+
+getBlockQuote :: NameSpaces -> Element -> Maybe Bool
+getBlockQuote ns element
+ | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ , styleId `elem` blockQuoteStyleIds = Just True
+ | Just styleName <- findChild (elemName ns "w" "name") element >>=
+ findAttr (elemName ns "w" "val")
+ , styleName `elem` blockQuoteStyleNames = Just True
+getBlockQuote _ _ = Nothing
+
+getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo ns element = do
+ let numPr = findChild (elemName ns "w" "pPr") element >>=
+ findChild (elemName ns "w" "numPr")
+ lvl = fromMaybe "0" (numPr >>=
+ findChild (elemName ns "w" "ilvl") >>=
+ findAttr (elemName ns "w" "val"))
+ numId <- numPr >>=
+ findChild (elemName ns "w" "numId") >>=
+ findAttr (elemName ns "w" "val")
+ return (numId, lvl)
+
+
+elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
+elemToParStyleData ns element parentStyle =
+ ParStyleData
+ {
+ headingLev = getHeaderLevel ns element
+ , isBlockQuote = getBlockQuote ns element
+ , numInfo = getNumInfo ns element
+ , psStyle = parentStyle
+ }
+
elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element
| isElem ns "w" "t" element
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
new file mode 100644
index 000000000..2901ea2a3
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
@@ -0,0 +1,106 @@
+module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
+ , defaultStyleMaps
+ , getStyleMaps
+ , getStyleId
+ , hasStyleName
+ ) where
+
+import Text.XML.Light
+import Text.Pandoc.Readers.Docx.Util
+import Control.Monad.State
+import Data.Char (toLower)
+import qualified Data.Map as M
+
+newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
+newtype CharStyleMap = CharStyleMap ( M.Map String String )
+
+class StyleMap a where
+ alterMap :: (M.Map String String -> M.Map String String) -> a -> a
+ getMap :: a -> M.Map String String
+
+instance StyleMap ParaStyleMap where
+ alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
+ getMap (ParaStyleMap m) = m
+
+instance StyleMap CharStyleMap where
+ alterMap f (CharStyleMap m) = CharStyleMap $ f m
+ getMap (CharStyleMap m) = m
+
+insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
+insert (Just k) (Just v) m = alterMap (M.insert k v) m
+insert _ _ m = m
+
+getStyleId :: (StyleMap a) => String -> a -> String
+getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
+
+hasStyleName :: (StyleMap a) => String -> a -> Bool
+hasStyleName styleName = M.member (map toLower styleName) . getMap
+
+data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
+ , sParaStyleMap :: ParaStyleMap
+ , sCharStyleMap :: CharStyleMap
+ }
+
+data StyleType = ParaStyle | CharStyle
+
+defaultStyleMaps :: StyleMaps
+defaultStyleMaps = StyleMaps { sNameSpaces = []
+ , sParaStyleMap = ParaStyleMap M.empty
+ , sCharStyleMap = CharStyleMap M.empty
+ }
+
+type StateM a = State StyleMaps a
+
+getStyleMaps :: Element -> StyleMaps
+getStyleMaps docElem = execState genStyleMap state'
+ where
+ state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
+ genStyleItem e = do
+ styleType <- getStyleType e
+ styleId <- getAttrStyleId e
+ nameValLowercase <- fmap (map toLower) `fmap` getNameVal e
+ case styleType of
+ Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
+ Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
+ _ -> return ()
+ genStyleMap = do
+ style <- elemName' "style"
+ let styles = findChildren style docElem
+ forM_ styles genStyleItem
+
+modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
+modParaStyleMap f = modify $ \s ->
+ s {sParaStyleMap = f $ sParaStyleMap s}
+
+modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
+modCharStyleMap f = modify $ \s ->
+ s {sCharStyleMap = f $ sCharStyleMap s}
+
+getStyleType :: Element -> StateM (Maybe StyleType)
+getStyleType e = do
+ styleTypeStr <- getAttrType e
+ case styleTypeStr of
+ Just "paragraph" -> return $ Just ParaStyle
+ Just "character" -> return $ Just CharStyle
+ _ -> return Nothing
+
+getAttrType :: Element -> StateM (Maybe String)
+getAttrType el = do
+ name <- elemName' "type"
+ return $ findAttr name el
+
+getAttrStyleId :: Element -> StateM (Maybe String)
+getAttrStyleId el = do
+ name <- elemName' "styleId"
+ return $ findAttr name el
+
+getNameVal :: Element -> StateM (Maybe String)
+getNameVal el = do
+ name <- elemName' "name"
+ val <- elemName' "val"
+ return $ findChild name el >>= findAttr val
+
+elemName' :: String -> StateM QName
+elemName' name = do
+ namespaces <- gets sNameSpaces
+ return $ elemName namespaces "w" name
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
new file mode 100644
index 000000000..891f107b0
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -0,0 +1,26 @@
+module Text.Pandoc.Readers.Docx.Util (
+ NameSpaces
+ , elemName
+ , isElem
+ , elemToNameSpaces
+ ) where
+
+import Text.XML.Light
+import Data.Maybe (mapMaybe)
+
+type NameSpaces = [(String, String)]
+
+elemToNameSpaces :: Element -> NameSpaces
+elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+elemName :: NameSpaces -> String -> String -> QName
+elemName ns prefix name = QName name (lookup prefix ns) (Just prefix)
+
+isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem ns prefix name element =
+ qName (elName element) == name &&
+ qURI (elName element) == lookup prefix ns
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index b061d8683..338540533 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -35,18 +35,20 @@ import Control.DeepSeq.Generics (deepseq, NFData)
import Debug.Trace (trace)
+import Text.Pandoc.Error
+
type Items = M.Map String (FilePath, MimeType)
-readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)
+readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)
readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes)
-runEPUB :: Except String a -> a
-runEPUB = either error id . runExcept
+runEPUB :: Except PandocError a -> Either PandocError a
+runEPUB = runExcept
-- Note that internal reference are aggresively normalised so that all ids
-- are of the form "filename#id"
--
-archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
+archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
archiveToEPUB os archive = do
-- root is path to folder with manifest file in
(root, content) <- getManifest archive
@@ -64,19 +66,20 @@ archiveToEPUB os archive = do
return $ (ast, mediaBag)
where
os' = os {readerParseRaw = True}
- parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc
+ parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
when (readerTrace os) (traceM path)
doc <- mimeToReader mime r path
let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
return $ docSpan <> doc
- mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc
+ mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc
mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do
fname <- findEntryByPathE (root </> path) archive
- return $ fixInternalReferences path .
+ html <- either throwError return .
readHtml os' .
UTF8.toStringLazy $
fromEntry fname
+ return $ fixInternalReferences path html
mimeToReader s _ path
| s `elem` imageMimes = return $ imageToPandoc path
| otherwise = return $ mempty
@@ -114,7 +117,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"]
type CoverImage = FilePath
-parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items)
+parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items)
parseManifest content = do
manifest <- findElementE (dfName "manifest") content
let items = findChildren (dfName "item") manifest
@@ -130,7 +133,7 @@ parseManifest content = do
mime <- findAttrE (emptyName "media-type") e
return (uid, (href, mime))
-parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)]
+parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
spine <- findElementE (dfName "spine") e
let itemRefs = findChildren (dfName "itemref") spine
@@ -141,7 +144,7 @@ parseSpine is e = do
guard linear
findAttr (emptyName "idref") ref
-parseMeta :: MonadError String m => Element -> m Meta
+parseMeta :: MonadError PandocError m => Element -> m Meta
parseMeta content = do
meta <- findElementE (dfName "metadata") content
let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
@@ -159,7 +162,7 @@ renameMeta :: String -> String
renameMeta "creator" = "author"
renameMeta s = s
-getManifest :: MonadError String m => Archive -> m (String, Element)
+getManifest :: MonadError PandocError m => Archive -> m (String, Element)
getManifest archive = do
metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
@@ -266,18 +269,18 @@ emptyName s = QName s Nothing Nothing
-- Convert Maybe interface to Either
-findAttrE :: MonadError String m => QName -> Element -> m String
+findAttrE :: MonadError PandocError m => QName -> Element -> m String
findAttrE q e = mkE "findAttr" $ findAttr q e
-findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry
+findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry
findEntryByPathE (normalise -> path) a =
mkE ("No entry on path: " ++ path) $ findEntryByPath path a
-parseXMLDocE :: MonadError String m => String -> m Element
+parseXMLDocE :: MonadError PandocError m => String -> m Element
parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
-findElementE :: MonadError String m => QName -> Element -> m Element
+findElementE :: MonadError PandocError m => QName -> Element -> m Element
findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
-mkE :: MonadError String m => String -> Maybe a -> m a
-mkE s = maybe (throwError s) return
+mkE :: MonadError PandocError m => String -> Maybe a -> m a
+mkE s = maybe (throwError . ParseFailure $ s) return
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 4ea5f41d5..52358e51e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses,
+ViewPatterns#-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -43,7 +44,7 @@ import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, renderTags'
- , escapeURI, safeRead )
+ , escapeURI, safeRead, mapLeft )
import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
, Extension (Ext_epub_html_exts,
Ext_native_divs, Ext_native_spans))
@@ -62,15 +63,18 @@ import Text.TeXMath (readMathML, writeTeX)
import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
+import Text.Pandoc.Error
+
+import Text.Parsec.Error
+
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readHtml opts inp =
- case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of
- Left err' -> error $ "\nError at " ++ show err'
- Right result -> result
+ mapLeft (ParseFailure . getError) . flip runReader def $
+ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
@@ -78,6 +82,9 @@ readHtml opts inp =
meta <- stateMeta . parserState <$> getState
bs' <- replaceNotes (B.toList blocks)
return $ Pandoc meta bs'
+ getError (errorMessages -> ms) = case ms of
+ [] -> ""
+ (m:_) -> messageString m
replaceNotes :: [Block] -> TagParser [Block]
replaceNotes = walkM replaceNotes'
@@ -374,12 +381,20 @@ pTable = try $ do
caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
-- TODO actually read these and take width information from them
widths' <- pColgroup <|> many pCol
- head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
- skipMany pBlank
- rows <- pOptInTag "tbody"
- $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
- skipMany pBlank
+ let pTh = option [] $ pInTags "tr" (pCell "th")
+ pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th")
+ pTBody = do pOptInTag "tbody" $ many1 pTr
+ head'' <- pOptInTag "thead" pTh
+ head' <- pOptInTag "tbody" $ do
+ if null head''
+ then pTh
+ else return head''
+ rowsLs <- many pTBody
+ rows' <- pOptInTag "tfoot" $ many pTr
TagClose _ <- pSatisfy (~== TagClose "table")
+ let rows = (concat rowsLs) ++ rows'
+ -- fail on empty table
+ guard $ not $ null head' && null rows
let isSinglePlain x = case B.toList x of
[Plain _] -> True
_ -> False
@@ -440,7 +455,7 @@ pCodeBlock :: TagParser Blocks
pCodeBlock = try $ do
TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
- let rawText = concatMap fromTagText $ filter isTagText contents
+ let rawText = concatMap tagToString contents
-- drop leading newline if any
let result' = case rawText of
'\n':xs -> xs
@@ -451,6 +466,11 @@ pCodeBlock = try $ do
_ -> result'
return $ B.codeBlockWith (mkAttr attr) result
+tagToString :: Tag String -> String
+tagToString (TagText s) = s
+tagToString (TagOpen "br" _) = "\n"
+tagToString _ = ""
+
inline :: TagParser Inlines
inline = choice
[ eNoteref
@@ -619,14 +639,17 @@ pInTags tagtype parser = try $ do
pSatisfy (~== TagOpen tagtype [])
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
-pOptInTag :: String -> TagParser a
- -> TagParser a
-pOptInTag tagtype parser = try $ do
- open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True)
+-- parses p, preceeded by an optional opening tag
+-- and followed by an optional closing tags
+pOptInTag :: String -> TagParser a -> TagParser a
+pOptInTag tagtype p = try $ do
+ skipMany pBlank
+ optional $ pSatisfy (~== TagOpen tagtype [])
+ skipMany pBlank
+ x <- p
skipMany pBlank
- x <- parser
+ optional $ pSatisfy (~== TagClose tagtype)
skipMany pBlank
- when open $ pCloses tagtype
return x
pCloses :: String -> TagParser ()
@@ -735,7 +758,7 @@ pSpace = many1 (satisfy isSpace) >> return B.space
--
eitherBlockOrInline :: [String]
-eitherBlockOrInline = ["audio", "applet", "button", "iframe",
+eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed",
"del", "ins",
"progress", "map", "area", "noscript", "script",
"object", "svg", "video", "source"]
@@ -753,7 +776,7 @@ blockHtmlTags :: [String]
blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
"blockquote", "body", "button", "canvas",
"caption", "center", "col", "colgroup", "dd", "dir", "div",
- "dl", "dt", "embed", "fieldset", "figcaption", "figure",
+ "dl", "dt", "fieldset", "figcaption", "figure",
"footer", "form", "h1", "h2", "h3", "h4",
"h5", "h6", "head", "header", "hgroup", "hr", "html",
"isindex", "menu", "noframes", "ol", "output", "p", "pre",
@@ -864,7 +887,7 @@ htmlTag :: Monad m
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
- lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
+ lookAhead $ char '<' >> ((oneOf "/!?" >> nonspaceChar) <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
guard $ f next
-- advance the parser
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 4b46c869d..aa2534afc 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE CPP #-}
{- |
Module : Text.Pandoc.Readers.Haddock
Copyright : Copyright (C) 2013 David Lazar
@@ -25,11 +26,18 @@ import Documentation.Haddock.Parser
import Documentation.Haddock.Types
import Debug.Trace (trace)
+import Text.Pandoc.Error
+
-- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
- -> Pandoc
-readHaddock opts = B.doc . docHToBlocks . trace' . parseParas
+ -> Either PandocError Pandoc
+readHaddock opts =
+#if MIN_VERSION_haddock_library(1,2,0)
+ Right . B.doc . docHToBlocks . trace' . _doc . parseParas
+#else
+ Right . B.doc . docHToBlocks . trace' . parseParas
+#endif
where trace' x = if readerTrace opts
then trace (show x) x
else x
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9f51e9a8f..08aa0b20e 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -42,26 +42,25 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
mathDisplay, mathInline)
import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Char ( chr, ord )
+import Data.Char ( chr, ord, isLetter, isAlphaNum )
import Control.Monad.Trans (lift)
import Control.Monad
import Text.Pandoc.Builder
-import Data.Char (isLetter, isAlphaNum)
import Control.Applicative
import Data.Monoid
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, maybeToList)
import System.Environment (getEnv)
-import System.FilePath (replaceExtension, (</>))
-import Data.List (intercalate, intersperse)
+import System.FilePath (replaceExtension, (</>), takeExtension, addExtension)
+import Data.List (intercalate)
import qualified Data.Map as M
import qualified Control.Exception as E
-import System.FilePath (takeExtension, addExtension)
import Text.Pandoc.Highlighting (fromListingsLanguage)
+import Text.Pandoc.Error
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts }
parseLaTeX :: LP Pandoc
@@ -73,17 +72,16 @@ parseLaTeX = do
let (Pandoc _ bs') = doc bs
return $ Pandoc meta bs'
-type LP = Parser [Char] ParserState
+type LP = Parser String ParserState
anyControlSeq :: LP String
anyControlSeq = do
char '\\'
next <- option '\n' anyChar
- name <- case next of
- '\n' -> return ""
- c | isLetter c -> (c:) <$> (many letter <* optional sp)
- | otherwise -> return [c]
- return name
+ case next of
+ '\n' -> return ""
+ c | isLetter c -> (c:) <$> (many letter <* optional sp)
+ | otherwise -> return [c]
controlSeq :: String -> LP String
controlSeq name = try $ do
@@ -103,7 +101,7 @@ dimenarg = try $ do
sp :: LP ()
sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
- <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline)
+ <|> try (newline <* lookAhead anyChar <* notFollowedBy blankline)
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
@@ -161,49 +159,48 @@ mathInline :: LP String -> LP Inlines
mathInline p = math <$> (try p >>= applyMacros')
mathChars :: LP String
-mathChars = concat <$>
- many ( many1 (satisfy (\c -> c /= '$' && c /='\\'))
- <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar)
- )
+mathChars = (concat <$>) $
+ many $
+ many1 (satisfy (\c -> c /= '$' && c /='\\'))
+ <|> (\c -> ['\\',c]) <$> try (char '\\' *> anyChar)
quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines
quoted' f starter ender = do
startchs <- starter
try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs
-double_quote :: LP Inlines
-double_quote =
- ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
+doubleQuote :: LP Inlines
+doubleQuote =
+ quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
<|> quoted' doubleQuoted (string "“") (void $ char '”')
-- the following is used by babel for localized quotes:
<|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
<|> quoted' doubleQuoted (string "\"") (void $ char '"')
- )
-single_quote :: LP Inlines
-single_quote =
- ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
+singleQuote :: LP Inlines
+singleQuote =
+ quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
<|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
- )
inline :: LP Inlines
inline = (mempty <$ comment)
<|> (space <$ sp)
<|> inlineText
<|> inlineCommand
+ <|> inlineEnvironment
<|> inlineGroup
<|> (char '-' *> option (str "-")
- ((char '-') *> option (str "–") (str "—" <$ char '-')))
- <|> double_quote
- <|> single_quote
+ (char '-' *> option (str "–") (str "—" <$ char '-')))
+ <|> doubleQuote
+ <|> singleQuote
<|> (str "”" <$ try (string "''"))
<|> (str "”" <$ char '”')
<|> (str "’" <$ char '\'')
<|> (str "’" <$ char '’')
<|> (str "\160" <$ char '~')
- <|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
- <|> (mathInline $ char '$' *> mathChars <* char '$')
- <|> (superscript <$> (char '^' *> tok))
+ <|> mathDisplay (string "$$" *> mathChars <* string "$$")
+ <|> mathInline (char '$' *> mathChars <* char '$')
+ <|> try (superscript <$> (char '^' *> tok))
<|> (subscript <$> (char '_' *> tok))
<|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
<|> (str . (:[]) <$> tildeEscape)
@@ -237,20 +234,32 @@ block = (mempty <$ comment)
blocks :: LP Blocks
blocks = mconcat <$> many block
+getRawCommand :: String -> LP String
+getRawCommand name' = do
+ rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced)
+ return $ '\\' : name' ++ snd rawargs
+
+lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
+lookupListDefault d = (fromMaybe d .) . lookupList
+ where
+ lookupList l m = msum $ map (`M.lookup` m) l
+
blockCommand :: LP Blocks
blockCommand = try $ do
name <- anyControlSeq
guard $ name /= "begin" && name /= "end"
star <- option "" (string "*" <* optional sp)
let name' = name ++ star
- case M.lookup name' blockCommands of
- Just p -> p
- Nothing -> case M.lookup name blockCommands of
- Just p -> p
- Nothing -> mzero
+ let raw = do
+ rawcommand <- getRawCommand name'
+ transformed <- applyMacros' rawcommand
+ guard $ transformed /= rawcommand
+ notFollowedBy $ parseFromString inlines transformed
+ parseFromString blocks transformed
+ lookupListDefault raw [name',name] blockCommands
inBrackets :: Inlines -> Inlines
-inBrackets x = (str "[") <> x <> (str "]")
+inBrackets x = str "[" <> x <> str "]"
-- eat an optional argument and one or more arguments in braces
ignoreInlines :: String -> (String, LP Inlines)
@@ -258,19 +267,21 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawInline "latex" . (contseq ++) . snd) <$>
- (getOption readerParseRaw >>= guard >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> withRaw optargs)
ignoreBlocks :: String -> (String, LP Blocks)
ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
- (getOption readerParseRaw >>= guard >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> withRaw optargs)
blockCommands :: M.Map String (LP Blocks)
blockCommands = M.fromList $
[ ("par", mempty <$ skipopts)
- , ("title", mempty <$ (skipopts *> tok >>= addMeta "title"))
+ , ("title", mempty <$ (skipopts *>
+ (grouped inline >>= addMeta "title")
+ <|> (grouped block >>= addMeta "title")))
, ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
, ("author", mempty <$ (skipopts *> authors))
-- -- in letter class, temp. store address & sig as title, author
@@ -301,10 +312,10 @@ blockCommands = M.fromList $
--
, ("hrule", pure horizontalRule)
, ("rule", skipopts *> tok *> tok *> pure horizontalRule)
- , ("item", skipopts *> loose_item)
+ , ("item", skipopts *> looseItem)
, ("documentclass", skipopts *> braced *> preamble)
, ("centerline", (para . trimInlines) <$> (skipopts *> tok))
- , ("caption", skipopts *> tok >>= setCaption)
+ , ("caption", skipopts *> setCaption)
, ("PandocStartInclude", startInclude)
, ("PandocEndInclude", endInclude)
, ("bibliography", mempty <$ (skipopts *> braced >>=
@@ -336,9 +347,16 @@ addMeta field val = updateState $ \st ->
splitBibs :: String -> [Inlines]
splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
-setCaption :: Inlines -> LP Blocks
-setCaption ils = do
- updateState $ \st -> st{ stateCaption = Just ils }
+setCaption :: LP Blocks
+setCaption = do
+ ils <- tok
+ mblabel <- option Nothing $
+ try $ spaces' >> controlSeq "label" >> (Just <$> tok)
+ let ils' = case mblabel of
+ Just lab -> ils <> spanWith
+ ("",[],[("data-label", stringify lab)]) mempty
+ Nothing -> ils
+ updateState $ \st -> st{ stateCaption = Just ils' }
return mempty
resetCaption :: LP ()
@@ -361,7 +379,7 @@ section (ident, classes, kvs) lvl = do
let lvl' = if hasChapters then lvl + 1 else lvl
skipopts
contents <- grouped inline
- lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced)
+ lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
attr' <- registerHeader (lab, classes, kvs) contents
return $ headerWith attr' lvl' contents
@@ -374,25 +392,39 @@ inlineCommand = try $ do
star <- option "" (string "*")
let name' = name ++ star
let raw = do
- rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced)
- let rawcommand = '\\' : name ++ star ++ snd rawargs
+ rawcommand <- getRawCommand name'
transformed <- applyMacros' rawcommand
if transformed /= rawcommand
then parseFromString inlines transformed
else if parseRaw
then return $ rawInline "latex" rawcommand
else return mempty
- case M.lookup name' inlineCommands of
- Just p -> p <|> raw
- Nothing -> case M.lookup name inlineCommands of
- Just p -> p <|> raw
- Nothing -> raw
+ lookupListDefault mzero [name',name] inlineCommands
+ <|> raw
unlessParseRaw :: LP ()
unlessParseRaw = getOption readerParseRaw >>= guard . not
isBlockCommand :: String -> Bool
-isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
+isBlockCommand s = s `M.member` blockCommands
+
+
+inlineEnvironments :: M.Map String (LP Inlines)
+inlineEnvironments = M.fromList
+ [ ("displaymath", mathEnv id Nothing "displaymath")
+ , ("equation", mathEnv id Nothing "equation")
+ , ("equation*", mathEnv id Nothing "equation*")
+ , ("gather", mathEnv id (Just "gathered") "gather")
+ , ("gather*", mathEnv id (Just "gathered") "gather*")
+ , ("multline", mathEnv id (Just "gathered") "multline")
+ , ("multline*", mathEnv id (Just "gathered") "multline*")
+ , ("eqnarray", mathEnv id (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnv id (Just "aligned") "eqnarray*")
+ , ("align", mathEnv id (Just "aligned") "align")
+ , ("align*", mathEnv id (Just "aligned") "align*")
+ , ("alignat", mathEnv id (Just "aligned") "alignat")
+ , ("alignat*", mathEnv id (Just "aligned") "alignat*")
+ ]
inlineCommands :: M.Map String (LP Inlines)
inlineCommands = M.fromList $
@@ -414,9 +446,14 @@ inlineCommands = M.fromList $
, ("sim", lit "~")
, ("label", unlessParseRaw >> (inBrackets <$> tok))
, ("ref", unlessParseRaw >> (inBrackets <$> tok))
+ , ("noindent", unlessParseRaw >> return mempty)
+ , ("textgreek", tok)
+ , ("sep", lit ",")
+ , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty
, ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
, ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
- , ("ensuremath", mathInline $ braced)
+ , ("ensuremath", mathInline braced)
+ , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
, ("P", lit "¶")
, ("S", lit "§")
, ("$", lit "$")
@@ -464,7 +501,7 @@ inlineCommands = M.fromList $
, ("v", option (str "v") $ try $ tok >>= accent hacek)
, ("u", option (str "u") $ try $ tok >>= accent breve)
, ("i", lit "i")
- , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp))
+ , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
, (",", pure mempty)
, ("@", pure mempty)
, (" ", lit "\160")
@@ -477,7 +514,7 @@ inlineCommands = M.fromList $
, ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("verb", doverb)
- , ("lstinline", doverb)
+ , ("lstinline", skipopts *> doverb)
, ("Verb", doverb)
, ("texttt", (code . stringify . toList) <$> tok)
, ("url", (unescapeURL <$> braced) >>= \url ->
@@ -494,6 +531,7 @@ inlineCommands = M.fromList $
, ("citealp", citation "citealp" NormalCitation False)
, ("citealp*", citation "citealp*" NormalCitation False)
, ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
, ("footcite", inNote <$> citation "footcite" NormalCitation False)
, ("parencite", citation "parencite" NormalCitation False)
, ("supercite", citation "supercite" NormalCitation False)
@@ -516,6 +554,7 @@ inlineCommands = M.fromList $
, ("supercites", citation "supercites" NormalCitation True)
, ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
, ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
, ("Footcite", citation "Footcite" NormalCitation False)
, ("Parencite", citation "Parencite" NormalCitation False)
, ("Supercite", citation "Supercite" NormalCitation False)
@@ -542,7 +581,7 @@ inlineCommands = M.fromList $
] ++ map ignoreInlines
-- these commands will be ignored unless --parse-raw is specified,
-- in which case they will appear as raw latex blocks:
- [ "noindent", "index" ]
+ [ "index" ]
mkImage :: String -> LP Inlines
mkImage src = do
@@ -559,7 +598,7 @@ inNote ils =
unescapeURL :: String -> String
unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable c = c `elem` "#$%&~_^\\{}"
+ where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
unescapeURL (x:xs) = x:unescapeURL xs
unescapeURL [] = ""
@@ -585,7 +624,7 @@ lit = pure . str
accent :: (Char -> String) -> Inlines -> LP Inlines
accent f ils =
case toList ils of
- (Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys)
+ (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
[] -> mzero
_ -> return ils
@@ -774,7 +813,7 @@ breve 'u' = "ŭ"
breve c = [c]
tok :: LP Inlines
-tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
+tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
opt :: LP Inlines
opt = bracketed inline <* optional sp
@@ -792,9 +831,14 @@ environment :: LP Blocks
environment = do
controlSeq "begin"
name <- braced
- case M.lookup name environments of
- Just p -> p <|> rawEnv name
- Nothing -> rawEnv name
+ M.findWithDefault mzero name environments
+ <|> rawEnv name
+
+inlineEnvironment :: LP Inlines
+inlineEnvironment = try $ do
+ controlSeq "begin"
+ name <- braced
+ M.findWithDefault mzero name inlineEnvironments
rawEnv :: String -> LP Blocks
rawEnv name = do
@@ -807,15 +851,11 @@ rawEnv name = do
----
-type IncludeParser = ParserT [Char] [String] IO String
+type IncludeParser = ParserT String [String] IO String
-- | Replace "include" commands with file contents.
-handleIncludes :: String -> IO String
-handleIncludes s = do
- res <- runParserT includeParser' [] "input" s
- case res of
- Right s' -> return s'
- Left e -> error $ show e
+handleIncludes :: String -> IO (Either PandocError String)
+handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s
includeParser' :: IncludeParser
includeParser' =
@@ -857,6 +897,12 @@ backslash' = string "\\"
braced' :: IncludeParser
braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')
+maybeAddExtension :: String -> FilePath -> FilePath
+maybeAddExtension ext fp =
+ if null (takeExtension fp)
+ then addExtension fp ext
+ else fp
+
include' :: IncludeParser
include' = do
fs' <- try $ do
@@ -865,11 +911,11 @@ include' = do
<|> try (string "input")
<|> string "usepackage"
-- skip options
- skipMany $ try $ char '[' *> (manyTill anyChar (char ']'))
+ skipMany $ try $ char '[' *> manyTill anyChar (char ']')
fs <- (map trim . splitBy (==',')) <$> braced'
return $ if name == "usepackage"
- then map (flip replaceExtension ".sty") fs
- else map (flip replaceExtension ".tex") fs
+ then map (maybeAddExtension ".sty") fs
+ else map (maybeAddExtension ".tex") fs
pos <- getPosition
containers <- getState
let fn = case containers of
@@ -938,14 +984,14 @@ keyvals = try $ char '[' *> manyTill keyval (char ']')
alltt :: String -> LP Blocks
alltt t = walk strToCode <$> parseFromString blocks
(substitute " " "\\ " $ substitute "%" "\\%" $
- concat $ intersperse "\\\\\n" $ lines t)
+ intercalate "\\\\\n" $ lines t)
where strToCode (Str s) = Code nullAttr s
strToCode x = x
-rawLaTeXBlock :: Parser [Char] ParserState String
+rawLaTeXBlock :: LP String
rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
-rawLaTeXInline :: Parser [Char] ParserState Inline
+rawLaTeXInline :: LP Inline
rawLaTeXInline = do
raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
RawInline "latex" <$> applyMacros' raw
@@ -954,41 +1000,42 @@ addImageCaption :: Blocks -> LP Blocks
addImageCaption = walkM go
where go (Image alt (src,tit)) = do
mbcapt <- stateCaption <$> getState
- case mbcapt of
- Just ils -> return (Image (toList ils) (src, "fig:"))
- Nothing -> return (Image alt (src,tit))
+ return $ case mbcapt of
+ Just ils -> Image (toList ils) (src, "fig:")
+ Nothing -> Image alt (src,tit)
go x = return x
addTableCaption :: Blocks -> LP Blocks
addTableCaption = walkM go
where go (Table c als ws hs rs) = do
mbcapt <- stateCaption <$> getState
- case mbcapt of
- Just ils -> return (Table (toList ils) als ws hs rs)
- Nothing -> return (Table c als ws hs rs)
+ return $ case mbcapt of
+ Just ils -> Table (toList ils) als ws hs rs
+ Nothing -> Table c als ws hs rs
go x = return x
environments :: M.Map String (LP Blocks)
environments = M.fromList
[ ("document", env "document" blocks <* skipMany anyChar)
- , ("letter", env "letter" letter_contents)
+ , ("letter", env "letter" letterContents)
, ("figure", env "figure" $
resetCaption *> skipopts *> blocks >>= addImageCaption)
, ("center", env "center" blocks)
, ("table", env "table" $
resetCaption *> skipopts *> blocks >>= addTableCaption)
- , ("tabular", env "tabular" simpTable)
+ , ("tabular*", env "tabular" $ simpTable True)
+ , ("tabular", env "tabular" $ simpTable False)
, ("quote", blockQuote <$> env "quote" blocks)
, ("quotation", blockQuote <$> env "quotation" blocks)
, ("verse", blockQuote <$> env "verse" blocks)
, ("itemize", bulletList <$> listenv "itemize" (many item))
, ("description", definitionList <$> listenv "description" (many descItem))
- , ("enumerate", ordered_list)
+ , ("enumerate", orderedList')
, ("alltt", alltt =<< verbEnv "alltt")
, ("code", guardEnabled Ext_literate_haskell *>
(codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
verbEnv "code"))
- , ("verbatim", codeBlock <$> (verbEnv "verbatim"))
+ , ("verbatim", codeBlock <$> verbEnv "verbatim")
, ("Verbatim", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
@@ -996,17 +1043,17 @@ environments = M.fromList
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
let attr = ("",classes,kvs)
- codeBlockWith attr <$> (verbEnv "Verbatim"))
+ codeBlockWith attr <$> verbEnv "Verbatim")
, ("lstlisting", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
- ++ maybe [] (:[]) (lookup "language" options
+ ++ maybeToList (lookup "language" options
>>= fromListingsLanguage)
let attr = (fromMaybe "" (lookup "label" options),classes,kvs)
- codeBlockWith attr <$> (verbEnv "lstlisting"))
+ codeBlockWith attr <$> verbEnv "lstlisting")
, ("minted", do options <- option [] keyvals
lang <- grouped (many1 $ satisfy (/='}'))
let kvs = [ (if k == "firstnumber"
@@ -1016,27 +1063,27 @@ environments = M.fromList
[ "numberLines" |
lookup "linenos" options == Just "true" ]
let attr = ("",classes,kvs)
- codeBlockWith attr <$> (verbEnv "minted"))
+ codeBlockWith attr <$> verbEnv "minted")
, ("obeylines", parseFromString
(para . trimInlines . mconcat <$> many inline) =<<
intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
- , ("displaymath", mathEnv Nothing "displaymath")
- , ("equation", mathEnv Nothing "equation")
- , ("equation*", mathEnv Nothing "equation*")
- , ("gather", mathEnv (Just "gathered") "gather")
- , ("gather*", mathEnv (Just "gathered") "gather*")
- , ("multline", mathEnv (Just "gathered") "multline")
- , ("multline*", mathEnv (Just "gathered") "multline*")
- , ("eqnarray", mathEnv (Just "aligned") "eqnarray")
- , ("eqnarray*", mathEnv (Just "aligned") "eqnarray*")
- , ("align", mathEnv (Just "aligned") "align")
- , ("align*", mathEnv (Just "aligned") "align*")
- , ("alignat", mathEnv (Just "aligned") "alignat")
- , ("alignat*", mathEnv (Just "aligned") "alignat*")
+ , ("displaymath", mathEnv para Nothing "displaymath")
+ , ("equation", mathEnv para Nothing "equation")
+ , ("equation*", mathEnv para Nothing "equation*")
+ , ("gather", mathEnv para (Just "gathered") "gather")
+ , ("gather*", mathEnv para (Just "gathered") "gather*")
+ , ("multline", mathEnv para (Just "gathered") "multline")
+ , ("multline*", mathEnv para (Just "gathered") "multline*")
+ , ("eqnarray", mathEnv para (Just "aligned") "eqnarray")
+ , ("eqnarray*", mathEnv para (Just "aligned") "eqnarray*")
+ , ("align", mathEnv para (Just "aligned") "align")
+ , ("align*", mathEnv para (Just "aligned") "align*")
+ , ("alignat", mathEnv para (Just "aligned") "alignat")
+ , ("alignat*", mathEnv para (Just "aligned") "alignat*")
]
-letter_contents :: LP Blocks
-letter_contents = do
+letterContents :: LP Blocks
+letterContents = do
bs <- blocks
st <- getState
-- add signature (author) and address (title)
@@ -1063,8 +1110,8 @@ closing = do
item :: LP Blocks
item = blocks *> controlSeq "item" *> skipopts *> blocks
-loose_item :: LP Blocks
-loose_item = do
+looseItem :: LP Blocks
+looseItem = do
ctx <- stateParserContext `fmap` getState
if ctx == ListItemState
then mzero
@@ -1092,8 +1139,8 @@ listenv name p = try $ do
updateState $ \st -> st{ stateParserContext = oldCtx }
return res
-mathEnv :: Maybe String -> String -> LP Blocks
-mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name)
+mathEnv :: (Inlines -> a) -> Maybe String -> String -> LP a
+mathEnv f innerEnv name = f <$> mathDisplay (inner <$> verbEnv name)
where inner x = case innerEnv of
Nothing -> x
Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
@@ -1107,8 +1154,8 @@ verbEnv name = do
res <- manyTill anyChar endEnv
return $ stripTrailingNewlines res
-ordered_list :: LP Blocks
-ordered_list = do
+orderedList' :: LP Blocks
+orderedList' = do
optional sp
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ char '[' *> anyOrderedListMarker <* char ']'
@@ -1120,7 +1167,7 @@ ordered_list = do
optional sp
num <- grouped (many1 digit)
spaces
- return $ (read num + 1 :: Int)
+ return (read num + 1 :: Int)
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
@@ -1134,14 +1181,14 @@ paragraph = do
preamble :: LP Blocks
preamble = mempty <$> manyTill preambleBlock beginDoc
where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
- preambleBlock = (void comment)
- <|> (void sp)
- <|> (void blanklines)
- <|> (void macro)
- <|> (void blockCommand)
- <|> (void anyControlSeq)
- <|> (void braced)
- <|> (void anyChar)
+ preambleBlock = void comment
+ <|> void sp
+ <|> void blanklines
+ <|> void macro
+ <|> void blockCommand
+ <|> void anyControlSeq
+ <|> void braced
+ <|> void anyChar
-------
@@ -1183,7 +1230,7 @@ citationLabel = optional sp *>
<* optional sp
<* optional (char ',')
<* optional sp)
- where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*"
+ where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String)
cites :: CitationMode -> Bool -> LP [Citation]
cites mode multi = try $ do
@@ -1217,7 +1264,7 @@ complexNatbibCitation mode = try $ do
suff <- ils
skipSpaces
optional $ char ';'
- return $ addPrefix pref $ addSuffix suff $ cits'
+ return $ addPrefix pref $ addSuffix suff cits'
(c:cits, raw) <- withRaw $ grouped parseOne
return $ cite (c{ citationMode = mode }:cits)
(rawInline "latex" $ "\\citetext" ++ raw)
@@ -1227,7 +1274,7 @@ complexNatbibCitation mode = try $ do
parseAligns :: LP [Alignment]
parseAligns = try $ do
char '{'
- let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}")
+ let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
maybeBar
let cAlign = AlignCenter <$ char 'c'
let lAlign = AlignLeft <$ char 'l'
@@ -1241,13 +1288,13 @@ parseAligns = try $ do
return aligns'
hline :: LP ()
-hline = () <$ (try $ spaces >> controlSeq "hline")
+hline = () <$ try (spaces' *> controlSeq "hline" <* spaces')
lbreak :: LP ()
-lbreak = () <$ (try $ spaces *> controlSeq "\\")
+lbreak = () <$ try (spaces' *> controlSeq "\\" <* spaces')
amp :: LP ()
-amp = () <$ (try $ spaces *> char '&')
+amp = () <$ try (spaces' *> char '&')
parseTableRow :: Int -- ^ number of columns
-> LP [Blocks]
@@ -1260,19 +1307,22 @@ parseTableRow cols = try $ do
guard $ cells' /= [mempty]
-- note: a & b in a three-column table leaves an empty 3rd cell:
let cells'' = cells' ++ replicate (cols - numcells) mempty
- spaces
+ spaces'
return cells''
-simpTable :: LP Blocks
-simpTable = try $ do
- spaces
+spaces' :: LP ()
+spaces' = spaces *> skipMany (comment *> spaces)
+
+simpTable :: Bool -> LP Blocks
+simpTable hasWidthParameter = try $ do
+ when hasWidthParameter $ () <$ (spaces' >> tok)
+ skipopts
aligns <- parseAligns
let cols = length aligns
optional hline
header' <- option [] $ try (parseTableRow cols <* lbreak <* hline)
rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline)
- spaces
- skipMany (comment *> spaces)
+ spaces'
let header'' = if null header'
then replicate cols mempty
else header'
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 02a787670..5e0cef4f8 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE ScopedTypeVariables #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -31,11 +32,11 @@ Conversion of markdown-formatted plain text to 'Pandoc' document.
module Text.Pandoc.Readers.Markdown ( readMarkdown,
readMarkdownWithWarnings ) where
-import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate )
+import Data.List ( transpose, sortBy, intersperse, intercalate, elemIndex)
import qualified Data.Map as M
import Data.Scientific (coefficient, base10Exponent)
import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum, toLower )
+import Data.Char ( isSpace, isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
import qualified Data.Text as T
@@ -55,38 +56,41 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
import Data.Monoid (mconcat, mempty)
-import Control.Applicative ((<$>), (<*), (*>), (<$))
+import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>))
import Control.Monad
+import Control.Monad.Reader
import System.FilePath (takeExtension, addExtension)
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)
import qualified Data.Set as Set
import Text.Printf (printf)
import Debug.Trace (trace)
+import Text.Pandoc.Error
-type MarkdownParser = Parser [Char] ParserState
+type MarkdownParser a = ParserT [Char] ParserState (Reader ParserState) a
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readMarkdown opts s =
- (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
+ runMarkdown opts s parseMarkdown
-- | Read markdown from an input string and return a pair of a Pandoc document
-- and a list of warnings.
readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> (Pandoc, [String])
-readMarkdownWithWarnings opts s =
- (readWith parseMarkdownWithWarnings) def{ stateOptions = opts } (s ++ "\n\n")
- where parseMarkdownWithWarnings = do
- doc <- parseMarkdown
- warnings <- stateWarnings <$> getState
- return (doc, warnings)
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
+ -> Either PandocError (Pandoc, [String])
+readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown)
+
+runMarkdown :: forall a . ReaderOptions -> String -> MarkdownParser a -> Either PandocError a
+runMarkdown opts inp p = fst <$> res
+ where
+ imd = readWithM (returnState p) def{ stateOptions = opts } (inp ++ "\n\n")
+ res :: Either PandocError (a, ParserState)
+ res = runReader imd s
+ s :: ParserState
+ s = either def snd res
--
-- Constants and data structure definitions
@@ -117,10 +121,16 @@ isBlank _ = False
-- auxiliary functions
--
-isNull :: F Inlines -> Bool
-isNull ils = B.isNull $ runF ils def
+-- | Succeeds when we're in list context.
+inList :: MarkdownParser ()
+inList = do
+ ctx <- stateParserContext <$> getState
+ guard (ctx == ListItemState)
+
+isNull :: Inlines -> Bool
+isNull = B.isNull
-spnl :: Parser [Char] st ()
+spnl :: Monad m => ParserT [Char] st m ()
spnl = try $ do
skipSpaces
optional newline
@@ -160,9 +170,9 @@ litChar = escapedChar'
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
-inlinesInBalancedBrackets :: MarkdownParser (F Inlines)
+inlinesInBalancedBrackets :: MarkdownParser Inlines
inlinesInBalancedBrackets = charsInBalancedBrackets >>=
- parseFromString (trimInlinesF . mconcat <$> many inline)
+ parseFromString (trimInlines . mconcat <$> many inline)
charsInBalancedBrackets :: MarkdownParser [Char]
charsInBalancedBrackets = do
@@ -179,16 +189,16 @@ charsInBalancedBrackets = do
-- document structure
--
-titleLine :: MarkdownParser (F Inlines)
+titleLine :: MarkdownParser Inlines
titleLine = try $ do
char '%'
skipSpaces
res <- many $ (notFollowedBy newline >> inline)
<|> try (endline >> whitespace)
newline
- return $ trimInlinesF $ mconcat res
+ return $ trimInlines $ mconcat res
-authorsLine :: MarkdownParser (F [Inlines])
+authorsLine :: MarkdownParser [Inlines]
authorsLine = try $ do
char '%'
skipSpaces
@@ -197,13 +207,13 @@ authorsLine = try $ do
(char ';' <|>
try (newline >> notFollowedBy blankline >> spaceChar))
newline
- return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors
+ return $ filter (not . isNull) $ map (trimInlines . mconcat) authors
-dateLine :: MarkdownParser (F Inlines)
+dateLine :: MarkdownParser Inlines
dateLine = try $ do
char '%'
skipSpaces
- trimInlinesF . mconcat <$> manyTill inline newline
+ trimInlines . mconcat <$> manyTill inline newline
titleBlock :: MarkdownParser ()
titleBlock = pandocTitleBlock <|> mmdTitleBlock
@@ -213,20 +223,16 @@ pandocTitleBlock = try $ do
guardEnabled Ext_pandoc_title_block
lookAhead (char '%')
title <- option mempty titleLine
- author <- option (return []) authorsLine
+ author <- option [] authorsLine
date <- option mempty dateLine
optional blanklines
- let meta' = do title' <- title
- author' <- author
- date' <- date
- return $
- (if B.isNull title' then id else B.setMeta "title" title')
- . (if null author' then id else B.setMeta "author" author')
- . (if B.isNull date' then id else B.setMeta "date" date')
- $ nullMeta
- updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-
-yamlMetaBlock :: MarkdownParser (F Blocks)
+ let meta' = (if B.isNull title then id else B.setMeta "title" title)
+ . (if null author then id else B.setMeta "author" author)
+ . (if B.isNull date then id else B.setMeta "date" date)
+ $ nullMeta
+ updateState $ \st -> st{ stateMeta = stateMeta st <> meta' }
+
+yamlMetaBlock :: MarkdownParser Blocks
yamlMetaBlock = try $ do
guardEnabled Ext_yaml_metadata_block
pos <- getPosition
@@ -239,17 +245,18 @@ yamlMetaBlock = try $ do
optional blanklines
opts <- stateOptions <$> getState
meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
- Right (Yaml.Object hashmap) -> return $ return $
+ Right (Yaml.Object hashmap) -> return $
H.foldrWithKey (\k v m ->
if ignorable k
then m
- else B.setMeta (T.unpack k)
- (yamlToMeta opts v) m)
+ else case yamlToMeta opts v of
+ Left _ -> m
+ Right v' -> B.setMeta (T.unpack k) v' m)
nullMeta hashmap
- Right Yaml.Null -> return $ return nullMeta
+ Right Yaml.Null -> return nullMeta
Right _ -> do
addWarning (Just pos) "YAML header is not an object"
- return $ return nullMeta
+ return nullMeta
Left err' -> do
case err' of
InvalidYaml (Just YamlParseException{
@@ -268,41 +275,50 @@ yamlMetaBlock = try $ do
_ -> addWarning (Just pos)
$ "Could not parse YAML header: " ++
show err'
- return $ return nullMeta
- updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+ return nullMeta
+ updateState $ \st -> st{ stateMeta = stateMeta st <> meta' }
return mempty
-- ignore fields ending with _
ignorable :: Text -> Bool
-ignorable t = (T.pack "_") `T.isSuffixOf` t
-
-toMetaValue :: ReaderOptions -> Text -> MetaValue
-toMetaValue opts x =
- case readMarkdown opts (T.unpack x) of
- Pandoc _ [Plain xs] -> MetaInlines xs
- Pandoc _ [Para xs]
+ignorable t = T.pack "_" `T.isSuffixOf` t
+
+toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue
+toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x)
+ where
+ toMeta p =
+ case p of
+ Pandoc _ [Plain xs] -> MetaInlines xs
+ Pandoc _ [Para xs]
| endsWithNewline x -> MetaBlocks [Para xs]
| otherwise -> MetaInlines xs
- Pandoc _ bs -> MetaBlocks bs
- where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t
-
-yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue
+ Pandoc _ bs -> MetaBlocks bs
+ endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
+ opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts}
+ meta_exts = Set.fromList [ Ext_pandoc_title_block
+ , Ext_mmd_title_block
+ , Ext_yaml_metadata_block
+ ]
+
+yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue
yamlToMeta opts (Yaml.String t) = toMetaValue opts t
yamlToMeta _ (Yaml.Number n)
-- avoid decimal points for numbers that don't need them:
- | base10Exponent n >= 0 = MetaString $ show
+ | base10Exponent n >= 0 = return $ MetaString $ show
$ coefficient n * (10 ^ base10Exponent n)
- | otherwise = MetaString $ show n
-yamlToMeta _ (Yaml.Bool b) = MetaBool b
-yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts)
- $ V.toList xs
-yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m ->
+ | otherwise = return $ MetaString $ show n
+yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b
+yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts)
+ (V.toList xs)
+yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m ->
if ignorable k
then m
- else M.insert (T.unpack k)
- (yamlToMeta opts v) m)
- M.empty o
-yamlToMeta _ _ = MetaString ""
+ else (do
+ v' <- yamlToMeta opts v
+ m' <- m
+ return (M.insert (T.unpack k) v' m')))
+ (return M.empty) o
+yamlToMeta _ _ = return $ MetaString ""
stopLine :: MarkdownParser ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
@@ -312,17 +328,21 @@ mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
kvPairs <- many1 kvPair
blanklines
- updateState $ \st -> st{ stateMeta' = stateMeta' st <>
- return (Meta $ M.fromList kvPairs) }
+ updateState $ \st -> st{ stateMeta = stateMeta st <>
+ (Meta $ M.fromList kvPairs) }
kvPair :: MarkdownParser (String, MetaValue)
kvPair = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
+ skipMany1 spaceNoNewline
val <- manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
+ guard $ not . null . trim $ val
let key' = concat $ words $ map toLower key
let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val
return (key',val')
+ where
+ spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r'))
parseMarkdown :: MarkdownParser Pandoc
parseMarkdown = do
@@ -333,17 +353,11 @@ parseMarkdown = do
optional titleBlock
blocks <- parseBlocks
st <- getState
- let meta = runF (stateMeta' st) st
- let Pandoc _ bs = B.doc $ runF blocks st
+ let meta = stateMeta st
+ let Pandoc _ bs = B.doc blocks
return $ Pandoc meta bs
-addWarning :: Maybe SourcePos -> String -> MarkdownParser ()
-addWarning mbpos msg =
- updateState $ \st -> st{
- stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) :
- stateWarnings st }
-
-referenceKey :: MarkdownParser (F Blocks)
+referenceKey :: MarkdownParser Blocks
referenceKey = try $ do
pos <- getPosition
skipNonindentSpaces
@@ -370,7 +384,7 @@ referenceKey = try $ do
Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'"
Nothing -> return ()
updateState $ \s -> s { stateKeys = M.insert key target oldkeys }
- return $ return mempty
+ return mempty
referenceTitle :: MarkdownParser String
referenceTitle = try $ do
@@ -390,7 +404,7 @@ quotedTitle c = try $ do
-- | PHP Markdown Extra style abbreviation key. Currently
-- we just skip them, since Pandoc doesn't have an element for
-- an abbreviation.
-abbrevKey :: MarkdownParser (F Blocks)
+abbrevKey :: MarkdownParser Blocks
abbrevKey = do
guardEnabled Ext_abbreviations
try $ do
@@ -399,7 +413,7 @@ abbrevKey = do
char ':'
skipMany (satisfy (/= '\n'))
blanklines
- return $ return mempty
+ return mempty
noteMarker :: MarkdownParser String
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
@@ -417,7 +431,7 @@ rawLines = do
rest <- many rawLine
return $ unlines (first:rest)
-noteBlock :: MarkdownParser (F Blocks)
+noteBlock :: MarkdownParser Blocks
noteBlock = try $ do
pos <- getPosition
skipNonindentSpaces
@@ -429,7 +443,7 @@ noteBlock = try $ do
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
let raw = unlines (first:rest) ++ "\n"
optional blanklines
- parsed <- parseFromString parseBlocks raw
+ parsed <- parseFromString (inFootnote parseBlocks) raw
let newnote = (ref, parsed)
oldnotes <- stateNotes' <$> getState
case lookup ref oldnotes of
@@ -438,32 +452,40 @@ noteBlock = try $ do
updateState $ \s -> s { stateNotes' = newnote : oldnotes }
return mempty
+inFootnote :: MarkdownParser a -> MarkdownParser a
+inFootnote p = do
+ st <- stateInFootnote <$> getState
+ updateState (\s -> s { stateInFootnote = True } )
+ r <- p
+ updateState (\s -> s { stateInFootnote = st } )
+ return r
+
--
-- parsing blocks
--
-parseBlocks :: MarkdownParser (F Blocks)
+parseBlocks :: MarkdownParser Blocks
parseBlocks = mconcat <$> manyTill block eof
-block :: MarkdownParser (F Blocks)
+block :: MarkdownParser Blocks
block = do
tr <- getOption readerTrace
pos <- getPosition
res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
- , guardEnabled Ext_latex_macros *> (macro >>= return . return)
+ , guardEnabled Ext_latex_macros *> macro
-- note: bulletList needs to be before header because of
-- the possibility of empty list items: -
, bulletList
, header
, lhsCodeBlock
- , rawTeXBlock
, divHtml
, htmlBlock
, table
- , lineBlock
, codeBlockIndented
+ , rawTeXBlock
+ , lineBlock
, blockQuote
, hrule
, orderedList
@@ -474,29 +496,28 @@ block = do
, para
, plain
] <?> "block"
- when tr $ do
- st <- getState
+ when tr $
trace (printf "line %d: %s" (sourceLine pos)
- (take 60 $ show $ B.toList $ runF res st)) (return ())
+ (take 60 . show . B.toList $ res)) (return ())
return res
--
-- header blocks
--
-header :: MarkdownParser (F Blocks)
+header :: MarkdownParser Blocks
header = setextHeader <|> atxHeader <?> "header"
-atxHeader :: MarkdownParser (F Blocks)
+atxHeader :: MarkdownParser Blocks
atxHeader = try $ do
- level <- many1 (char '#') >>= return . length
+ level <- length <$> many1 (char '#')
notFollowedBy $ guardEnabled Ext_fancy_lists >>
(char '.' <|> char ')') -- this would be a list
skipSpaces
- text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
+ text <- trimInlines . mconcat <$> many (notFollowedBy atxClosing >> inline)
attr <- atxClosing
- attr' <- registerHeader attr (runF text defaultParserState)
- return $ B.headerWith attr' level <$> text
+ attr' <- registerHeader attr text
+ return $ B.headerWith attr' level text
atxClosing :: MarkdownParser Attr
atxClosing = try $ do
@@ -523,25 +544,25 @@ mmdHeaderIdentifier = do
skipSpaces
return (ident,[],[])
-setextHeader :: MarkdownParser (F Blocks)
+setextHeader :: MarkdownParser Blocks
setextHeader = try $ do
-- This lookahead prevents us from wasting time parsing Inlines
-- unless necessary -- it gives a significant performance boost.
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
- text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
+ text <- trimInlines . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
attr <- setextHeaderEnd
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
- let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- attr' <- registerHeader attr (runF text defaultParserState)
- return $ B.headerWith attr' level <$> text
+ let level = (fromMaybe 0 $ elemIndex underlineChar setextHChars) + 1
+ attr' <- registerHeader attr text
+ return $ B.headerWith attr' level text
--
-- hrule block
--
-hrule :: Parser [Char] st (F Blocks)
+hrule :: Monad m => ParserT [Char] st m Blocks
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -549,24 +570,24 @@ hrule = try $ do
skipMany (spaceChar <|> char start)
newline
optional blanklines
- return $ return B.horizontalRule
+ return B.horizontalRule
--
-- code blocks
--
indentedLine :: MarkdownParser String
-indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
+indentedLine = indentSpaces >> ((++ "\n") <$> anyLine)
-blockDelimiter :: (Char -> Bool)
+blockDelimiter :: Monad m
+ => (Char -> Bool)
-> Maybe Int
- -> Parser [Char] st Int
+ -> ParserT [Char] st m Int
blockDelimiter f len = try $ do
c <- lookAhead (satisfy f)
case len of
Just l -> count l (char c) >> many (char c) >> return l
- Nothing -> count 3 (char c) >> many (char c) >>=
- return . (+ 3) . length
+ Nothing -> count 3 (char c) >> ((+ 3) . length <$> many (char c))
attributes :: MarkdownParser Attr
attributes = try $ do
@@ -611,7 +632,7 @@ specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
-codeBlockFenced :: MarkdownParser (F Blocks)
+codeBlockFenced :: MarkdownParser Blocks
codeBlockFenced = try $ do
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
@@ -623,7 +644,7 @@ codeBlockFenced = try $ do
blankline
contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
blanklines
- return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+ return $ B.codeBlockWith attr $ intercalate "\n" contents
-- correctly handle github language identifiers
toLanguageId :: String -> String
@@ -632,7 +653,7 @@ toLanguageId = map toLower . go
go "objective-c" = "objectivec"
go x = x
-codeBlockIndented :: MarkdownParser (F Blocks)
+codeBlockIndented :: MarkdownParser Blocks
codeBlockIndented = do
contents <- many1 (indentedLine <|>
try (do b <- blanklines
@@ -640,15 +661,15 @@ codeBlockIndented = do
return $ b ++ l))
optional blanklines
classes <- getOption readerIndentedCodeClasses
- return $ return $ B.codeBlockWith ("", classes, []) $
+ return $ B.codeBlockWith ("", classes, []) $
stripTrailingNewlines $ concat contents
-lhsCodeBlock :: MarkdownParser (F Blocks)
+lhsCodeBlock :: MarkdownParser Blocks
lhsCodeBlock = do
guardEnabled Ext_literate_haskell
- (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ (B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
(lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
- <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
+ <|> (B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
lhsCodeBlockInverseBird)
lhsCodeBlockLaTeX :: MarkdownParser String
@@ -677,7 +698,7 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ intercalate "\n" lns'
-birdTrackLine :: Char -> Parser [Char] st String
+birdTrackLine :: Monad m => Char -> ParserT [Char] st m String
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@@ -705,12 +726,12 @@ emailBlockQuote = try $ do
optional blanklines
return raw
-blockQuote :: MarkdownParser (F Blocks)
+blockQuote :: MarkdownParser Blocks
blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
- return $ B.blockQuote <$> contents
+ contents <- parseFromString parseBlocks $ intercalate "\n" raw ++ "\n\n"
+ return $ B.blockQuote contents
--
-- list blocks
@@ -735,9 +756,9 @@ anyOrderedListStart = try $ do
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
res <- do guardDisabled Ext_fancy_lists
- many1 digit
+ start <- many1 digit >>= safeRead
char '.'
- return (1, DefaultStyle, DefaultDelim)
+ return (start, DefaultStyle, DefaultDelim)
<|> do (num, style, delim) <- anyOrderedListMarker
-- if it could be an abbreviated first name,
-- insist on more than one space
@@ -753,7 +774,7 @@ anyOrderedListStart = try $ do
return res
listStart :: MarkdownParser ()
-listStart = bulletListStart <|> (anyOrderedListStart >> return ())
+listStart = bulletListStart <|> void anyOrderedListStart
listLine :: MarkdownParser String
listLine = try $ do
@@ -808,7 +829,7 @@ listContinuationLine = try $ do
return $ result ++ "\n"
listItem :: MarkdownParser a
- -> MarkdownParser (F Blocks)
+ -> MarkdownParser Blocks
listItem start = try $ do
first <- rawListItem start
continuations <- many listContinuation
@@ -824,14 +845,14 @@ listItem start = try $ do
updateState (\st -> st {stateParserContext = oldContext})
return contents
-orderedList :: MarkdownParser (F Blocks)
+orderedList :: MarkdownParser Blocks
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
unless (style `elem` [DefaultStyle, Decimal, Example] &&
delim `elem` [DefaultDelim, Period]) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
- items <- fmap sequence $ many1 $ listItem
+ items <- many1 $ listItem
( try $ do
optional newline -- if preceded by Plain block in a list
startpos <- sourceColumn <$> getPosition
@@ -843,12 +864,12 @@ orderedList = try $ do
atMostSpaces (tabStop - (endpos - startpos))
return res )
start' <- option 1 $ guardEnabled Ext_startnum >> return start
- return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items
+ return $ B.orderedListWith (start', style, delim) (compactify' items)
-bulletList :: MarkdownParser (F Blocks)
+bulletList :: MarkdownParser Blocks
bulletList = do
- items <- fmap sequence $ many1 $ listItem bulletListStart
- return $ B.bulletList <$> fmap compactify' items
+ items <- many1 $ listItem bulletListStart
+ return $ B.bulletList (compactify' items)
-- definition lists
@@ -863,14 +884,14 @@ defListMarker = do
else mzero
return ()
-definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks]))
+definitionListItem :: Bool -> MarkdownParser (Inlines, [Blocks])
definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
- term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
+ term <- parseFromString (trimInlines . mconcat <$> many inline) rawLine'
contents <- mapM (parseFromString parseBlocks) raw
optional blanklines
- return $ liftM2 (,) term (sequence contents)
+ return (term, contents)
defRawBlock :: Bool -> MarkdownParser String
defRawBlock compact = try $ do
@@ -893,32 +914,32 @@ defRawBlock compact = try $ do
return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
if hasBlank || not (null cont) then "\n\n" else ""
-definitionList :: MarkdownParser (F Blocks)
+definitionList :: MarkdownParser Blocks
definitionList = try $ do
lookAhead (anyLine >> optional blankline >> defListMarker)
compactDefinitionList <|> normalDefinitionList
-compactDefinitionList :: MarkdownParser (F Blocks)
+compactDefinitionList :: MarkdownParser Blocks
compactDefinitionList = do
guardEnabled Ext_compact_definition_lists
- items <- fmap sequence $ many1 $ definitionListItem True
- return $ B.definitionList <$> fmap compactify'DL items
+ items <- many1 $ definitionListItem True
+ return $ B.definitionList (compactify'DL items)
-normalDefinitionList :: MarkdownParser (F Blocks)
+normalDefinitionList :: MarkdownParser Blocks
normalDefinitionList = do
guardEnabled Ext_definition_lists
- items <- fmap sequence $ many1 $ definitionListItem False
- return $ B.definitionList <$> items
+ items <- many1 $ definitionListItem False
+ return $ B.definitionList items
--
-- paragraph block
--
-para :: MarkdownParser (F Blocks)
+para :: MarkdownParser Blocks
para = try $ do
exts <- getOption readerExtensions
- result <- trimInlinesF . mconcat <$> many1 inline
- option (B.plain <$> result)
+ result <- trimInlines . mconcat <$> many1 inline
+ option (B.plain result)
$ try $ do
newline
(blanklines >> return mempty)
@@ -926,6 +947,8 @@ para = try $ do
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
<|> (guardEnabled Ext_lists_without_preceding_blankline >>
+ -- Avoid creating a paragraph in a nested list.
+ notFollowedBy' inList >>
() <$ lookAhead listStart)
<|> do guardEnabled Ext_native_divs
inHtmlBlock <- stateInHtmlBlock <$> getState
@@ -933,18 +956,17 @@ para = try $ do
Just "div" -> () <$
lookAhead (htmlTag (~== TagClose "div"))
_ -> mzero
- return $ do
- result' <- result
- case B.toList result' of
+ return $
+ case B.toList result of
[Image alt (src,tit)]
| Ext_implicit_figures `Set.member` exts ->
-- the fig: at beginning of title indicates a figure
- return $ B.para $ B.singleton
+ B.para $ B.singleton
$ Image alt (src,'f':'i':'g':':':tit)
- _ -> return $ B.para result'
+ _ -> B.para result
-plain :: MarkdownParser (F Blocks)
-plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
+plain :: MarkdownParser Blocks
+plain = B.plain . trimInlines . mconcat <$> many1 inline
--
-- raw html
@@ -955,13 +977,13 @@ htmlElement = rawVerbatimBlock
<|> strictHtmlBlock
<|> liftM snd (htmlTag isBlockTag)
-htmlBlock :: MarkdownParser (F Blocks)
+htmlBlock :: MarkdownParser Blocks
htmlBlock = do
guardEnabled Ext_raw_html
try (do
(TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
(guard (t `elem` ["pre","style","script"]) >>
- (return . B.rawBlock "html") <$> rawVerbatimBlock)
+ B.rawBlock "html" <$> rawVerbatimBlock)
<|> (do guardEnabled Ext_markdown_attribute
oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
markdownAttribute <-
@@ -980,35 +1002,35 @@ htmlBlock = do
<|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
<|> htmlBlock'
-htmlBlock' :: MarkdownParser (F Blocks)
+htmlBlock' :: MarkdownParser Blocks
htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
- return $ return $ B.rawBlock "html" first
+ return $ B.rawBlock "html" first
strictHtmlBlock :: MarkdownParser String
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
rawVerbatimBlock :: MarkdownParser String
rawVerbatimBlock = try $ do
- (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem
- ["pre", "style", "script"])
- (const True))
+ (TagOpen tag _, open) <-
+ htmlTag (tagOpen (`elem` ["pre", "style", "script"])
+ (const True))
contents <- manyTill anyChar (htmlTag (~== TagClose tag))
return $ open ++ contents ++ renderTags' [TagClose tag]
-rawTeXBlock :: MarkdownParser (F Blocks)
+rawTeXBlock :: MarkdownParser Blocks
rawTeXBlock = do
guardEnabled Ext_raw_tex
result <- (B.rawBlock "latex" . concat <$>
- rawLaTeXBlock `sepEndBy1` blankline)
+ generalize rawLaTeXBlock `sepEndBy1` blankline)
<|> (B.rawBlock "context" . concat <$>
rawConTeXtEnvironment `sepEndBy1` blankline)
spaces
- return $ return result
+ return result
-rawHtmlBlocks :: MarkdownParser (F Blocks)
+rawHtmlBlocks :: MarkdownParser Blocks
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
-- try to find closing tag
@@ -1020,10 +1042,10 @@ rawHtmlBlocks = do
contents <- mconcat <$> many (notFollowedBy' closer >> block)
result <-
(closer >>= \(_, rawcloser) -> return (
- return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
+ (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
contents <>
- return (B.rawBlock "html" rawcloser)))
- <|> return (return (B.rawBlock "html" raw) <> contents)
+ (B.rawBlock "html" rawcloser)))
+ <|> return (B.rawBlock "html" raw <> contents)
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
return result
@@ -1038,12 +1060,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
-- line block
--
-lineBlock :: MarkdownParser (F Blocks)
+lineBlock :: MarkdownParser Blocks
lineBlock = try $ do
guardEnabled Ext_line_blocks
lines' <- lineBlockLines >>=
- mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
- return $ B.para <$> (mconcat $ intersperse (return B.linebreak) lines')
+ mapM (parseFromString (trimInlines . mconcat <$> many inline))
+ return $ B.para (mconcat $ intersperse B.linebreak lines')
--
-- Tables
@@ -1051,17 +1073,19 @@ lineBlock = try $ do
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
-dashedLine :: Char
- -> Parser [Char] st (Int, Int)
+dashedLine :: Monad m => Char
+ -> ParserT [Char] st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
- return $ (length dashes, length $ dashes ++ sp)
+ let lengthDashes = length dashes
+ lengthSp = length sp
+ return (lengthDashes, lengthDashes + lengthSp)
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
simpleTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+ -> MarkdownParser ([Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -1080,9 +1104,8 @@ simpleTableHeader headless = try $ do
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
- heads <- fmap sequence
- $ mapM (parseFromString (mconcat <$> many plain))
- $ map trim rawHeads'
+ heads <-
+ mapM (parseFromString (mconcat <$> many plain) . trim) rawHeads'
return (heads, aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
@@ -1123,30 +1146,30 @@ rawTableLine indices = do
-- Parse a table line and return a list of lists of blocks (columns).
tableLine :: [Int]
- -> MarkdownParser (F [Blocks])
+ -> MarkdownParser [Blocks]
tableLine indices = rawTableLine indices >>=
- fmap sequence . mapM (parseFromString (mconcat <$> many plain))
+ mapM (parseFromString (mconcat <$> many plain))
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow :: [Int]
- -> MarkdownParser (F [Blocks])
+ -> MarkdownParser [Blocks]
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map unlines $ transpose colLines
- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
+ mapM (parseFromString (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
-tableCaption :: MarkdownParser (F Inlines)
+tableCaption :: MarkdownParser Inlines
tableCaption = try $ do
guardEnabled Ext_table_captions
skipNonindentSpaces
string ":" <|> string "Table:"
- trimInlinesF . mconcat <$> many1 inline <* blanklines
+ trimInlines . mconcat <$> many1 inline <* blanklines
-- Parse a simple table with '---' header and one line per row.
simpleTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+ -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
simpleTable headless = do
(aligns, _widths, heads', lines') <-
tableWith (simpleTableHeader headless) tableLine
@@ -1160,12 +1183,12 @@ simpleTable headless = do
-- which may be multiline, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
multilineTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+ -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
multilineTable headless =
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
multilineTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+ -> MarkdownParser ([Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
unless headless $
tableSep >> notFollowedBy blankline
@@ -1187,7 +1210,7 @@ multilineTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else map (unlines . map trim) rawHeadsList
- heads <- fmap sequence $
+ heads <-
mapM (parseFromString (mconcat <$> many plain)) $
map trim rawHeads
return (heads, aligns, indices)
@@ -1197,7 +1220,7 @@ multilineTableHeader headless = try $ do
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
gridTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+ -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
gridTable headless =
tableWith (gridTableHeader headless) gridTableRow
(gridTableSep '-') gridTableFooter
@@ -1206,13 +1229,14 @@ gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitStringByIndices (init indices) $ trimr line
-gridPart :: Char -> Parser [Char] st (Int, Int)
+gridPart :: Monad m => Char -> ParserT [Char] st m (Int, Int)
gridPart ch = do
dashes <- many1 (char ch)
char '+'
- return (length dashes, length dashes + 1)
+ let lengthDashes = length dashes
+ return (lengthDashes, lengthDashes + 1)
-gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
+gridDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: String -> String
@@ -1225,7 +1249,7 @@ gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
gridTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+ -> MarkdownParser ([Blocks], [Alignment], [Int])
gridTableHeader headless = try $ do
optional blanklines
dashes <- gridDashedLines '-'
@@ -1234,9 +1258,7 @@ gridTableHeader headless = try $ do
else many1
(notFollowedBy (gridTableSep '=') >> char '|' >>
many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
+ unless headless (void $ gridTableSep '=')
let lines' = map snd dashes
let indices = scanl (+) 0 lines'
let aligns = replicate (length lines') AlignDefault
@@ -1245,7 +1267,7 @@ gridTableHeader headless = try $ do
then replicate (length dashes) ""
else map (unlines . map trim) $ transpose
$ map (gridTableSplitLine indices) rawContent
- heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
+ heads <- mapM (parseFromString parseBlocks . trim) rawHeads
return (heads, aligns, indices)
gridTableRawLine :: [Int] -> MarkdownParser [String]
@@ -1256,12 +1278,12 @@ gridTableRawLine indices = do
-- | Parse row of grid table.
gridTableRow :: [Int]
- -> MarkdownParser (F [Blocks])
+ -> MarkdownParser [Blocks]
gridTableRow indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
transpose colLines
- fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
+ compactify' <$> mapM (parseFromString parseBlocks) cols
removeOneLeadingSpace :: [String] -> [String]
removeOneLeadingSpace xs =
@@ -1287,16 +1309,12 @@ pipeBreak = try $ do
blankline
return (first:rest)
-pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+pipeTable :: MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
pipeTable = try $ do
- (heads,aligns) <- try ( pipeBreak >>= \als ->
- return (return $ replicate (length als) mempty, als))
- <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als ->
-
- return (row, als) )
- lines' <- sequence <$> many1 pipeTableRow
+ (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak
+ lines' <- many pipeTableRow
let widths = replicate (length aligns) 0.0
- return $ (aligns, widths, heads, lines')
+ return (aligns, widths, heads, lines')
sepPipe :: MarkdownParser ()
sepPipe = try $ do
@@ -1304,7 +1322,7 @@ sepPipe = try $ do
notFollowedBy blankline
-- parse a row, also returning probable alignments for org-table cells
-pipeTableRow :: MarkdownParser (F [Blocks])
+pipeTableRow :: MarkdownParser [Blocks]
pipeTableRow = do
nonindentSpaces
openPipe <- (True <$ char '|') <|> return False
@@ -1316,16 +1334,14 @@ pipeTableRow = do
guard $ not (null rest && not openPipe)
optional (char '|')
blankline
- let cells = sequence (first:rest)
- return $ do
- cells' <- cells
- return $ map
- (\ils ->
+ let cells = first:rest
+ return $
+ map (\ils ->
case trimInlines ils of
ils' | B.isNull ils' -> mempty
- | otherwise -> B.plain $ ils') cells'
+ | otherwise -> B.plain ils') cells
-pipeTableHeaderPart :: Parser [Char] st Alignment
+pipeTableHeaderPart :: Monad m => ParserT [Char] st m Alignment
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@@ -1340,7 +1356,7 @@ pipeTableHeaderPart = try $ do
(Just _,Just _) -> AlignCenter
-- Succeed only if current line contains a pipe.
-scanForPipe :: Parser [Char] st ()
+scanForPipe :: Monad m => ParserT [Char] st m ()
scanForPipe = do
inp <- getInput
case break (\c -> c == '\n' || c == '|') inp of
@@ -1350,22 +1366,22 @@ scanForPipe = do
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'. Variant of the version in
-- Text.Pandoc.Parsing.
-tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int])
- -> ([Int] -> MarkdownParser (F [Blocks]))
+tableWith :: MarkdownParser ([Blocks], [Alignment], [Int])
+ -> ([Int] -> MarkdownParser [Blocks])
-> MarkdownParser sep
-> MarkdownParser end
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+ -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
- lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
+ lines' <- rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
- let widths = if (indices == [])
- then replicate (length aligns) 0.0
- else widthsFromIndices numColumns indices
- return $ (aligns, widths, heads, lines')
+ let widths = case indices of
+ [] -> replicate (length aligns) 0.0
+ _ -> widthsFromIndices numColumns indices
+ return (aligns, widths, heads, lines')
-table :: MarkdownParser (F Blocks)
+table :: MarkdownParser Blocks
table = try $ do
frontCaption <- option Nothing (Just <$> tableCaption)
(aligns, widths, heads, lns) <-
@@ -1380,19 +1396,15 @@ table = try $ do
(gridTable False <|> gridTable True)) <?> "table"
optional blanklines
caption <- case frontCaption of
- Nothing -> option (return mempty) tableCaption
+ Nothing -> option mempty tableCaption
Just c -> return c
- return $ do
- caption' <- caption
- heads' <- heads
- lns' <- lns
- return $ B.table caption' (zip aligns widths) heads' lns'
+ return $ B.table caption (zip aligns widths) heads lns
--
-- inline
--
-inline :: MarkdownParser (F Inlines)
+inline :: MarkdownParser Inlines
inline = choice [ whitespace
, bareURL
, str
@@ -1415,7 +1427,7 @@ inline = choice [ whitespace
, rawLaTeXInline'
, exampleRef
, smart
- , return . B.singleton <$> charRef
+ , B.singleton <$> charRef
, symbol
, ltSign
] <?> "inline"
@@ -1426,43 +1438,42 @@ escapedChar' = try $ do
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
<|> oneOf "\\`*_{}[]()>#+-.!~\""
-escapedChar :: MarkdownParser (F Inlines)
+escapedChar :: MarkdownParser Inlines
escapedChar = do
result <- escapedChar'
case result of
- ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
+ ' ' -> return $ B.str "\160" -- "\ " is a nonbreaking space
'\n' -> guardEnabled Ext_escaped_line_breaks >>
- return (return B.linebreak) -- "\[newline]" is a linebreak
- _ -> return $ return $ B.str [result]
+ return B.linebreak -- "\[newline]" is a linebreak
+ _ -> return $ B.str [result]
-ltSign :: MarkdownParser (F Inlines)
+ltSign :: MarkdownParser Inlines
ltSign = do
guardDisabled Ext_raw_html
<|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
char '<'
- return $ return $ B.str "<"
+ return $ B.str "<"
-exampleRef :: MarkdownParser (F Inlines)
+exampleRef :: MarkdownParser Inlines
exampleRef = try $ do
guardEnabled Ext_example_lists
char '@'
lab <- many1 (alphaNum <|> oneOf "-_")
- return $ do
- st <- askF
- return $ case M.lookup lab (stateExamples st) of
- Just n -> B.str (show n)
- Nothing -> B.str ('@':lab)
+ st <- ask
+ return $ case M.lookup lab (stateExamples st) of
+ Just n -> B.str (show n)
+ Nothing -> B.str ('@':lab)
-symbol :: MarkdownParser (F Inlines)
+symbol :: MarkdownParser Inlines
symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
notFollowedBy' (() <$ rawTeXBlock)
char '\\')
- return $ return $ B.str [result]
+ return $ B.str [result]
-- parses inline code, between n `s and n `s
-code :: MarkdownParser (F Inlines)
+code :: MarkdownParser Inlines
code = try $ do
starts <- many1 (char '`')
skipSpaces
@@ -1472,16 +1483,17 @@ code = try $ do
notFollowedBy (char '`')))
attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >>
optional whitespace >> attributes)
- return $ return $ B.codeWith attr $ trim $ concat result
+ return $ B.codeWith attr $ trim $ concat result
-math :: MarkdownParser (F Inlines)
-math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
- <|> (return . B.math <$> (mathInline >>= applyMacros'))
+math :: MarkdownParser Inlines
+math = (B.displayMath <$> (mathDisplay >>= applyMacros'))
+ <|> ((B.math <$> (mathInline >>= applyMacros')) <+?>
+ ((getOption readerSmart >>= guard) *> apostrophe <* notFollowedBy space))
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
enclosure :: Char
- -> MarkdownParser (F Inlines)
+ -> MarkdownParser Inlines
enclosure c = do
-- we can't start an enclosure with _ if after a string and
-- the intraword_underscores extension is enabled:
@@ -1489,13 +1501,13 @@ enclosure c = do
<|> guard (c == '*')
<|> (guard =<< notAfterString)
cs <- many1 (char c)
- (return (B.str cs) <>) <$> whitespace
- <|> do
+ (B.str cs <>) <$> whitespace
+ <|>
case length cs of
3 -> three c
2 -> two c mempty
1 -> one c mempty
- _ -> return (return $ B.str cs)
+ _ -> return $ B.str cs
ender :: Char -> Int -> MarkdownParser ()
ender c n = try $ do
@@ -1508,74 +1520,74 @@ ender c n = try $ do
-- If one c, emit emph and then parse two.
-- If two cs, emit strong and then parse one.
-- Otherwise, emit ccc then the results.
-three :: Char -> MarkdownParser (F Inlines)
+three :: Char -> MarkdownParser Inlines
three c = do
contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
- (ender c 3 >> return ((B.strong . B.emph) <$> contents))
- <|> (ender c 2 >> one c (B.strong <$> contents))
- <|> (ender c 1 >> two c (B.emph <$> contents))
- <|> return (return (B.str [c,c,c]) <> contents)
+ (ender c 3 >> return ((B.strong . B.emph) contents))
+ <|> (ender c 2 >> one c (B.strong contents))
+ <|> (ender c 1 >> two c (B.emph contents))
+ <|> return (B.str [c,c,c] <> contents)
-- Parse inlines til you hit two c's, and emit strong.
-- If you never do hit two cs, emit ** plus inlines parsed.
-two :: Char -> F Inlines -> MarkdownParser (F Inlines)
+two :: Char -> Inlines -> MarkdownParser Inlines
two c prefix' = do
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
- (ender c 2 >> return (B.strong <$> (prefix' <> contents)))
- <|> return (return (B.str [c,c]) <> (prefix' <> contents))
+ (ender c 2 >> return (B.strong (prefix' <> contents)))
+ <|> return (B.str [c,c] <> (prefix' <> contents))
-- Parse inlines til you hit a c, and emit emph.
-- If you never hit a c, emit * plus inlines parsed.
-one :: Char -> F Inlines -> MarkdownParser (F Inlines)
+one :: Char -> Inlines -> MarkdownParser Inlines
one c prefix' = do
contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
<|> try (string [c,c] >>
notFollowedBy (ender c 1) >>
two c mempty) )
- (ender c 1 >> return (B.emph <$> (prefix' <> contents)))
- <|> return (return (B.str [c]) <> (prefix' <> contents))
+ (ender c 1 >> return (B.emph (prefix' <> contents)))
+ <|> return (B.str [c] <> (prefix' <> contents))
-strongOrEmph :: MarkdownParser (F Inlines)
+strongOrEmph :: MarkdownParser Inlines
strongOrEmph = enclosure '*' <|> enclosure '_'
--- | Parses a list of inlines between start and end delimiters.
+-- | Parses a list oInlines between start and end delimiters.
inlinesBetween :: (Show b)
=> MarkdownParser a
-> MarkdownParser b
- -> MarkdownParser (F Inlines)
+ -> MarkdownParser Inlines
inlinesBetween start end =
- (trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
+ (trimInlines . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace <* notFollowedBy' end
-strikeout :: MarkdownParser (F Inlines)
-strikeout = fmap B.strikeout <$>
+strikeout :: MarkdownParser Inlines
+strikeout = B.strikeout <$>
(guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
-superscript :: MarkdownParser (F Inlines)
-superscript = fmap B.superscript <$> try (do
+superscript :: MarkdownParser Inlines
+superscript = B.superscript <$> try (do
guardEnabled Ext_superscript
char '^'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
-subscript :: MarkdownParser (F Inlines)
-subscript = fmap B.subscript <$> try (do
+subscript :: MarkdownParser Inlines
+subscript = B.subscript <$> try (do
guardEnabled Ext_subscript
char '~'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
-whitespace :: MarkdownParser (F Inlines)
-whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
+whitespace :: MarkdownParser Inlines
+whitespace = spaceChar >> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
-nonEndline :: Parser [Char] st Char
+nonEndline :: Monad m => ParserT [Char] st m Char
nonEndline = satisfy (/='\n')
-str :: MarkdownParser (F Inlines)
+str :: MarkdownParser Inlines
str = do
result <- many1 alphaNum
updateLastStrPos
@@ -1583,14 +1595,14 @@ str = do
isSmart <- getOption readerSmart
if isSmart
then case likelyAbbrev result of
- [] -> return $ return $ B.str result
+ [] -> return $ B.str result
xs -> choice (map (\x ->
try (string x >> oneOf " \n" >>
lookAhead alphaNum >>
- return (return $ B.str
- $ result ++ spacesToNbr x ++ "\160"))) xs)
- <|> (return $ return $ B.str result)
- else return $ return $ B.str result
+ return (B.str $
+ result ++ spacesToNbr x ++ "\160"))) xs)
+ <|> (return $ B.str result)
+ else return $ B.str result
-- | if the string matches the beginning of an abbreviation (before
-- the first period, return strings that would finish the abbreviation.
@@ -1605,13 +1617,12 @@ likelyAbbrev x =
in map snd $ filter (\(y,_) -> y == x) abbrPairs
-- an endline character that can be treated as a space, not a structural break
-endline :: MarkdownParser (F Inlines)
+endline :: MarkdownParser Inlines
endline = try $ do
newline
notFollowedBy blankline
-- parse potential list-starts differently if in a list:
- st <- getState
- when (stateParserContext st == ListItemState) $ notFollowedBy listStart
+ notFollowedBy (inList >> listStart)
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
@@ -1619,18 +1630,18 @@ endline = try $ do
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
notFollowedByHtmlCloser
(eof >> return mempty)
- <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
+ <|> (guardEnabled Ext_hard_line_breaks >> return B.linebreak)
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
- <|> (return $ return B.space)
+ <|> return B.space
--
-- links
--
-- a reference label for a link
-reference :: MarkdownParser (F Inlines, String)
+reference :: MarkdownParser (Inlines, String)
reference = do notFollowedBy' (string "[^") -- footnote reference
- withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
+ withRaw $ trimInlines <$> inlinesInBalancedBrackets
parenthesizedChars :: MarkdownParser [Char]
parenthesizedChars = do
@@ -1658,7 +1669,7 @@ source = do
linkTitle :: MarkdownParser String
linkTitle = quotedTitle '"' <|> quotedTitle '\''
-link :: MarkdownParser (F Inlines)
+link :: MarkdownParser Inlines
link = try $ do
st <- getState
guard $ stateAllowLinks st
@@ -1668,43 +1679,43 @@ link = try $ do
regLink B.link lab <|> referenceLink B.link (lab,raw)
regLink :: (String -> String -> Inlines -> Inlines)
- -> F Inlines -> MarkdownParser (F Inlines)
+ -> Inlines -> MarkdownParser Inlines
regLink constructor lab = try $ do
(src, tit) <- source
- return $ constructor src tit <$> lab
+ return $ constructor src tit lab
-- a link like [this][ref] or [this][] or [this]
referenceLink :: (String -> String -> Inlines -> Inlines)
- -> (F Inlines, String) -> MarkdownParser (F Inlines)
+ -> (Inlines, String) -> MarkdownParser Inlines
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
- (ref,raw') <- try
- (skipSpaces >> optional (newline >> skipSpaces) >> reference)
- <|> return (mempty, "")
+ (ref,raw') <- option (mempty, "") $
+ lookAhead (try (spnl >> normalCite >> return (mempty, "")))
+ <|>
+ try (spnl >> reference)
+ when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
parsedRaw <- parseFromString (mconcat <$> many inline) raw'
fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
implicitHeaderRefs <- option False $
True <$ guardEnabled Ext_implicit_header_references
- let makeFallback = do
- parsedRaw' <- parsedRaw
- fallback' <- fallback
- return $ B.str "[" <> fallback' <> B.str "]" <>
+ let makeFallback =
+ B.str "[" <> fallback <> B.str "]" <>
(if sp && not (null raw) then B.space else mempty) <>
- parsedRaw'
- return $ do
- keys <- asksF stateKeys
- case M.lookup key keys of
- Nothing -> do
- headers <- asksF stateHeaders
- ref' <- if labIsRef then lab else ref
- if implicitHeaderRefs
- then case M.lookup ref' headers of
- Just ident -> constructor ('#':ident) "" <$> lab
- Nothing -> makeFallback
- else makeFallback
- Just (src,tit) -> constructor src tit <$> lab
+ parsedRaw
+ keys <- asks stateKeys
+ headers <- asks stateHeaders
+ return $
+ case M.lookup key keys of
+ Nothing ->
+ let ref' = if labIsRef then lab else ref in
+ if implicitHeaderRefs
+ then case M.lookup ref' headers of
+ Just ident -> constructor ('#':ident) "" lab
+ Nothing -> makeFallback
+ else makeFallback
+ Just (src,tit) -> constructor src tit lab
dropBrackets :: String -> String
dropBrackets = reverse . dropRB . reverse . dropLB
@@ -1713,14 +1724,14 @@ dropBrackets = reverse . dropRB . reverse . dropLB
dropLB ('[':xs) = xs
dropLB xs = xs
-bareURL :: MarkdownParser (F Inlines)
+bareURL :: MarkdownParser Inlines
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
(orig, src) <- uri <|> emailAddress
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
- return $ return $ B.link src "" (B.str orig)
+ return $ B.link src "" (B.str orig)
-autoLink :: MarkdownParser (F Inlines)
+autoLink :: MarkdownParser Inlines
autoLink = try $ do
char '<'
(orig, src) <- uri <|> emailAddress
@@ -1729,9 +1740,9 @@ autoLink = try $ do
-- final punctuation. for example: in `<http://hi---there>`,
-- the URI parser will stop before the dashes.
extra <- fromEntities <$> manyTill nonspaceChar (char '>')
- return $ return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
+ return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
-image :: MarkdownParser (F Inlines)
+image :: MarkdownParser Inlines
image = try $ do
char '!'
(lab,raw) <- reference
@@ -1741,38 +1752,33 @@ image = try $ do
_ -> B.image src
regLink constructor lab <|> referenceLink constructor (lab,raw)
-note :: MarkdownParser (F Inlines)
+note :: MarkdownParser Inlines
note = try $ do
guardEnabled Ext_footnotes
+ (stateInFootnote <$> getState) >>= guard . not
ref <- noteMarker
- return $ do
- notes <- asksF stateNotes'
+ notes <- asks stateNotes'
+ return $
case lookup ref notes of
- Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
- Just contents -> do
- st <- askF
- -- process the note in a context that doesn't resolve
- -- notes, to avoid infinite looping with notes inside
- -- notes:
- let contents' = runF contents st{ stateNotes' = [] }
- return $ B.note contents'
-
-inlineNote :: MarkdownParser (F Inlines)
+ Nothing -> B.str $ "[^" ++ ref ++ "]"
+ Just contents -> B.note contents
+
+inlineNote :: MarkdownParser Inlines
inlineNote = try $ do
guardEnabled Ext_inline_notes
char '^'
contents <- inlinesInBalancedBrackets
- return $ B.note . B.para <$> contents
+ return . B.note . B.para $ contents
-rawLaTeXInline' :: MarkdownParser (F Inlines)
+rawLaTeXInline' :: MarkdownParser Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
- RawInline _ s <- rawLaTeXInline
- return $ return $ B.rawInline "tex" s
+ RawInline _ s <- generalize rawLaTeXInline
+ return $ B.rawInline "tex" s
-- "tex" because it might be context or latex
-rawConTeXtEnvironment :: Parser [Char] st String
+rawConTeXtEnvironment :: Monad m => ParserT [Char] st m String
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1781,14 +1787,14 @@ rawConTeXtEnvironment = try $ do
(try $ string "\\stop" >> string completion)
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
-inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String
+inBrackets :: Monad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String
inBrackets parser = do
char '['
contents <- many parser
char ']'
return $ "[" ++ contents ++ "]"
-spanHtml :: MarkdownParser (F Inlines)
+spanHtml :: MarkdownParser Inlines
spanHtml = try $ do
guardEnabled Ext_native_spans
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
@@ -1800,10 +1806,10 @@ spanHtml = try $ do
Just s | null ident && null classes &&
map toLower (filter (`notElem` " \t;") s) ==
"font-variant:small-caps"
- -> return $ B.smallcaps <$> contents
- _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents
+ -> return $ B.smallcaps contents
+ _ -> return $ B.spanWith (ident, classes, keyvals) contents
-divHtml :: MarkdownParser (F Blocks)
+divHtml :: MarkdownParser Blocks
divHtml = try $ do
guardEnabled Ext_native_divs
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
@@ -1821,11 +1827,11 @@ divHtml = try $ do
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ B.divWith (ident, classes, keyvals) <$> contents
+ return $ B.divWith (ident, classes, keyvals) contents
else -- avoid backtracing
- return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+ return $ B.rawBlock "html" (rawtag <> bls) <> contents
-rawHtmlInline :: MarkdownParser (F Inlines)
+rawHtmlInline :: MarkdownParser Inlines
rawHtmlInline = do
guardEnabled Ext_raw_html
inHtmlBlock <- stateInHtmlBlock <$> getState
@@ -1840,19 +1846,17 @@ rawHtmlInline = do
then (\x -> isInlineTag x &&
not (isCloseBlockTag x))
else not . isTextTag
- return $ return $ B.rawInline "html" result
+ return $ B.rawInline "html" result
-- Citations
-cite :: MarkdownParser (F Inlines)
+cite :: MarkdownParser Inlines
cite = do
guardEnabled Ext_citations
- citations <- textualCite
- <|> do (cs, raw) <- withRaw normalCite
- return $ (flip B.cite (B.text raw)) <$> cs
- return citations
+ textualCite <|> do (cs, raw) <- withRaw normalCite
+ return $ B.cite cs (B.text raw)
-textualCite :: MarkdownParser (F Inlines)
+textualCite :: MarkdownParser Inlines
textualCite = try $ do
(_, key) <- citeKey
let first = Citation{ citationId = key
@@ -1866,29 +1870,26 @@ textualCite = try $ do
case mbrest of
Just (rest, raw) ->
return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
- <$> rest
+ rest
Nothing ->
(do (cs, raw) <- withRaw $ bareloc first
- return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs)
- <|> return (do st <- askF
- return $ case M.lookup key (stateExamples st) of
- Just n -> B.str (show n)
- _ -> B.cite [first] $ B.str $ '@':key)
+ return $ B.cite cs (B.text $ '@':key ++ " " ++ raw))
+ <|> do st <- ask
+ return $ case M.lookup key (stateExamples st) of
+ Just n -> B.str (show n)
+ _ -> B.cite [first] $ B.str $ '@':key
-bareloc :: Citation -> MarkdownParser (F [Citation])
+bareloc :: Citation -> MarkdownParser [Citation]
bareloc c = try $ do
spnl
char '['
suff <- suffix
- rest <- option (return []) $ try $ char ';' >> citeList
+ rest <- option [] $ try $ char ';' >> citeList
spnl
char ']'
- return $ do
- suff' <- suff
- rest' <- rest
- return $ c{ citationSuffix = B.toList suff' } : rest'
+ return $ c{ citationSuffix = B.toList suff } : rest
-normalCite :: MarkdownParser (F [Citation])
+normalCite :: MarkdownParser [Citation]
normalCite = try $ do
char '['
spnl
@@ -1897,60 +1898,57 @@ normalCite = try $ do
char ']'
return citations
-suffix :: MarkdownParser (F Inlines)
+suffix :: MarkdownParser Inlines
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
- rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
+ rest <- trimInlines . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
return $ if hasSpace
- then (B.space <>) <$> rest
+ then B.space <> rest
else rest
-prefix :: MarkdownParser (F Inlines)
-prefix = trimInlinesF . mconcat <$>
+prefix :: MarkdownParser Inlines
+prefix = trimInlines . mconcat <$>
manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
-citeList :: MarkdownParser (F [Citation])
-citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
+citeList :: MarkdownParser [Citation]
+citeList = sepBy1 citation (try $ char ';' >> spnl)
-citation :: MarkdownParser (F Citation)
+citation :: MarkdownParser Citation
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
-
-smart :: MarkdownParser (F Inlines)
+ return Citation{ citationId = key
+ , citationPrefix = B.toList pref
+ , citationSuffix = B.toList suff
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+smart :: MarkdownParser Inlines
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [apostrophe, dash, ellipses])
+ choice [apostrophe, dash, ellipses]
-singleQuoted :: MarkdownParser (F Inlines)
+singleQuoted :: MarkdownParser Inlines
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
- fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ B.singleQuoted . trimInlines . mconcat <$>
many1Till inline singleQuoteEnd
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
-doubleQuoted :: MarkdownParser (F Inlines)
+doubleQuoted :: MarkdownParser Inlines
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
- (fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> (return $ return (B.str "\8220") <> contents)
+ (withQuoteContext InDoubleQuote doubleQuoteEnd >> return
+ (B.doubleQuoted . trimInlines $ contents))
+ <|> return (B.str "\8220" <> contents)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index e43b8a86c..939d10fb2 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -58,21 +58,21 @@ import Data.Maybe (fromMaybe)
import Text.Printf (printf)
import Debug.Trace (trace)
+import Text.Pandoc.Error
+
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readMediaWiki opts s =
- case runParser parseMediaWiki MWState{ mwOptions = opts
+ readWith parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
, mwCategoryLinks = []
, mwHeaderMap = M.empty
, mwIdentifierList = []
}
- "source" (s ++ "\n") of
- Left err' -> error $ "\nError:\n" ++ show err'
- Right result -> result
+ (s ++ "\n")
data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
@@ -593,11 +593,17 @@ imageOption =
<|> try (many1 (oneOf "x0123456789") <* string "px")
<|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
+collapseUnderscores :: String -> String
+collapseUnderscores [] = []
+collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs)
+collapseUnderscores (x:xs) = x : collapseUnderscores xs
+
+addUnderscores :: String -> String
+addUnderscores = collapseUnderscores . intercalate "_" . words
+
internalLink :: MWParser Inlines
internalLink = try $ do
sym "[["
- let addUnderscores x = let (pref,suff) = break (=='#') x
- in pref ++ intercalate "_" (words suff)
pagename <- unwords . words <$> many (noneOf "|]")
label <- option (B.text pagename) $ char '|' *>
( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index f4dfa62c1..fc6b3362a 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -3,7 +3,7 @@ Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; Either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
@@ -33,6 +33,9 @@ module Text.Pandoc.Readers.Native ( readNative ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Error
+import Control.Applicative
+
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
-- an inline list, or an inline. Thus, for example,
@@ -44,33 +47,18 @@ import Text.Pandoc.Shared (safeRead)
-- > Pandoc nullMeta [Plain [Str "hi"]]
--
readNative :: String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
-readNative s =
- case safeRead s of
- Just d -> d
- Nothing -> Pandoc nullMeta $ readBlocks s
+ -> Either PandocError Pandoc
+readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s)
-readBlocks :: String -> [Block]
-readBlocks s =
- case safeRead s of
- Just d -> d
- Nothing -> [readBlock s]
+readBlocks :: String -> Either PandocError [Block]
+readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
-readBlock :: String -> Block
-readBlock s =
- case safeRead s of
- Just d -> d
- Nothing -> Plain $ readInlines s
+readBlock :: String -> Either PandocError Block
+readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s)
-readInlines :: String -> [Inline]
-readInlines s =
- case safeRead s of
- Just d -> d
- Nothing -> [readInline s]
+readInlines :: String -> Either PandocError [Inline]
+readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s)
-readInline :: String -> Inline
-readInline s =
- case safeRead s of
- Just d -> d
- Nothing -> error "Cannot parse document"
+readInline :: String -> Either PandocError Inline
+readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 35d01e877..19ddba36b 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts #-}
module Text.Pandoc.Readers.OPML ( readOPML ) where
import Data.Char (toUpper)
import Text.Pandoc.Options
@@ -11,8 +12,11 @@ import Data.Generics
import Data.Monoid
import Control.Monad.State
import Control.Applicative ((<$>), (<$))
+import Data.Default
+import Text.Pandoc.Compat.Except
+import Text.Pandoc.Error
-type OPML = State OPMLState
+type OPML = ExceptT PandocError (State OPMLState)
data OPMLState = OPMLState{
opmlSectionLevel :: Int
@@ -21,17 +25,19 @@ data OPMLState = OPMLState{
, opmlDocDate :: Inlines
} deriving Show
-readOPML :: ReaderOptions -> String -> Pandoc
+instance Default OPMLState where
+ def = OPMLState{ opmlSectionLevel = 0
+ , opmlDocTitle = mempty
+ , opmlDocAuthors = []
+ , opmlDocDate = mempty
+ }
+
+readOPML :: ReaderOptions -> String -> Either PandocError Pandoc
readOPML _ inp = setTitle (opmlDocTitle st')
- $ setAuthors (opmlDocAuthors st')
- $ setDate (opmlDocDate st')
- $ doc $ mconcat bs
- where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp)
- OPMLState{ opmlSectionLevel = 0
- , opmlDocTitle = mempty
- , opmlDocAuthors = []
- , opmlDocDate = mempty
- }
+ . setAuthors (opmlDocAuthors st')
+ . setDate (opmlDocDate st')
+ . doc . mconcat <$> bs
+ where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp)
-- normalize input, consolidating adjacent Text and CRef elements
normalizeTree :: [Content] -> [Content]
@@ -58,14 +64,16 @@ attrValue attr elt =
Just z -> z
Nothing -> ""
-asHtml :: String -> Inlines
-asHtml s = case readHtml def s of
- Pandoc _ [Plain ils] -> fromList ils
- _ -> mempty
+exceptT :: Either PandocError a -> OPML a
+exceptT = either throwError return
+
+asHtml :: String -> OPML Inlines
+asHtml s = (\(Pandoc _ bs) -> case bs of
+ [Plain ils] -> fromList ils
+ _ -> mempty) <$> exceptT (readHtml def s)
-asMarkdown :: String -> Blocks
-asMarkdown s = fromList bs
- where Pandoc _ bs = readMarkdown def s
+asMarkdown :: String -> OPML Blocks
+asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s)
getBlocks :: Element -> OPML Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
@@ -82,8 +90,8 @@ parseBlock (Elem e) =
"outline" -> gets opmlSectionLevel >>= sect . (+1)
"?xml" -> return mempty
_ -> getBlocks e
- where sect n = do let headerText = asHtml $ attrValue "text" e
- let noteBlocks = asMarkdown $ attrValue "_note" e
+ where sect n = do headerText <- asHtml $ attrValue "text" e
+ noteBlocks <- asMarkdown $ attrValue "_note" e
modify $ \st -> st{ opmlSectionLevel = n }
bs <- getBlocks e
modify $ \st -> st{ opmlSectionLevel = n - 1 }
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 62421d2fb..fc63cc11e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,5 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
{-
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
@@ -35,47 +39,85 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
-import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
- , newline, orderedListMarker
- , parseFromString
+import Text.Pandoc.Parsing hiding ( newline, orderedListMarker
+ , parseFromString, blanklines
)
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.Pandoc.Shared (compactify', compactify'DL)
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
+import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-import Control.Applicative ( Applicative, pure
+import Control.Applicative ( pure
, (<$>), (<$), (<*>), (<*), (*>) )
import Control.Arrow (first)
-import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
-import Control.Monad.Reader (Reader, runReader, ask, asks)
+import Control.Monad (guard, mplus, mzero, when)
+import Control.Monad.Reader (Reader, runReader, asks, local)
import Data.Char (isAlphaNum, toLower)
import Data.Default
-import Data.List (intersperse, isPrefixOf, isSuffixOf)
+import Data.List (intersperse, isPrefixOf, isSuffixOf, foldl')
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust)
-import Data.Monoid (Monoid, mconcat, mempty, mappend)
+import Data.Monoid (mconcat, mempty, mappend)
import Network.HTTP (urlEncode)
+import Text.Pandoc.Error
+
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
-readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
+ -> Either PandocError Pandoc
+readOrg opts s = runOrg opts s parseOrg
+
+data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext
+ , finalState :: OrgParserState }
-type OrgParser = Parser [Char] OrgParserState
+type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+
+runOrg :: ReaderOptions -> String -> OrgParser a -> Either PandocError a
+runOrg opts inp p = fst <$> res
+ where
+ imd = readWithM (returnState p) def{ orgStateOptions = opts } (inp ++ "\n\n")
+ res = runReader imd def { finalState = s }
+ s :: OrgParserState
+ s = either def snd res
parseOrg :: OrgParser Pandoc
parseOrg = do
blocks' <- parseBlocks
st <- getState
- let meta = runF (orgStateMeta' st) st
- return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)
+ let meta = orgStateMeta st
+ let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
+ return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ blocks')
+
+-- | Drop COMMENT headers and the document tree below those headers.
+dropCommentTrees :: [Block] -> [Block]
+dropCommentTrees [] = []
+dropCommentTrees blks@(b:bs) =
+ maybe blks (flip dropUntilHeaderAboveLevel bs) $ commentHeaderLevel b
+
+-- | Return the level of a header starting a comment tree and Nothing
+-- otherwise.
+commentHeaderLevel :: Block -> Maybe Int
+commentHeaderLevel blk =
+ case blk of
+ (Header level _ ((Str "COMMENT"):_)) -> Just level
+ _ -> Nothing
+
+-- | Drop blocks until a header on or above the given level is seen
+dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block]
+dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n)
+
+isHeaderLevelLowerEq :: Int -> Block -> Bool
+isHeaderLevelLowerEq n blk =
+ case blk of
+ (Header level _ _) -> n >= level
+ _ -> False
--
-- Parser State for Org
--
-type OrgNoteRecord = (String, F Blocks)
+type OrgNoteRecord = (String, Blocks)
type OrgNoteTable = [OrgNoteRecord]
type OrgBlockAttributes = M.Map String String
@@ -94,10 +136,12 @@ data OrgParserState = OrgParserState
, orgStateLastStrPos :: Maybe SourcePos
, orgStateLinkFormatters :: OrgLinkFormatters
, orgStateMeta :: Meta
- , orgStateMeta' :: F Meta
, orgStateNotes' :: OrgNoteTable
}
+instance Default OrgParserLocal where
+ def = OrgParserLocal NoQuote def
+
instance HasReaderOptions OrgParserState where
extractReaderOptions = orgStateOptions
@@ -111,6 +155,10 @@ instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+instance HasQuoteContext st (Reader OrgParserLocal) where
+ getQuoteContext = asks orgLocalQuoteContext
+ withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
+
instance Default OrgParserState where
def = defaultOrgParserState
@@ -126,13 +174,13 @@ defaultOrgParserState = OrgParserState
, orgStateLastStrPos = Nothing
, orgStateLinkFormatters = M.empty
, orgStateMeta = nullMeta
- , orgStateMeta' = return nullMeta
, orgStateNotes' = []
}
recordAnchorId :: String -> OrgParser ()
recordAnchorId i = updateState $ \s ->
- s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
+ let as = orgStateAnchorIds s in
+ s{ orgStateAnchorIds = i : as }
addBlockAttribute :: String -> String -> OrgParser ()
addBlockAttribute key val = updateState $ \s ->
@@ -211,30 +259,6 @@ parseFromString parser str' = do
-- Adaptions and specializations of parsing utilities
--
-newtype F a = F { unF :: Reader OrgParserState a
- } deriving (Monad, Applicative, Functor)
-
-runF :: F a -> OrgParserState -> a
-runF = runReader . unF
-
-askF :: F OrgParserState
-askF = F ask
-
-asksF :: (OrgParserState -> a) -> F a
-asksF f = F $ asks f
-
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = fmap mconcat . sequence
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
-
-returnF :: a -> OrgParser (F a)
-returnF = return . return
-
-
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
newline :: OrgParser Char
newline =
@@ -242,14 +266,21 @@ newline =
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
+-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
+blanklines :: OrgParser [Char]
+blanklines =
+ P.blanklines
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
--
-- parsing blocks
--
-parseBlocks :: OrgParser (F Blocks)
+parseBlocks :: OrgParser Blocks
parseBlocks = mconcat <$> manyTill block eof
-block :: OrgParser (F Blocks)
+block :: OrgParser Blocks
block = choice [ mempty <$ blanklines
, optionalAttributes $ choice
[ orgBlock
@@ -260,14 +291,14 @@ block = choice [ mempty <$ blanklines
, drawer
, specialLine
, header
- , return <$> hline
+ , hline
, list
, latexFragment
, noteBlock
, paraOrPlain
] <?> "block"
-optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks)
+optionalAttributes :: OrgParser Blocks -> OrgParser Blocks
optionalAttributes parser = try $
resetBlockAttributes *> parseBlockAttributes *> parser
@@ -287,7 +318,7 @@ parseAndAddAttribute key value = do
let key' = map toLower key
() <$ addBlockAttribute key' value
-lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines))
+lookupInlinesAttr :: String -> OrgParser (Maybe Inlines)
lookupInlinesAttr attr = try $ do
val <- lookupBlockAttribute attr
maybe (return Nothing)
@@ -301,20 +332,20 @@ lookupInlinesAttr attr = try $ do
type BlockProperties = (Int, String) -- (Indentation, Block-Type)
-orgBlock :: OrgParser (F Blocks)
+orgBlock :: OrgParser Blocks
orgBlock = try $ do
blockProp@(_, blkType) <- blockHeaderStart
($ blockProp) $
case blkType of
"comment" -> withRaw' (const mempty)
- "html" -> withRaw' (return . (B.rawBlock blkType))
- "latex" -> withRaw' (return . (B.rawBlock blkType))
- "ascii" -> withRaw' (return . (B.rawBlock blkType))
- "example" -> withRaw' (return . exampleCode)
- "quote" -> withParsed (fmap B.blockQuote)
+ "html" -> withRaw' (B.rawBlock blkType)
+ "latex" -> withRaw' (B.rawBlock blkType)
+ "ascii" -> withRaw' (B.rawBlock blkType)
+ "example" -> withRaw' exampleCode
+ "quote" -> withParsed B.blockQuote
"verse" -> verseBlock
"src" -> codeBlock
- _ -> withParsed (fmap $ divWithClass blkType)
+ _ -> withParsed (divWithClass blkType)
blockHeaderStart :: OrgParser (Int, String)
blockHeaderStart = try $ (,) <$> indent <*> blockType
@@ -322,10 +353,10 @@ blockHeaderStart = try $ (,) <$> indent <*> blockType
indent = length <$> many spaceChar
blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
-withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withRaw' :: (String -> Blocks) -> BlockProperties -> OrgParser Blocks
withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
-withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withParsed :: (Blocks -> Blocks) -> BlockProperties -> OrgParser Blocks
withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
ignHeaders :: OrgParser ()
@@ -334,11 +365,11 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine)
divWithClass :: String -> Blocks -> Blocks
divWithClass cls = B.divWith ("", [cls], [])
-verseBlock :: BlockProperties -> OrgParser (F Blocks)
+verseBlock :: BlockProperties -> OrgParser Blocks
verseBlock blkProp = try $ do
ignHeaders
content <- rawBlockContent blkProp
- fmap B.para . mconcat . intersperse (pure B.linebreak)
+ B.para . mconcat . intersperse B.linebreak
<$> mapM (parseFromString parseInlines) (lines content)
exportsCode :: [(String, String)] -> Bool
@@ -355,7 +386,7 @@ followingResultsBlock =
*> blankline
*> (unlines <$> many1 exampleLine))
-codeBlock :: BlockProperties -> OrgParser (F Blocks)
+codeBlock :: BlockProperties -> OrgParser Blocks
codeBlock blkProp = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
@@ -365,17 +396,15 @@ codeBlock blkProp = do
let includeCode = exportsCode kv
let includeResults = exportsResults kv
let codeBlck = B.codeBlockWith ( id', classes, kv ) content
- labelledBlck <- maybe (pure codeBlck)
- (labelDiv codeBlck)
+ labelledBlck <- maybe codeBlck (labelDiv codeBlck)
<$> lookupInlinesAttr "caption"
- let resultBlck = pure $ maybe mempty (exampleCode) resultsContent
+ let resultBlck = maybe mempty exampleCode resultsContent
return $ (if includeCode then labelledBlck else mempty)
<> (if includeResults then resultBlck else mempty)
where
labelDiv blk value =
- B.divWith nullAttr <$> (mappend <$> labelledBlock value
- <*> pure blk)
- labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+ B.divWith nullAttr (labelledBlock value <> blk)
+ labelledBlock = B.plain . B.spanWith ("", ["label"], [])
rawBlockContent :: BlockProperties -> OrgParser String
rawBlockContent (indent, blockType) = try $
@@ -384,7 +413,7 @@ rawBlockContent (indent, blockType) = try $
indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
-parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
+parsedBlockContent :: BlockProperties -> OrgParser Blocks
parsedBlockContent blkProps = try $ do
raw <- rawBlockContent blkProps
parseFromString parseBlocks (raw ++ "\n")
@@ -475,9 +504,9 @@ commaEscaped (',':cs@('*':_)) = cs
commaEscaped (',':cs@('#':'+':_)) = cs
commaEscaped cs = cs
-example :: OrgParser (F Blocks)
+example :: OrgParser Blocks
example = try $ do
- return . return . exampleCode =<< unlines <$> many1 exampleLine
+ return . exampleCode =<< unlines <$> many1 exampleLine
exampleCode :: String -> Blocks
exampleCode = B.codeBlockWith ("", ["example"], [])
@@ -486,7 +515,7 @@ exampleLine :: OrgParser String
exampleLine = try $ skipSpaces *> string ": " *> anyLine
-- Drawers for properties or a logbook
-drawer :: OrgParser (F Blocks)
+drawer :: OrgParser Blocks
drawer = try $ do
drawerStart
manyTill drawerLine (try drawerEnd)
@@ -512,14 +541,12 @@ drawerEnd = try $
--
-- Figures (Image on a line by itself, preceded by name and/or caption)
-figure :: OrgParser (F Blocks)
+figure :: OrgParser Blocks
figure = try $ do
(cap, nam) <- nameAndCaption
src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
guard (isImageFilename src)
- return $ do
- cap' <- cap
- return $ B.para $ B.image src nam cap'
+ return $ B.para $ B.image src nam cap
where
nameAndCaption =
do
@@ -535,8 +562,8 @@ figure = try $ do
--
-- Comments, Options and Metadata
-specialLine :: OrgParser (F Blocks)
-specialLine = fmap return . try $ metaLine <|> commentLine
+specialLine :: OrgParser Blocks
+specialLine = try $ metaLine <|> commentLine
metaLine :: OrgParser Blocks
metaLine = try $ mempty
@@ -556,14 +583,14 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
declarationLine :: OrgParser ()
declarationLine = try $ do
key <- metaKey
- inlinesF <- metaInlines
+ inlines <- metaInlines
updateState $ \st ->
- let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
- in st { orgStateMeta' = orgStateMeta' st <> meta' }
+ let meta' = B.setMeta key inlines nullMeta
+ in st { orgStateMeta = orgStateMeta st <> meta' }
return ()
-metaInlines :: OrgParser (F MetaValue)
-metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
+metaInlines :: OrgParser MetaValue
+metaInlines = (MetaInlines . B.toList) <$> inlinesTillNewline
metaKey :: OrgParser String
metaKey = map toLower <$> many1 (noneOf ": \n\r")
@@ -604,11 +631,11 @@ parseFormat = try $ do
--
-- | Headers
-header :: OrgParser (F Blocks)
+header :: OrgParser Blocks
header = try $ do
level <- headerStart
title <- inlinesTillNewline
- return $ B.header level <$> title
+ return $ B.header level title
headerStart :: OrgParser Int
headerStart = try $
@@ -632,7 +659,7 @@ hline = try $ do
-- Tables
--
-data OrgTableRow = OrgContentRow (F [Blocks])
+data OrgTableRow = OrgContentRow [Blocks]
| OrgAlignRow [Alignment]
| OrgHlineRow
@@ -643,13 +670,13 @@ data OrgTable = OrgTable
, orgTableRows :: [[Blocks]]
}
-table :: OrgParser (F Blocks)
+table :: OrgParser Blocks
table = try $ do
lookAhead tableStart
do
rows <- tableRows
- cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption"
- return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows
+ (cptn :: Inlines) <- fromMaybe "" <$> lookupInlinesAttr "caption"
+ return $ ($ cptn) . orgToPandocTable . normalizeTable . rowsToTable $ rows
orgToPandocTable :: OrgTable
-> Inlines
@@ -665,11 +692,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
tableContentRow :: OrgParser OrgTableRow
tableContentRow = try $
- OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
+ OrgContentRow <$> (tableStart *> manyTill tableContentCell newline)
-tableContentCell :: OrgParser (F Blocks)
+tableContentCell :: OrgParser Blocks
tableContentCell = try $
- fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
+ B.plain . trimInlines . mconcat <$> many1Till inline endOfCell
endOfCell :: OrgParser Char
endOfCell = try $ char '|' <|> lookAhead newline
@@ -701,8 +728,8 @@ tableHline = try $
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
rowsToTable :: [OrgTableRow]
- -> F OrgTable
-rowsToTable = foldM (flip rowToContent) zeroTable
+ -> OrgTable
+rowsToTable = foldl' (flip rowToContent) zeroTable
where zeroTable = OrgTable 0 mempty mempty mempty
normalizeTable :: OrgTable
@@ -721,45 +748,43 @@ normalizeTable (OrgTable cols aligns heads lns) =
-- line as a header. All other horizontal lines are discarded.
rowToContent :: OrgTableRow
-> OrgTable
- -> F OrgTable
+ -> OrgTable
rowToContent OrgHlineRow t = maybeBodyToHeader t
-rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t
-rowToContent (OrgContentRow rf) t = do
- rs <- rf
- setLongestRow rs =<< appendToBody rs t
+rowToContent (OrgAlignRow as) t = setLongestRow as . setAligns as $ t
+rowToContent (OrgContentRow rf) t = setLongestRow rf . appendToBody rf $ t
setLongestRow :: [a]
-> OrgTable
- -> F OrgTable
+ -> OrgTable
setLongestRow rs t =
- return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
+ t{ orgTableColumns = max (length rs) (orgTableColumns t) }
maybeBodyToHeader :: OrgTable
- -> F OrgTable
+ -> OrgTable
maybeBodyToHeader t = case t of
OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
- return t{ orgTableHeader = b , orgTableRows = [] }
- _ -> return t
+ t{ orgTableHeader = b , orgTableRows = [] }
+ _ -> t
appendToBody :: [Blocks]
-> OrgTable
- -> F OrgTable
-appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
+ -> OrgTable
+appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] }
setAligns :: [Alignment]
-> OrgTable
- -> F OrgTable
-setAligns aligns t = return $ t{ orgTableAlignments = aligns }
+ -> OrgTable
+setAligns aligns t = t{ orgTableAlignments = aligns }
--
-- LaTeX fragments
--
-latexFragment :: OrgParser (F Blocks)
+latexFragment :: OrgParser Blocks
latexFragment = try $ do
envName <- latexEnvStart
content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
- return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
+ return $ B.rawBlock "latex" (content `inLatexEnv` envName)
where
c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
, c
@@ -789,7 +814,7 @@ latexEnvName = try $ do
--
-- Footnote defintions
--
-noteBlock :: OrgParser (F Blocks)
+noteBlock :: OrgParser Blocks
noteBlock = try $ do
ref <- noteMarker <* skipSpaces
content <- mconcat <$> blocksTillHeaderOrNote
@@ -801,35 +826,37 @@ noteBlock = try $ do
<|> () <$ lookAhead headerStart)
-- Paragraphs or Plain text
-paraOrPlain :: OrgParser (F Blocks)
+paraOrPlain :: OrgParser Blocks
paraOrPlain = try $ do
ils <- parseInlines
nl <- option False (newline >> return True)
try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >>
- return (B.para <$> ils))
- <|> (return (B.plain <$> ils))
+ (return $ B.para ils))
+ <|> (return $ B.plain ils)
-inlinesTillNewline :: OrgParser (F Inlines)
-inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
+inlinesTillNewline :: OrgParser Inlines
+inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline
--
-- list blocks
--
-list :: OrgParser (F Blocks)
+list :: OrgParser Blocks
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
-definitionList :: OrgParser (F Blocks)
-definitionList = fmap B.definitionList . fmap compactify'DL . sequence
- <$> many1 (definitionListItem bulletListStart)
+definitionList :: OrgParser Blocks
+definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ B.definitionList . compactify'DL
+ <$> many1 (definitionListItem $ bulletListStart' (Just n))
-bulletList :: OrgParser (F Blocks)
-bulletList = fmap B.bulletList . fmap compactify' . sequence
- <$> many1 (listItem bulletListStart)
+bulletList :: OrgParser Blocks
+bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ B.bulletList . compactify'
+ <$> many1 (listItem (bulletListStart' $ Just n))
-orderedList :: OrgParser (F Blocks)
-orderedList = fmap B.orderedList . fmap compactify' . sequence
+orderedList :: OrgParser Blocks
+orderedList = B.orderedList . compactify'
<$> many1 (listItem orderedListStart)
genericListStart :: OrgParser String
@@ -838,10 +865,27 @@ genericListStart listMarker = try $
(+) <$> (length <$> many spaceChar)
<*> (length <$> listMarker <* many1 spaceChar)
--- parses bullet list start and returns its length (excl. following whitespace)
+-- parses bullet list marker. maybe we know the indent level
bulletListStart :: OrgParser Int
-bulletListStart = genericListStart bulletListMarker
- where bulletListMarker = pure <$> oneOf "*-+"
+bulletListStart = bulletListStart' Nothing
+
+bulletListStart' :: Maybe Int -> OrgParser Int
+-- returns length of bulletList prefix, inclusive of marker
+bulletListStart' Nothing = do ind <- length <$> many spaceChar
+ when (ind == 0) $ notFollowedBy (char '*')
+ oneOf bullets
+ many1 spaceChar
+ return (ind + 1)
+ -- Unindented lists are legal, but they can't use '*' bullets
+ -- We return n to maintain compatibility with the generic listItem
+bulletListStart' (Just n) = do count (n-1) spaceChar
+ when (n == 1) $ notFollowedBy (char '*')
+ oneOf bullets
+ many1 spaceChar
+ return n
+
+bullets :: String
+bullets = "*+-"
orderedListStart :: OrgParser Int
orderedListStart = genericListStart orderedListMarker
@@ -849,21 +893,21 @@ orderedListStart = genericListStart orderedListMarker
where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
definitionListItem :: OrgParser Int
- -> OrgParser (F (Inlines, [Blocks]))
+ -> OrgParser (Inlines, [Blocks])
definitionListItem parseMarkerGetLength = try $ do
markerLength <- parseMarkerGetLength
term <- manyTill (noneOf "\n\r") (try $ string "::")
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
cont <- concat <$> many (listContinuation markerLength)
- term' <- parseFromString inline term
+ term' <- parseFromString parseInlines term
contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
- return $ (,) <$> term' <*> fmap (:[]) contents'
+ return (term', [contents'])
-- parse raw text for one list item, excluding start marker and continuations
listItem :: OrgParser Int
- -> OrgParser (F Blocks)
+ -> OrgParser Blocks
listItem start = try $ do
markerLength <- try start
firstLine <- anyLineNewline
@@ -889,7 +933,7 @@ anyLineNewline = (++ "\n") <$> anyLine
-- inline
--
-inline :: OrgParser (F Inlines)
+inline :: OrgParser Inlines
inline =
choice [ whitespace
, linebreak
@@ -911,35 +955,36 @@ inline =
, subscript
, superscript
, inlineLaTeX
+ , smart
, symbol
] <* (guard =<< newlinesCountWithinLimits)
<?> "inline"
-parseInlines :: OrgParser (F Inlines)
-parseInlines = trimInlinesF . mconcat <$> many1 inline
+parseInlines :: OrgParser Inlines
+parseInlines = trimInlines . mconcat <$> many1 inline
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
+specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
-whitespace :: OrgParser (F Inlines)
-whitespace = pure B.space <$ skipMany1 spaceChar
+whitespace :: OrgParser Inlines
+whitespace = B.space <$ skipMany1 spaceChar
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
<?> "whitespace"
-linebreak :: OrgParser (F Inlines)
-linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
+linebreak :: OrgParser Inlines
+linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline
-str :: OrgParser (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+str :: OrgParser Inlines
+str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
<* updateLastStrPos
-- | An endline character that can be treated as a space, not a structural
-- break. This should reflect the values of the Emacs variable
-- @org-element-pagaraph-separate@.
-endline :: OrgParser (F Inlines)
+endline :: OrgParser Inlines
endline = try $ do
newline
notFollowedBy blankline
@@ -957,77 +1002,72 @@ endline = try $ do
decEmphasisNewlinesCount
guard =<< newlinesCountWithinLimits
updateLastPreCharPos
- return . return $ B.space
+ return $ B.space
-cite :: OrgParser (F Inlines)
+cite :: OrgParser Inlines
cite = try $ do
guardEnabled Ext_citations
(cs, raw) <- withRaw normalCite
- return $ (flip B.cite (B.text raw)) <$> cs
+ return $ flip B.cite (B.text raw) cs
-normalCite :: OrgParser (F [Citation])
+normalCite :: OrgParser [Citation]
normalCite = try $ char '['
*> skipSpaces
*> citeList
<* skipSpaces
<* char ']'
-citeList :: OrgParser (F [Citation])
-citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
+citeList :: OrgParser [Citation]
+citeList = sepBy1 citation (try $ char ';' *> skipSpaces)
-citation :: OrgParser (F Citation)
+citation :: OrgParser Citation
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
+ return $ Citation{ citationId = key
+ , citationPrefix = B.toList pref
+ , citationSuffix = B.toList suff
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
where
- prefix = trimInlinesF . mconcat <$>
+ prefix = trimInlines . mconcat <$>
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
skipSpaces
- rest <- trimInlinesF . mconcat <$>
+ rest <- trimInlines . mconcat <$>
many (notFollowedBy (oneOf ";]") *> inline)
- return $ if hasSpace
- then (B.space <>) <$> rest
- else rest
+ return $
+ if hasSpace
+ then B.space <> rest
+ else rest
-footnote :: OrgParser (F Inlines)
+footnote :: OrgParser Inlines
footnote = try $ inlineNote <|> referencedNote
-inlineNote :: OrgParser (F Inlines)
+inlineNote :: OrgParser Inlines
inlineNote = try $ do
string "[fn:"
ref <- many alphaNum
char ':'
- note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
+ note <- B.para . trimInlines . mconcat <$> many1Till inline (char ']')
when (not $ null ref) $
addToNotesTable ("fn:" ++ ref, note)
- return $ B.note <$> note
+ return $ B.note note
-referencedNote :: OrgParser (F Inlines)
+referencedNote :: OrgParser Inlines
referencedNote = try $ do
ref <- noteMarker
- return $ do
- notes <- asksF orgStateNotes'
+ notes <- asks (orgStateNotes' . finalState)
+ return $
case lookup ref notes of
- Nothing -> return $ B.str $ "[" ++ ref ++ "]"
- Just contents -> do
- st <- askF
- let contents' = runF contents st{ orgStateNotes' = [] }
- return $ B.note contents'
+ Just contents -> B.note contents
+ Nothing -> B.str $ "[" ++ ref ++ "]"
noteMarker :: OrgParser String
noteMarker = try $ do
@@ -1037,37 +1077,37 @@ noteMarker = try $ do
<*> many1Till (noneOf "\n\r\t ") (char ']')
]
-linkOrImage :: OrgParser (F Inlines)
+linkOrImage :: OrgParser Inlines
linkOrImage = explicitOrImageLink
<|> selflinkOrImage
<|> angleLink
<|> plainLink
<?> "link or image"
-explicitOrImageLink :: OrgParser (F Inlines)
+explicitOrImageLink :: OrgParser Inlines
explicitOrImageLink = try $ do
char '['
- srcF <- applyCustomLinkFormat =<< linkTarget
+ src <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
title <- enclosedRaw (char '[') (char ']')
title' <- parseFromString (mconcat <$> many inline) title
char ']'
- return $ do
- src <- srcF
- if isImageFilename src && isImageFilename title
- then pure $ B.link src "" $ B.image title mempty mempty
- else linkToInlinesF src =<< title'
+ alt <- internalLink src title'
+ return $
+ (if isImageFilename title
+ then B.link src "" $ B.image title mempty mempty
+ else fromMaybe alt (linkToInlines src title'))
-selflinkOrImage :: OrgParser (F Inlines)
+selflinkOrImage :: OrgParser Inlines
selflinkOrImage = try $ do
src <- char '[' *> linkTarget <* char ']'
- return $ linkToInlinesF src (B.str src)
+ return $ fromMaybe "" (linkToInlines src (B.str src))
-plainLink :: OrgParser (F Inlines)
+plainLink :: OrgParser Inlines
plainLink = try $ do
(orig, src) <- uri
- returnF $ B.link src "" (B.str orig)
+ return $ B.link src "" (B.str orig)
-angleLink :: OrgParser (F Inlines)
+angleLink :: OrgParser Inlines
angleLink = try $ do
char '<'
link <- plainLink
@@ -1080,34 +1120,53 @@ selfTarget = try $ char '[' *> linkTarget <* char ']'
linkTarget :: OrgParser String
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
-applyCustomLinkFormat :: String -> OrgParser (F String)
+possiblyEmptyLinkTarget :: OrgParser String
+possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
+
+applyCustomLinkFormat :: String -> OrgParser String
applyCustomLinkFormat link = do
let (linkType, rest) = break (== ':') link
- return $ do
- formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
- return $ maybe link ($ drop 1 rest) formatter
-
-
-linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF s@('#':_) = pure . B.link s ""
-linkToInlinesF s
- | isImageFilename s = const . pure $ B.image s "" ""
- | isUri s = pure . B.link s ""
- | isRelativeUrl s = pure . B.link s ""
-linkToInlinesF s = \title -> do
- anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
- if anchorB
- then pure $ B.link ('#':s) "" title
- else pure $ B.emph title
-
-isRelativeUrl :: String -> Bool
-isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s)
+ fmts <- asks finalState
+ return $
+ case M.lookup linkType (orgStateLinkFormatters fmts) of
+ Just v -> (v (drop 1 rest))
+ Nothing -> link
+
+-- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind
+-- of parsing.
+linkToInlines :: String -> Inlines -> Maybe Inlines
+linkToInlines = \s ->
+ case s of
+ _ | null s -> Just . B.link "" ""
+ _ | isAnchor s -> Just . B.link s ""
+ _ | isImageFilename s -> const . Just $ B.image s "" ""
+ _ | isFileLink s -> Just . B.link (dropLinkType s) ""
+ _ | isUri s -> Just . B.link s ""
+ _ | isAbsoluteFilePath s -> Just . B.link ("file://" ++ s) ""
+ _ | isRelativeFilePath s -> Just . B.link s ""
+ _ -> const Nothing
+
+isAnchor :: String -> Bool
+isAnchor s = "#" `isPrefixOf` s
+
+isFileLink :: String -> Bool
+isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s)
+
+dropLinkType :: String -> String
+dropLinkType = tail . snd . break (== ':')
+
+isRelativeFilePath :: String -> Bool
+isRelativeFilePath s = (("./" `isPrefixOf` s) || ("../" `isPrefixOf` s)) &&
+ (':' `notElem` s)
isUri :: String -> Bool
isUri s = let (scheme, path) = break (== ':') s
- in all (\c -> isAlphaNum c || c `elem` ".-") scheme
+ in all (\c -> isAlphaNum c || c `elem` (".-" :: String)) scheme
&& not (null path)
+isAbsoluteFilePath :: String -> Bool
+isAbsoluteFilePath = ('/' ==) . head
+
isImageFilename :: String -> Bool
isImageFilename filename =
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
@@ -1117,17 +1176,25 @@ isImageFilename filename =
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
protocols = [ "file", "http", "https" ]
+internalLink :: String -> Inlines -> OrgParser Inlines
+internalLink link title = do
+ anchorB <- asks finalState
+ return $
+ if link `elem` (orgStateAnchorIds anchorB)
+ then B.link ('#':link) "" title
+ else B.emph title
+
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
-- @org-target-regexp@, which is fairly liberal. Since no link is created if
-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
-- an anchor.
-anchor :: OrgParser (F Inlines)
+anchor :: OrgParser Inlines
anchor = try $ do
anchorId <- parseAnchor
recordAnchorId anchorId
- returnF $ B.spanWith (solidify anchorId, [], []) mempty
+ return $ B.spanWith (solidify anchorId, [], []) mempty
where
parseAnchor = string "<<"
*> many1 (noneOf "\t\n\r<>\"' ")
@@ -1141,11 +1208,11 @@ solidify :: String -> String
solidify = map replaceSpecialChar
where replaceSpecialChar c
| isAlphaNum c = c
- | c `elem` "_.-:" = c
+ | c `elem` ("_.-:" :: String) = c
| otherwise = '-'
-- | Parses an inline code block and marks it as an babel block.
-inlineCodeBlock :: OrgParser (F Inlines)
+inlineCodeBlock :: OrgParser Inlines
inlineCodeBlock = try $ do
string "src_"
lang <- many1 orgArgWordChar
@@ -1153,7 +1220,7 @@ inlineCodeBlock = try $ do
inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
let attrClasses = [translateLang lang, rundocBlockClass]
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
- returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ return $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
enclosedByPair :: Char -- ^ opening char
-> Char -- ^ closing char
@@ -1161,50 +1228,51 @@ enclosedByPair :: Char -- ^ opening char
-> OrgParser [a]
enclosedByPair s e p = char s *> many1Till p (char e)
-emph :: OrgParser (F Inlines)
-emph = fmap B.emph <$> emphasisBetween '/'
+emph :: OrgParser Inlines
+emph = B.emph <$> emphasisBetween '/'
-strong :: OrgParser (F Inlines)
-strong = fmap B.strong <$> emphasisBetween '*'
+strong :: OrgParser Inlines
+strong = B.strong <$> emphasisBetween '*'
-strikeout :: OrgParser (F Inlines)
-strikeout = fmap B.strikeout <$> emphasisBetween '+'
+strikeout :: OrgParser Inlines
+strikeout = B.strikeout <$> emphasisBetween '+'
-- There is no underline, so we use strong instead.
-underline :: OrgParser (F Inlines)
-underline = fmap B.strong <$> emphasisBetween '_'
+underline :: OrgParser Inlines
+underline = B.strong <$> emphasisBetween '_'
-verbatim :: OrgParser (F Inlines)
-verbatim = return . B.code <$> verbatimBetween '='
+verbatim :: OrgParser Inlines
+verbatim = B.code <$> verbatimBetween '='
-code :: OrgParser (F Inlines)
-code = return . B.code <$> verbatimBetween '~'
+code :: OrgParser Inlines
+code = B.code <$> verbatimBetween '~'
-subscript :: OrgParser (F Inlines)
-subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
+subscript :: OrgParser Inlines
+subscript = B.subscript <$> try (char '_' *> subOrSuperExpr)
-superscript :: OrgParser (F Inlines)
-superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
+superscript :: OrgParser Inlines
+superscript = B.superscript <$> try (char '^' *> subOrSuperExpr)
-math :: OrgParser (F Inlines)
-math = return . B.math <$> choice [ math1CharBetween '$'
+math :: OrgParser Inlines
+math = B.math <$> choice [ math1CharBetween '$'
, mathStringBetween '$'
, rawMathBetween "\\(" "\\)"
]
-displayMath :: OrgParser (F Inlines)
-displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
- , rawMathBetween "$$" "$$"
- ]
-symbol :: OrgParser (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
- where updatePositions c
- | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
- | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
- | otherwise = return c
+displayMath :: OrgParser Inlines
+displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
+ , rawMathBetween "$$" "$$"
+ ]
+
+symbol :: OrgParser Inlines
+symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+ where updatePositions c = do
+ when (c `elem` emphasisPreChars) updateLastPreCharPos
+ when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
+ return c
emphasisBetween :: Char
- -> OrgParser (F Inlines)
+ -> OrgParser Inlines
emphasisBetween c = try $ do
startEmphasisNewlinesCounting emphasisAllowedNewlines
res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
@@ -1281,9 +1349,9 @@ mathEnd c = try $ do
enclosedInlines :: OrgParser a
-> OrgParser b
- -> OrgParser (F Inlines)
+ -> OrgParser Inlines
enclosedInlines start end = try $
- trimInlinesF . mconcat <$> enclosed start end inline
+ trimInlines . mconcat <$> enclosed start end inline
enclosedRaw :: OrgParser a
-> OrgParser b
@@ -1362,7 +1430,7 @@ notAfterForbiddenBorderChar = do
return $ lastFBCPos /= Just pos
-- | Read a sub- or superscript expression
-subOrSuperExpr :: OrgParser (F Inlines)
+subOrSuperExpr :: OrgParser Inlines
subOrSuperExpr = try $
choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
@@ -1377,10 +1445,11 @@ simpleSubOrSuperString = try $
<*> many1 alphaNum
]
-inlineLaTeX :: OrgParser (F Inlines)
+inlineLaTeX :: OrgParser Inlines
inlineLaTeX = try $ do
cmd <- inlineLaTeXCommand
- maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd
+ maybe mzero return $
+ parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
where
parseAsMath :: String -> Maybe Inlines
parseAsMath cs = B.fromList <$> texMathToPandoc cs
@@ -1388,6 +1457,11 @@ inlineLaTeX = try $ do
parseAsInlineLaTeX :: String -> Maybe Inlines
parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
+ parseAsMathMLSym :: String -> Maybe Inlines
+ parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
+ -- dropWhileEnd would be nice here, but it's not available before base 4.5
+ where clean = reverse . dropWhile (`elem` ("{}" :: String)) . reverse . drop 1
+
state :: ParserState
state = def{ stateOptions = def{ readerParseRaw = True }}
@@ -1406,3 +1480,31 @@ inlineLaTeXCommand = try $ do
count len anyChar
return cs
_ -> mzero
+
+smart :: OrgParser Inlines
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice [orgApostrophe, dash, ellipses]
+ where orgApostrophe =
+ (char '\'' <|> char '\8217') <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+ *> return (B.str "\x2019")
+
+singleQuoted :: OrgParser Inlines
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ B.singleQuoted . trimInlines . mconcat <$>
+ many1Till inline singleQuoteEnd
+
+-- doubleQuoted will handle regular double-quoted sections, as well
+-- as dialogues with an open double-quote without a close double-quote
+-- in the same paragraph.
+doubleQuoted :: OrgParser Inlines
+doubleQuoted = try $ do
+ doubleQuoteStart
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
+ (B.doubleQuoted . trimInlines $ contents))
+ <|> (return $ (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index e5eccb116..a8112bc81 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -29,32 +30,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion from reStructuredText to 'Pandoc' document.
-}
module Text.Pandoc.Readers.RST (
- readRST
+ readRST,
+ readRSTWithWarnings
) where
import Text.Pandoc.Definition
import Text.Pandoc.Builder (setMeta, fromList)
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Options
-import Control.Monad ( when, liftM, guard, mzero, mplus )
+import Control.Monad ( when, liftM, guard, mzero )
import Data.List ( findIndex, intersperse, intercalate,
- transpose, sort, deleteFirstsBy, isSuffixOf )
+ transpose, sort, deleteFirstsBy, isSuffixOf , nub, union)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import Text.Printf ( printf )
-import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>))
+import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>), pure)
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
import Data.Monoid (mconcat, mempty)
import Data.Sequence (viewr, ViewR(..))
-import Data.Char (toLower, isHexDigit)
+import Data.Char (toLower, isHexDigit, isSpace)
+
+import Text.Pandoc.Error
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String])
+readRSTWithWarnings opts s = (readWith (returnWarnings parseRST)) def{ stateOptions = opts } (s ++ "\n\n")
+
type RSTParser = Parser [Char] ParserState
--
@@ -335,6 +342,13 @@ indentedBlock = try $ do
optional blanklines
return $ unlines lns
+quotedBlock :: Parser [Char] st [Char]
+quotedBlock = try $ do
+ quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+ lns <- many1 $ lookAhead (char quote) >> anyLine
+ optional blanklines
+ return $ unlines lns
+
codeBlockStart :: Parser [Char] st Char
codeBlockStart = string "::" >> blankline >> blankline
@@ -342,7 +356,8 @@ codeBlock :: Parser [Char] st Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
codeBlockBody :: Parser [Char] st Blocks
-codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> indentedBlock
+codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
+ (indentedBlock <|> quotedBlock)
lhsCodeBlock :: RSTParser Blocks
lhsCodeBlock = try $ do
@@ -513,7 +528,6 @@ directive = try $ do
-- TODO: line-block, parsed-literal, table, csv-table, list-table
-- date
-- include
--- class
-- title
directive' :: RSTParser Blocks
directive' = do
@@ -594,38 +608,69 @@ directive' = do
Just t -> B.link (escapeURI $ trim t) ""
$ B.image src "" alt
Nothing -> B.image src "" alt
- _ -> return mempty
+ "class" -> do
+ let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields)
+ -- directive content or the first immediately following element
+ children <- case body of
+ "" -> block
+ _ -> parseFromString parseBlocks body'
+ return $ B.divWith attrs children
+ other -> do
+ pos <- getPosition
+ addWarning (Just pos) $ "ignoring unknown directive: " ++ other
+ return mempty
-- TODO:
-- - Silently ignores illegal fields
--- - Silently drops classes
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: String -> [(String, String)] -> RSTParser Blocks
addNewRole roleString fields = do
(role, parentRole) <- parseFromString inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
- baseRole <- case M.lookup parentRole customRoles of
- Just (base, _, _) -> return base
- Nothing -> return parentRole
-
- let fmt = if baseRole == "raw" then lookup "format" fields else Nothing
- annotate = maybe id addLanguage $
- if baseRole == "code"
+ let (baseRole, baseFmt, baseAttr) =
+ maybe (parentRole, Nothing, nullAttr) id $
+ M.lookup parentRole customRoles
+ fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
+ annotate :: [String] -> [String]
+ annotate = maybe id (:) $
+ if parentRole == "code"
then lookup "language" fields
else Nothing
+ attr = let (ident, classes, keyValues) = baseAttr
+ -- nub in case role name & language class are the same
+ in (ident, nub . (role :) . annotate $ classes, keyValues)
+
+ -- warn about syntax we ignore
+ flip mapM_ fields $ \(key, _) -> case key of
+ "language" -> when (parentRole /= "code") $ addWarning Nothing $
+ "ignoring :language: field because the parent of role :" ++
+ role ++ ": is :" ++ parentRole ++ ": not :code:"
+ "format" -> when (parentRole /= "raw") $ addWarning Nothing $
+ "ignoring :format: field because the parent of role :" ++
+ role ++ ": is :" ++ parentRole ++ ": not :raw:"
+ _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
+ ": in definition of role :" ++ role ++ ": in"
+ when (parentRole == "raw" && countKeys "format" > 1) $
+ addWarning Nothing $
+ "ignoring :format: fields after the first in the definition of role :"
+ ++ role ++": in"
+ when (parentRole == "code" && countKeys "language" > 1) $
+ addWarning Nothing $
+ "ignoring :language: fields after the first in the definition of role :"
+ ++ role ++": in"
updateState $ \s -> s {
stateRstCustomRoles =
- M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles
+ M.insert role (baseRole, fmt, attr) customRoles
}
return $ B.singleton Null
where
- addLanguage lang (ident, classes, keyValues) =
- (ident, "sourceCode" : lang : classes, keyValues)
+ countKeys k = length . filter (== k) . map fst $ fields
inheritedRole =
- (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')')
+ (,) <$> roleName <*> ((char '(' *> roleName <* char ')') <|> pure "span")
+
-- Can contain character codes as decimal numbers or
-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
@@ -666,7 +711,7 @@ extractCaption = do
toChunks :: String -> [String]
toChunks = dropWhile null
. map (trim . unlines)
- . splitBy (all (`elem` " \t")) . lines
+ . splitBy (all (`elem` (" \t" :: String))) . lines
codeblock :: Maybe String -> String -> String -> RSTParser Blocks
codeblock numberLines lang body =
@@ -985,21 +1030,23 @@ renderRole contents fmt role attr = case role of
"RFC" -> return $ rfcLink contents
"pep-reference" -> return $ pepLink contents
"PEP" -> return $ pepLink contents
- "literal" -> return $ B.str contents
+ "literal" -> return $ B.codeWith attr contents
"math" -> return $ B.math contents
"title-reference" -> titleRef contents
"title" -> titleRef contents
"t" -> titleRef contents
- "code" -> return $ B.codeWith attr contents
+ "code" -> return $ B.codeWith (addClass "sourceCode" attr) contents
+ "span" -> return $ B.spanWith attr $ B.str contents
"raw" -> return $ B.rawInline (fromMaybe "" fmt) contents
custom -> do
- customRole <- stateRstCustomRoles <$> getState
- case M.lookup custom customRole of
- Just (_, newFmt, inherit) -> let
- fmtStr = fmt `mplus` newFmt
- (newRole, newAttr) = inherit attr
- in renderRole contents fmtStr newRole newAttr
- Nothing -> return $ B.str contents -- Undefined role
+ customRoles <- stateRstCustomRoles <$> getState
+ case M.lookup custom customRoles of
+ Just (newRole, newFmt, newAttr) ->
+ renderRole contents newFmt newRole newAttr
+ Nothing -> do
+ pos <- getPosition
+ addWarning (Just pos) $ "ignoring unknown role :" ++ custom ++ ": in"
+ return $ B.str contents -- Undefined role
where
titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour
rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
@@ -1008,11 +1055,14 @@ renderRole contents fmt role attr = case role of
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
-roleNameEndingIn :: RSTParser Char -> RSTParser String
-roleNameEndingIn end = many1Till (letter <|> char '-') end
+addClass :: String -> Attr -> Attr
+addClass c (ident, classes, keyValues) = (ident, union classes [c], keyValues)
+
+roleName :: RSTParser String
+roleName = many1 (letter <|> char '-')
roleMarker :: RSTParser String
-roleMarker = char ':' *> roleNameEndingIn (char ':')
+roleMarker = char ':' *> roleName <* char ':'
roleBefore :: RSTParser (String,String)
roleBefore = try $ do
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
new file mode 100644
index 000000000..07b414431
--- /dev/null
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -0,0 +1,527 @@
+{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances, FlexibleContexts #-}
+-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
+{-
+ Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.TWiki
+ Copyright : Copyright (C) 2014 Alexander Sulfrian
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+ Stability : alpha
+ Portability : portable
+
+Conversion of twiki text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.TWiki ( readTWiki
+ , readTWikiWithWarnings
+ ) where
+
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
+import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
+import Data.Monoid (Monoid, mconcat, mempty)
+import Control.Applicative ((<$>), (<*), (*>), (<$))
+import Control.Monad
+import Text.Printf (printf)
+import Debug.Trace (trace)
+import Text.Pandoc.XML (fromEntities)
+import Data.Maybe (fromMaybe)
+import Text.HTML.TagSoup
+import Data.Char (isAlphaNum)
+import qualified Data.Foldable as F
+import Text.Pandoc.Error
+
+-- | Read twiki from an input string and return a Pandoc document.
+readTWiki :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Either PandocError Pandoc
+readTWiki opts s =
+ (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n")
+
+readTWikiWithWarnings :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Either PandocError (Pandoc, [String])
+readTWikiWithWarnings opts s =
+ (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n")
+ where parseTWikiWithWarnings = do
+ doc <- parseTWiki
+ warnings <- stateWarnings <$> getState
+ return (doc, warnings)
+
+type TWParser = Parser [Char] ParserState
+
+--
+-- utility functions
+--
+
+tryMsg :: String -> TWParser a -> TWParser a
+tryMsg msg p = try p <?> msg
+
+skip :: TWParser a -> TWParser ()
+skip parser = parser >> return ()
+
+nested :: TWParser a -> TWParser a
+nested p = do
+ nestlevel <- stateMaxNestingLevel <$> getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
+htmlElement :: String -> TWParser (Attr, String)
+htmlElement tag = tryMsg tag $ do
+ (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ content <- manyTill anyChar (endtag <|> endofinput)
+ return (htmlAttrToPandoc attr, trim content)
+ where
+ endtag = skip $ htmlTag (~== TagClose tag)
+ endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
+ trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
+
+htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc attrs = (ident, classes, keyvals)
+ where
+ ident = fromMaybe "" $ lookup "id" attrs
+ classes = maybe [] words $ lookup "class" attrs
+ keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+
+parseHtmlContentWithAttrs :: String -> TWParser a -> TWParser (Attr, [a])
+parseHtmlContentWithAttrs tag parser = do
+ (attr, content) <- htmlElement tag
+ parsedContent <- try $ parseContent content
+ return (attr, parsedContent)
+ where
+ parseContent = parseFromString $ nested $ manyTill parser endOfContent
+ endOfContent = try $ skipMany blankline >> skipSpaces >> eof
+
+parseHtmlContent :: String -> TWParser a -> TWParser [a]
+parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
+
+--
+-- main parser
+--
+
+parseTWiki :: TWParser Pandoc
+parseTWiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ return $ B.doc bs
+
+
+--
+-- block parsers
+--
+
+block :: TWParser B.Blocks
+block = do
+ tr <- getOption readerTrace
+ pos <- getPosition
+ res <- mempty <$ skipMany1 blankline
+ <|> blockElements
+ <|> para
+ skipMany blankline
+ when tr $
+ trace (printf "line %d: %s" (sourceLine pos)
+ (take 60 $ show $ B.toList res)) (return ())
+ return res
+
+blockElements :: TWParser B.Blocks
+blockElements = choice [ separator
+ , header
+ , verbatim
+ , literal
+ , list ""
+ , table
+ , blockQuote
+ , noautolink
+ ]
+
+separator :: TWParser B.Blocks
+separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule
+
+header :: TWParser B.Blocks
+header = tryMsg "header" $ do
+ string "---"
+ level <- many1 (char '+') >>= return . length
+ guard $ level <= 6
+ classes <- option [] $ string "!!" >> return ["unnumbered"]
+ skipSpaces
+ content <- B.trimInlines . mconcat <$> manyTill inline newline
+ attr <- registerHeader ("", classes, []) content
+ return $ B.headerWith attr level $ content
+
+verbatim :: TWParser B.Blocks
+verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
+ >>= return . (uncurry B.codeBlockWith)
+
+literal :: TWParser B.Blocks
+literal = htmlElement "literal" >>= return . rawBlock
+ where
+ format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
+ rawBlock (attrs, content) = B.rawBlock (format attrs) content
+
+list :: String -> TWParser B.Blocks
+list prefix = choice [ bulletList prefix
+ , orderedList prefix
+ , definitionList prefix]
+
+definitionList :: String -> TWParser B.Blocks
+definitionList prefix = tryMsg "definitionList" $ do
+ indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ "
+ elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
+ return $ B.definitionList elements
+ where
+ parseDefinitionListItem :: String -> TWParser (B.Inlines, [B.Blocks])
+ parseDefinitionListItem indent = do
+ string (indent ++ "$ ") >> skipSpaces
+ term <- many1Till inline $ string ": "
+ line <- listItemLine indent $ string "$ "
+ return $ (mconcat term, [line])
+
+bulletList :: String -> TWParser B.Blocks
+bulletList prefix = tryMsg "bulletList" $
+ parseList prefix (char '*') (char ' ')
+
+orderedList :: String -> TWParser B.Blocks
+orderedList prefix = tryMsg "orderedList" $
+ parseList prefix (oneOf "1iIaA") (string ". ")
+
+parseList :: Show a => String -> TWParser Char -> TWParser a -> TWParser B.Blocks
+parseList prefix marker delim = do
+ (indent, style) <- lookAhead $ string prefix *> listStyle <* delim
+ blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim)
+ return $ case style of
+ '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks
+ 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks
+ 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks
+ 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks
+ 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks
+ _ -> B.bulletList blocks
+ where
+ listStyle = do
+ indent <- many1 $ string " "
+ style <- marker
+ return (concat indent, style)
+
+parseListItem :: Show a => String -> TWParser a -> TWParser B.Blocks
+parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker
+
+listItemLine :: Show a => String -> TWParser a -> TWParser B.Blocks
+listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
+ where
+ lineContent = do
+ content <- anyLine
+ continuation <- optionMaybe listContinuation
+ return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation)
+ filterSpaces = reverse . dropWhile (== ' ') . reverse
+ listContinuation = notFollowedBy (string prefix >> marker) >>
+ string " " >> lineContent
+ parseContent = parseFromString $ many1 $ nestedList <|> parseInline
+ parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
+ return . B.plain . mconcat
+ nestedList = list prefix
+ lastNewline = try $ char '\n' <* eof
+ newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
+
+table :: TWParser B.Blocks
+table = try $ do
+ tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
+ rows <- many1 tableParseRow
+ return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
+ where
+ buildTable caption rows (aligns, heads)
+ = B.table caption aligns heads rows
+ align rows = replicate (columCount rows) (AlignDefault, 0)
+ columns rows = replicate (columCount rows) mempty
+ columCount rows = length $ head rows
+
+tableParseHeader :: TWParser ((Alignment, Double), B.Blocks)
+tableParseHeader = try $ do
+ char '|'
+ leftSpaces <- many spaceChar >>= return . length
+ char '*'
+ content <- tableColumnContent (char '*' >> skipSpaces >> char '|')
+ char '*'
+ rightSpaces <- many spaceChar >>= return . length
+ optional tableEndOfRow
+ return (tableAlign leftSpaces rightSpaces, content)
+ where
+ tableAlign left right
+ | left >= 2 && left == right = (AlignCenter, 0)
+ | left > right = (AlignRight, 0)
+ | otherwise = (AlignLeft, 0)
+
+tableParseRow :: TWParser [B.Blocks]
+tableParseRow = many1Till tableParseColumn newline
+
+tableParseColumn :: TWParser B.Blocks
+tableParseColumn = char '|' *> skipSpaces *>
+ tableColumnContent (skipSpaces >> char '|')
+ <* skipSpaces <* optional tableEndOfRow
+
+tableEndOfRow :: TWParser Char
+tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
+
+tableColumnContent :: Show a => TWParser a -> TWParser B.Blocks
+tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
+ where
+ content = continuation <|> inline
+ continuation = try $ char '\\' >> newline >> return mempty
+
+blockQuote :: TWParser B.Blocks
+blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
+
+noautolink :: TWParser B.Blocks
+noautolink = do
+ (_, content) <- htmlElement "noautolink"
+ st <- getState
+ setState $ st{ stateAllowLinks = False }
+ blocks <- try $ parseContent content
+ setState $ st{ stateAllowLinks = True }
+ return $ mconcat blocks
+ where
+ parseContent = parseFromString $ many $ block
+
+para :: TWParser B.Blocks
+para = many1Till inline endOfParaElement >>= return . result . mconcat
+ where
+ endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ newBlockElement = try $ blankline >> skip blockElements
+ result content = if F.all (==Space) content
+ then mempty
+ else B.para $ B.trimInlines content
+
+
+--
+-- inline parsers
+--
+
+inline :: TWParser B.Inlines
+inline = choice [ whitespace
+ , br
+ , macro
+ , strong
+ , strongHtml
+ , strongAndEmph
+ , emph
+ , emphHtml
+ , boldCode
+ , smart
+ , link
+ , htmlComment
+ , code
+ , codeHtml
+ , nop
+ , autoLink
+ , str
+ , symbol
+ ] <?> "inline"
+
+whitespace :: TWParser B.Inlines
+whitespace = (lb <|> regsp) >>= return
+ where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+ regsp = try $ skipMany1 spaceChar >> return B.space
+
+br :: TWParser B.Inlines
+br = try $ string "%BR%" >> return B.linebreak
+
+linebreak :: TWParser B.Inlines
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = eof >> return mempty
+ innerNewline = return B.space
+
+between :: (Show b, Monoid c) => TWParser a -> TWParser b -> (TWParser b -> TWParser c) -> TWParser c
+between start end p =
+ mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
+
+enclosed :: (Show a, Monoid b) => TWParser a -> (TWParser a -> TWParser b) -> TWParser b
+enclosed sep p = between sep (try $ sep <* endMarker) p
+ where
+ endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
+ endSpace = (spaceChar <|> newline) >> return B.space
+
+macro :: TWParser B.Inlines
+macro = macroWithParameters <|> withoutParameters
+ where
+ withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
+ emptySpan name = buildSpan name [] mempty
+
+macroWithParameters :: TWParser B.Inlines
+macroWithParameters = try $ do
+ char '%'
+ name <- macroName
+ (content, kvs) <- attributes
+ char '%'
+ return $ buildSpan name kvs $ B.str content
+
+buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines
+buildSpan className kvs = B.spanWith attrs
+ where
+ attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses)
+ additionalClasses = maybe [] words $ lookup "class" kvs
+ kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"]
+
+macroName :: TWParser String
+macroName = do
+ first <- letter
+ rest <- many $ alphaNum <|> char '_'
+ return (first:rest)
+
+attributes :: TWParser (String, [(String, String)])
+attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
+ return . foldr (either mkContent mkKvs) ([], [])
+ where
+ spnl = skipMany (spaceChar <|> newline)
+ mkContent c ([], kvs) = (c, kvs)
+ mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
+ mkKvs kv (cont, rest) = (cont, (kv : rest))
+
+attribute :: TWParser (Either String (String, String))
+attribute = withKey <|> withoutKey
+ where
+ withKey = try $ do
+ key <- macroName
+ char '='
+ parseValue False >>= return . (curry Right key)
+ withoutKey = try $ parseValue True >>= return . Left
+ parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities
+ withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
+ withoutQuotes allowSpaces
+ | allowSpaces == True = many1 $ noneOf "}"
+ | otherwise = many1 $ noneOf " }"
+
+nestedInlines :: Show a => TWParser a -> TWParser B.Inlines
+nestedInlines end = innerSpace <|> nestedInline
+ where
+ innerSpace = try $ whitespace <* (notFollowedBy end)
+ nestedInline = notFollowedBy whitespace >> nested inline
+
+strong :: TWParser B.Inlines
+strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
+
+strongHtml :: TWParser B.Inlines
+strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
+ >>= return . B.strong . mconcat
+
+strongAndEmph :: TWParser B.Inlines
+strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
+
+emph :: TWParser B.Inlines
+emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
+
+emphHtml :: TWParser B.Inlines
+emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
+ >>= return . B.emph . mconcat
+
+nestedString :: Show a => TWParser a -> TWParser String
+nestedString end = innerSpace <|> (count 1 nonspaceChar)
+ where
+ innerSpace = try $ many1 spaceChar <* notFollowedBy end
+
+boldCode :: TWParser B.Inlines
+boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
+
+htmlComment :: TWParser B.Inlines
+htmlComment = htmlTag isCommentTag >> return mempty
+
+code :: TWParser B.Inlines
+code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
+
+codeHtml :: TWParser B.Inlines
+codeHtml = do
+ (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
+ return $ B.codeWith attrs $ fromEntities content
+
+autoLink :: TWParser B.Inlines
+autoLink = try $ do
+ state <- getState
+ guard $ stateAllowLinks state
+ (text, url) <- parseLink
+ guard $ checkLink (head $ reverse url)
+ return $ makeLink (text, url)
+ where
+ parseLink = notFollowedBy nop >> (uri <|> emailAddress)
+ makeLink (text, url) = B.link url "" $ B.str text
+ checkLink c
+ | c == '/' = True
+ | otherwise = isAlphaNum c
+
+str :: TWParser B.Inlines
+str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
+
+nop :: TWParser B.Inlines
+nop = try $ (skip exclamation <|> skip nopTag) >> followContent
+ where
+ exclamation = char '!'
+ nopTag = stringAnyCase "<nop>"
+ followContent = many1 nonspaceChar >>= return . B.str . fromEntities
+
+symbol :: TWParser B.Inlines
+symbol = count 1 nonspaceChar >>= return . B.str
+
+smart :: TWParser B.Inlines
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice [ apostrophe
+ , dash
+ , ellipses
+ ]
+
+singleQuoted :: TWParser B.Inlines
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ many1Till inline singleQuoteEnd >>=
+ (return . B.singleQuoted . B.trimInlines . mconcat)
+
+doubleQuoted :: TWParser B.Inlines
+doubleQuoted = try $ do
+ doubleQuoteStart
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ doubleQuoteEnd >>
+ return (B.doubleQuoted $ B.trimInlines contents))
+ <|> (return $ (B.str "\8220") B.<> contents)
+
+link :: TWParser B.Inlines
+link = try $ do
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (url, title, content) <- linkText
+ setState $ st{ stateAllowLinks = True }
+ return $ B.link url title content
+
+linkText :: TWParser (String, String, B.Inlines)
+linkText = do
+ string "[["
+ url <- many1Till anyChar (char ']')
+ content <- option [B.str url] linkContent
+ char ']'
+ return (url, "", mconcat content)
+ where
+ linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
+ parseLinkContent = parseFromString $ many1 inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index ee64e8f2a..4565b26a1 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -57,6 +57,7 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
+import Text.Pandoc.Shared (trim)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
import Text.HTML.TagSoup.Match
@@ -67,11 +68,12 @@ import Text.Printf
import Control.Applicative ((<$>), (*>), (<*), (<$))
import Data.Monoid
import Debug.Trace (trace)
+import Text.Pandoc.Error
-- | Parse a Textile text and return a Pandoc document.
readTextile :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readTextile opts s =
(readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
@@ -325,33 +327,30 @@ para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
-- | A table cell spans until a pipe |
-tableCell :: Parser [Char] ParserState Blocks
-tableCell = do
- c <- many1 (noneOf "|\n")
- content <- trimInlines . mconcat <$> parseFromString (many1 inline) c
+tableCell :: Bool -> Parser [Char] ParserState Blocks
+tableCell headerCell = try $ do
+ char '|'
+ when headerCell $ () <$ string "_."
+ notFollowedBy blankline
+ raw <- trim <$>
+ many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
+ content <- mconcat <$> parseFromString (many inline) raw
return $ B.plain content
-- | A table row is made of many table cells
tableRow :: Parser [Char] ParserState [Blocks]
-tableRow = try $ ( char '|' *>
- (endBy1 tableCell (optional blankline *> char '|')) <* newline)
-
--- | Many table rows
-tableRows :: Parser [Char] ParserState [[Blocks]]
-tableRows = many1 tableRow
+tableRow = many1 (tableCell False) <* char '|' <* newline
--- | Table headers are made of cells separated by a tag "|_."
-tableHeaders :: Parser [Char] ParserState [Blocks]
-tableHeaders = let separator = (try $ string "|_.") in
- try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline )
+tableHeader :: Parser [Char] ParserState [Blocks]
+tableHeader = many1 (tableCell True) <* char '|' <* newline
-- | A table with an optional header. Current implementation can
-- handle tables with and without header, but will parse cells
-- alignment attributes as content.
table :: Parser [Char] ParserState Blocks
table = try $ do
- headers <- option mempty tableHeaders
- rows <- tableRows
+ headers <- option mempty $ tableHeader
+ rows <- many1 tableRow
blanklines
let nbOfCols = max (length headers) (length $ head rows)
return $ B.table mempty
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 6f8c19ac7..304d6d4c5 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -48,11 +48,12 @@ import Data.Monoid (Monoid, mconcat, mempty, mappend)
import Control.Monad (void, guard, when)
import Data.Default
import Control.Monad.Reader (Reader, runReader, asks)
+import Text.Pandoc.Error
import Data.Time.LocalTime (getZonedTime)
import Text.Pandoc.Compat.Directory(getModificationTime)
import Data.Time.Format (formatTime)
-import System.Locale (defaultTimeLocale)
+import Text.Pandoc.Compat.Locale (defaultTimeLocale)
import System.IO.Error (catchIOError)
type T2T = ParserT String ParserState (Reader T2TMeta)
@@ -83,12 +84,12 @@ getT2TMeta inps out = do
return $ T2TMeta curDate curMtime (intercalate ", " inps) out
-- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc
+readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc
readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
-- | Read Txt2Tags (ignoring all macros) from an input string returning
-- a Pandoc document
-readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc
+readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc
readTxt2TagsNoMacros = readTxt2Tags def
parseT2T :: T2T Pandoc
@@ -576,4 +577,3 @@ atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
ignoreSpacesCap :: T2T String -> T2T String
ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)
-
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 36839ddd0..d0dacd5b6 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -45,20 +45,32 @@ import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.MIME (MimeType)
import Text.Pandoc.UTF8 (toString, fromString)
import Text.Pandoc.Options (WriterOptions(..))
+import Data.List (isPrefixOf)
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
+makeDataURI :: String -> ByteString -> String
+makeDataURI mime raw =
+ if textual
+ then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw)
+ else "data:" ++ mime' ++ ";base64," ++ toString (encode raw)
+ where textual = "text/" `Data.List.isPrefixOf` mime
+ mime' = if textual && ';' `notElem` mime
+ then mime ++ ";charset=utf-8"
+ else mime -- mime type already has charset
+
convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
convertTag media sourceURL t@(TagOpen tagname as)
- | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
+ | tagname `elem`
+ ["img", "embed", "video", "input", "audio", "source", "track"] = do
as' <- mapM processAttribute as
return $ TagOpen tagname as'
where processAttribute (x,y) =
if x == "src" || x == "href" || x == "poster"
then do
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y
- let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
+ let enc = makeDataURI mime raw
return (x, enc)
else return (x,y)
convertTag media sourceURL t@(TagOpen "script" as) =
@@ -66,14 +78,14 @@ convertTag media sourceURL t@(TagOpen "script" as) =
[] -> return t
src -> do
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
- let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
+ let enc = makeDataURI mime raw
return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
convertTag media sourceURL t@(TagOpen "link" as) =
case fromAttrib "href" t of
[] -> return t
src -> do
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
- let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
+ let enc = makeDataURI mime raw
return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
convertTag _ _ t = return t
@@ -95,8 +107,7 @@ cssURLs media sourceURL d orig =
else d </> url
(raw, mime) <- getRaw media sourceURL "" url'
rest <- cssURLs media sourceURL d v
- let enc = "data:" `B.append` fromString mime `B.append`
- ";base64," `B.append` (encode raw)
+ let enc = fromString $ makeDataURI mime raw
return $ x `B.append` "url(" `B.append` enc `B.append` rest
getRaw :: MediaBag -> Maybe String -> MimeType -> String
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 2d7c08718..e0460c66e 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
- FlexibleContexts, ScopedTypeVariables, PatternGuards #-}
+ FlexibleContexts, ScopedTypeVariables, PatternGuards,
+ ViewPatterns #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -84,6 +85,8 @@ module Text.Pandoc.Shared (
-- * Error handling
err,
warn,
+ mapLeft,
+ hush,
-- * Safe read
safeRead,
-- * Temp directory
@@ -106,15 +109,15 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
unEscapeString, parseURIReference, isAllowedInURI )
import qualified Data.Set as Set
import System.Directory
-import System.FilePath (joinPath, splitDirectories)
+import System.FilePath (joinPath, splitDirectories, pathSeparator, isPathSeparator)
import Text.Pandoc.MIME (MimeType, getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
-import Control.Monad (msum, unless)
+import Control.Monad (msum, unless, MonadPlus(..))
import Text.Pandoc.Pretty (charWidth)
-import System.Locale (defaultTimeLocale)
+import Text.Pandoc.Compat.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
import System.IO.Temp
@@ -734,12 +737,10 @@ renderTags' = renderTagsOptions
-- | Perform an IO action in a directory, returning to starting directory.
inDirectory :: FilePath -> IO a -> IO a
-inDirectory path action = do
- oldDir <- getCurrentDirectory
- setCurrentDirectory path
- result <- action
- setCurrentDirectory oldDir
- return result
+inDirectory path action = E.bracket
+ getCurrentDirectory
+ setCurrentDirectory
+ (const $ setCurrentDirectory path >> action)
readDefaultDataFile :: FilePath -> IO BS.ByteString
readDefaultDataFile fname =
@@ -856,6 +857,14 @@ warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+mapLeft :: (a -> b) -> Either a c -> Either b c
+mapLeft f (Left x) = Left (f x)
+mapLeft _ (Right x) = Right x
+
+hush :: Either a b -> Maybe b
+hush (Left _) = Nothing
+hush (Right x) = Just x
+
-- | Remove intermediate "." and ".." directories from a path.
--
-- > collapseFilePath "./foo" == "foo"
@@ -871,21 +880,24 @@ collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories
go rs "." = rs
go r@(p:rs) ".." = case p of
".." -> ("..":r)
- "/" -> ("..":r)
+ (checkPathSeperator -> Just True) -> ("..":r)
_ -> rs
- go _ "/" = ["/"]
+ go _ (checkPathSeperator -> Just True) = [[pathSeparator]]
go rs x = x:rs
-
+ isSingleton [] = Nothing
+ isSingleton [x] = Just x
+ isSingleton _ = Nothing
+ checkPathSeperator = fmap isPathSeparator . isSingleton
--
-- Safe read
--
-safeRead :: (Monad m, Read a) => String -> m a
+safeRead :: (MonadPlus m, Read a) => String -> m a
safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
- _ -> fail $ "Could not read `" ++ s ++ "'"
+ _ -> mzero
--
-- Temp directory
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 4ae6a6d8a..eefce2744 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -124,11 +124,12 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
-> String -- ^ Name of writer
-> IO (Either E.IOException String)
getDefaultTemplate user writer = do
- let format = takeWhile (`notElem` "+-") writer -- strip off extensions
+ let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions
case format of
"native" -> return $ Right ""
"json" -> return $ Right ""
"docx" -> return $ Right ""
+ "fb2" -> return $ Right ""
"odt" -> getDefaultTemplate user "opendocument"
"markdown_strict" -> getDefaultTemplate user "markdown"
"multimarkdown" -> getDefaultTemplate user "markdown"
@@ -287,7 +288,7 @@ reservedWords :: [Text]
reservedWords = ["else","endif","for","endfor","sep"]
skipEndline :: Parser ()
-skipEndline = P.try $ P.skipMany (P.satisfy (`elem` " \t")) >> P.char '\n' >> return ()
+skipEndline = P.try $ P.skipMany (P.satisfy (`elem` (" \t" :: String))) >> P.char '\n' >> return ()
pConditional :: Parser Template
pConditional = do
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index e5b8c5167..1c33b004a 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -126,7 +126,7 @@ blockToAsciiDoc :: WriterOptions -- ^ Options
blockToAsciiDoc _ Null = return empty
blockToAsciiDoc opts (Plain inlines) = do
contents <- inlineListToAsciiDoc opts inlines
- return $ contents <> cr
+ return $ contents <> blankline
blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do
blockToAsciiDoc opts (Para [Image alt (src,tit)])
blockToAsciiDoc opts (Para inlines) = do
@@ -272,7 +272,7 @@ bulletListItemToAsciiDoc opts blocks = do
contents <- foldM addBlock empty blocks
modify $ \s -> s{ bulletListLevel = lev }
let marker = text (replicate lev '*')
- return $ marker <> space <> contents <> cr
+ return $ marker <> text " " <> contents <> cr
-- | Convert ordered list item (a list of blocks) to asciidoc.
orderedListItemToAsciiDoc :: WriterOptions -- ^ options
@@ -292,7 +292,7 @@ orderedListItemToAsciiDoc opts marker blocks = do
modify $ \s -> s{ orderedListLevel = lev + 1 }
contents <- foldM addBlock empty blocks
modify $ \s -> s{ orderedListLevel = lev }
- return $ text marker <> space <> contents <> cr
+ return $ text marker <> text " " <> contents <> cr
-- | Convert definition list item (label, list of blocks) to asciidoc.
definitionListItemToAsciiDoc :: WriterOptions
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
new file mode 100644
index 000000000..706b27175
--- /dev/null
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -0,0 +1,178 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.CommonMark
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to CommonMark.
+
+CommonMark: <http://commonmark.org>
+-}
+module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
+
+import Text.Pandoc.Writers.HTML (writeHtmlString)
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (isTightList)
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import CMark
+import qualified Data.Text as T
+import Control.Monad.Identity (runIdentity, Identity)
+import Control.Monad.State (runState, State, modify, get)
+import Text.Pandoc.Walk (walkM)
+
+-- | Convert Pandoc to CommonMark.
+writeCommonMark :: WriterOptions -> Pandoc -> String
+writeCommonMark opts (Pandoc meta blocks) = rendered
+ where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes')
+ (blocks', notes) = runState (walkM processNotes blocks) []
+ notes' = if null notes
+ then []
+ else [OrderedList (1, Decimal, Period) $ reverse notes]
+ metadata = runIdentity $ metaToJSON opts
+ (blocksToCommonMark opts)
+ (inlinesToCommonMark opts)
+ meta
+ context = defField "body" main $ metadata
+ rendered = if writerStandalone opts
+ then renderTemplate' (writerTemplate opts) context
+ else main
+
+processNotes :: Inline -> State [[Block]] Inline
+processNotes (Note bs) = do
+ modify (bs :)
+ notes <- get
+ return $ Str $ "[" ++ show (length notes) ++ "]"
+processNotes x = return x
+
+node :: NodeType -> [Node] -> Node
+node = Node Nothing
+
+blocksToCommonMark :: WriterOptions -> [Block] -> Identity String
+blocksToCommonMark opts bs = return $
+ T.unpack $ nodeToCommonmark cmarkOpts colwidth
+ $ node DOCUMENT (blocksToNodes bs)
+ where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts
+ then writerColumns opts
+ else 0
+
+inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String
+inlinesToCommonMark opts ils = return $
+ T.unpack $ nodeToCommonmark cmarkOpts colwidth
+ $ node PARAGRAPH (inlinesToNodes ils)
+ where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts
+ then writerColumns opts
+ else 0
+
+blocksToNodes :: [Block] -> [Node]
+blocksToNodes = foldr blockToNodes []
+
+blockToNodes :: Block -> [Node] -> [Node]
+blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :)
+blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :)
+blockToNodes (CodeBlock (_,classes,_) xs) =
+ (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :)
+blockToNodes (RawBlock fmt xs)
+ | fmt == Format "html" = (node (HTML (T.pack xs)) [] :)
+ | otherwise = id
+blockToNodes (BlockQuote bs) =
+ (node BLOCK_QUOTE (blocksToNodes bs) :)
+blockToNodes (BulletList items) =
+ (node (LIST ListAttributes{
+ listType = BULLET_LIST,
+ listDelim = PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = 1 }) (map (node ITEM . blocksToNodes) items) :)
+blockToNodes (OrderedList (start, _sty, delim) items) =
+ (node (LIST ListAttributes{
+ listType = ORDERED_LIST,
+ listDelim = case delim of
+ OneParen -> PAREN_DELIM
+ TwoParens -> PAREN_DELIM
+ _ -> PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = start }) (map (node ITEM . blocksToNodes) items) :)
+blockToNodes HorizontalRule = (node HRULE [] :)
+blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :)
+blockToNodes (Div _ bs) = (blocksToNodes bs ++)
+blockToNodes (DefinitionList items) = blockToNodes (BulletList items')
+ where items' = map dlToBullet items
+ dlToBullet (term, ((Para xs : ys) : zs)) =
+ Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, ((Plain xs : ys) : zs)) =
+ Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, xs) =
+ Para term : concat xs
+blockToNodes t@(Table _ _ _ _ _) =
+ (node (HTML (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :)
+blockToNodes Null = id
+
+inlinesToNodes :: [Inline] -> [Node]
+inlinesToNodes = foldr inlineToNodes []
+
+inlineToNodes :: Inline -> [Node] -> [Node]
+inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :)
+inlineToNodes Space = (node (TEXT (T.pack " ")) [] :)
+inlineToNodes LineBreak = (node LINEBREAK [] :)
+inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :)
+inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :)
+inlineToNodes (Strikeout xs) =
+ ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</s>")) []]) ++ )
+inlineToNodes (Superscript xs) =
+ ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</sub>")) []]) ++ )
+inlineToNodes (Subscript xs) =
+ ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</sup>")) []]) ++ )
+inlineToNodes (SmallCaps xs) =
+ ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) []
+ : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</span>")) []]) ++ )
+inlineToNodes (Link ils (url,tit)) =
+ (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (Image ils (url,tit)) =
+ (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (RawInline fmt xs)
+ | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :)
+ | otherwise = id
+inlineToNodes (Quoted qt ils) =
+ ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++)
+ where (start, end) = case qt of
+ SingleQuote -> (T.pack "‘", T.pack "’")
+ DoubleQuote -> (T.pack "“", T.pack "”")
+inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :)
+inlineToNodes (Math mt str) =
+ case mt of
+ InlineMath ->
+ (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :)
+ DisplayMath ->
+ (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :)
+inlineToNodes (Span _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Note _) = id -- should not occur
+-- we remove Note elements in preprocessing
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index bbca7f858..edfb4d0ff 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -36,6 +36,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Walk (query)
import Text.Printf ( printf )
import Data.List ( intercalate )
+import Data.Char ( ord )
import Control.Monad.State
import Text.Pandoc.Pretty
import Text.Pandoc.Templates ( renderTemplate' )
@@ -114,6 +115,13 @@ escapeCharForConTeXt opts ch =
stringToConTeXt :: WriterOptions -> String -> String
stringToConTeXt opts = concatMap (escapeCharForConTeXt opts)
+-- | Sanitize labels
+toLabel :: String -> String
+toLabel z = concatMap go z
+ where go x
+ | elem x ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x)
+ | otherwise = [x]
+
-- | Convert Elements to ConTeXt
elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc
elementToConTeXt _ (Blk block) = blockToConTeXt block
@@ -286,15 +294,16 @@ inlineToConTeXt Space = return space
-- Handle HTML-like internal document references to sections
inlineToConTeXt (Link txt (('#' : ref), _)) = do
opts <- gets stOptions
- label <- inlineListToConTeXt txt
+ contents <- inlineListToConTeXt txt
+ let ref' = toLabel $ stringToConTeXt opts ref
return $ text "\\in"
<> braces (if writerNumberSections opts
- then label <+> text "(\\S"
- else label) -- prefix
+ then contents <+> text "(\\S"
+ else contents) -- prefix
<> braces (if writerNumberSections opts
then text ")"
else empty) -- suffix
- <> brackets (text ref)
+ <> brackets (text ref')
inlineToConTeXt (Link txt (src, _)) = do
let isAutolink = txt == [Str (unEscapeString src)]
@@ -302,13 +311,13 @@ inlineToConTeXt (Link txt (src, _)) = do
let next = stNextRef st
put $ st {stNextRef = next + 1}
let ref = "url" ++ show next
- label <- inlineListToConTeXt txt
+ contents <- inlineListToConTeXt txt
return $ "\\useURL"
<> brackets (text ref)
<> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
<> (if isAutolink
then empty
- else brackets empty <> brackets label)
+ else brackets empty <> brackets contents)
<> "\\from"
<> brackets (text ref)
inlineToConTeXt (Image _ (src, _)) = do
@@ -337,6 +346,7 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
st <- get
let opts = stOptions st
let level' = if writerChapters opts then hdrLevel - 1 else hdrLevel
+ let ident' = toLabel ident
let (section, chapter) = if "unnumbered" `elem` classes
then (text "subject", text "title")
else (text "section", text "chapter")
@@ -344,7 +354,7 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
then char '\\'
<> text (concat (replicate (level' - 1) "sub"))
<> section
- <> (if (not . null) ident then brackets (text ident) else empty)
+ <> (if (not . null) ident' then brackets (text ident') else empty)
<> braces contents
<> blankline
else if level' == 0
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 914d61850..6fc3b9b3c 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -1,5 +1,5 @@
{-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings,
- ScopedTypeVariables #-}
+ ScopedTypeVariables, DeriveDataTypeable #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
@@ -35,6 +35,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Data.List ( intersperse )
import Data.Char ( toLower )
+import Data.Typeable
import Scripting.Lua (LuaState, StackValue, callfunc)
import Text.Pandoc.Writers.Shared
import qualified Scripting.Lua as Lua
@@ -42,6 +43,8 @@ import Text.Pandoc.UTF8 (fromString, toString)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as C8
import Data.Monoid
+import Control.Monad (when)
+import Control.Exception
import qualified Data.Map as M
import Text.Pandoc.Templates
@@ -145,13 +148,22 @@ instance StackValue Citation where
peek = undefined
valuetype _ = Lua.TTABLE
+data PandocLuaException = PandocLuaException String
+ deriving (Show, Typeable)
+
+instance Exception PandocLuaException
+
-- | Convert Pandoc to custom markup.
writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String
writeCustom luaFile opts doc@(Pandoc meta _) = do
luaScript <- C8.unpack `fmap` C8.readFile luaFile
lua <- Lua.newstate
Lua.openlibs lua
- Lua.loadstring lua luaScript "custom"
+ status <- Lua.loadstring lua luaScript luaFile
+ -- check for error in lua script (later we'll change the return type
+ -- to handle this more gracefully):
+ when (status /= 0) $
+ Lua.tostring lua 1 >>= throw . PandocLuaException
Lua.call lua 0 0
-- TODO - call hierarchicalize, so we have that info
rendered <- docToCustom lua opts doc
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index b10317506..19f8f2f11 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -114,7 +114,8 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) =
n | n == 0 -> "chapter"
| n >= 1 && n <= 5 -> "sect" ++ show n
| otherwise -> "simplesect"
- in inTags True tag [("id", writerIdentifierPrefix opts ++ id')] $
+ in inTags True tag [("id", writerIdentifierPrefix opts ++ id') |
+ not (null id')] $
inTagsSimple "title" (inlinesToDocbook opts title) $$
vcat (map (elementToDocbook opts (lvl + 1)) elements')
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 09321d1cc..4809d2a14 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE ScopedTypeVariables, PatternGuards #-}
+{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-}
{-
Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
@@ -41,7 +41,7 @@ import Data.Time.Clock.POSIX
import Data.Time.Clock
import Data.Time.Format
import System.Environment
-import System.Locale
+import Text.Pandoc.Compat.Locale (defaultTimeLocale)
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.ImageSize
@@ -52,8 +52,10 @@ import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Highlighting ( highlight )
import Text.Pandoc.Walk
import Text.Highlighting.Kate.Types ()
-import Text.XML.Light
+import Text.XML.Light as XML
import Text.TeXMath
+import Text.Pandoc.Readers.Docx.StyleMap
+import Text.Pandoc.Readers.Docx.Util (elemName)
import Control.Monad.State
import Text.Highlighting.Kate
import Data.Unique (hashUnique, newUnique)
@@ -62,8 +64,9 @@ import Text.Printf (printf)
import qualified Control.Exception as E
import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
extensionFromMimeType)
-import Control.Applicative ((<$>), (<|>))
-import Data.Maybe (fromMaybe, mapMaybe)
+import Control.Applicative ((<$>), (<|>), (<*>))
+import Data.Maybe (fromMaybe, mapMaybe, maybeToList)
+import Data.Char (ord)
data ListMarker = NoMarker
| BulletMarker
@@ -104,13 +107,17 @@ data WriterState = WriterState{
, stInDel :: Bool
, stChangesAuthor :: String
, stChangesDate :: String
+ , stPrintWidth :: Integer
+ , stStyleMaps :: StyleMaps
+ , stFirstPara :: Bool
+ , stTocTitle :: [Inline]
}
defaultWriterState :: WriterState
defaultWriterState = WriterState{
stTextProperties = []
, stParaProperties = []
- , stFootnotes = []
+ , stFootnotes = defaultFootnotes
, stSectionIds = []
, stExternalLinks = M.empty
, stImages = M.empty
@@ -122,6 +129,10 @@ defaultWriterState = WriterState{
, stInDel = False
, stChangesAuthor = "unknown"
, stChangesDate = "1969-12-31T19:00:00Z"
+ , stPrintWidth = 1
+ , stStyleMaps = defaultStyleMaps
+ , stFirstPara = False
+ , stTocTitle = normalizeInlines [Str "Table of Contents"]
}
type WS a = StateT WriterState IO a
@@ -143,24 +154,106 @@ renderXml :: Element -> BL.ByteString
renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
UTF8.fromStringLazy (showElement elt)
+renumIdMap :: Int -> [Element] -> M.Map String String
+renumIdMap _ [] = M.empty
+renumIdMap n (e:es)
+ | Just oldId <- findAttr (QName "Id" Nothing Nothing) e =
+ M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es)
+ | otherwise = renumIdMap n es
+
+replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr]
+replaceAttr _ _ [] = []
+replaceAttr f val (a:as) | f (attrKey a) =
+ (XML.Attr (attrKey a) val) : (replaceAttr f val as)
+ | otherwise = a : (replaceAttr f val as)
+
+renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element
+renumId f renumMap e
+ | Just oldId <- findAttrBy f e
+ , Just newId <- M.lookup oldId renumMap =
+ let attrs' = replaceAttr f newId (elAttribs e)
+ in
+ e { elAttribs = attrs' }
+ | otherwise = e
+
+renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element]
+renumIds f renumMap = map (renumId f renumMap)
+
+-- | Certain characters are invalid in XML even if escaped.
+-- See #1992
+stripInvalidChars :: Pandoc -> Pandoc
+stripInvalidChars = bottomUp (filter isValidChar)
+
+-- | See XML reference
+isValidChar :: Char -> Bool
+isValidChar (ord -> c)
+ | c == 0x9 = True
+ | c == 0xA = True
+ | c == 0xD = True
+ | 0x20 <= c && c <= 0xD7FF = True
+ | 0xE000 <= c && c <= 0xFFFD = True
+ | 0x10000 <= c && c <= 0x10FFFF = True
+ | otherwise = False
+
+metaValueToInlines :: MetaValue -> [Inline]
+metaValueToInlines (MetaString s) = normalizeInlines [Str s]
+metaValueToInlines (MetaInlines ils) = ils
+metaValueToInlines (MetaBlocks bs) = query return bs
+metaValueToInlines (MetaBool b) = [Str $ show b]
+metaValueToInlines _ = []
+
-- | Produce an Docx file from a Pandoc document.
writeDocx :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO BL.ByteString
writeDocx opts doc@(Pandoc meta _) = do
let datadir = writerUserDataDir opts
- let doc' = walk fixDisplayMath doc
+ let doc' = stripInvalidChars . walk fixDisplayMath $ doc
username <- lookup "USERNAME" <$> getEnvironment
utctime <- getCurrentTime
refArchive <- liftM (toArchive . toLazy) $
case writerReferenceDocx opts of
Just f -> B.readFile f
Nothing -> readDataFile datadir "reference.docx"
- distArchive <- liftM (toArchive . toLazy) $ readDataFile Nothing "reference.docx"
+ distArchive <- liftM (toArchive . toLazy) $ readDataFile datadir "reference.docx"
+
+ parsedDoc <- parseXml refArchive distArchive "word/document.xml"
+ let wname f qn = qPrefix qn == Just "w" && f (qName qn)
+ let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc
+
+ -- Gets the template size
+ let mbpgsz = mbsectpr >>= (filterElementName (wname (=="pgSz")))
+ let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= (lookupAttrBy ((=="w") . qName))
+
+ let mbpgmar = mbsectpr >>= (filterElementName (wname (=="pgMar")))
+ let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="left") . qName))
+ let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= (lookupAttrBy ((=="right") . qName))
+
+ -- Get the avaible area (converting the size and the margins to int and
+ -- doing the difference
+ let pgContentWidth = (-) <$> (read <$> mbAttrSzWidth ::Maybe Integer)
+ <*> (
+ (+) <$> (read <$> mbAttrMarRight ::Maybe Integer)
+ <*> (read <$> mbAttrMarLeft ::Maybe Integer)
+ )
+
+ -- styles
+ let stylepath = "word/styles.xml"
+ styledoc <- parseXml refArchive distArchive stylepath
+
+ -- parse styledoc for heading styles
+ let styleMaps = getStyleMaps styledoc
+
+ let tocTitle = fromMaybe (stTocTitle defaultWriterState) $
+ metaValueToInlines <$> lookupMeta "toc-title" meta
((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc')
defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username
- , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime}
+ , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
+ , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
+ , stStyleMaps = styleMaps
+ , stTocTitle = tocTitle
+ }
let epochtime = floor $ utcTimeToPOSIXSeconds utctime
let imgs = M.elems $ stImages st
@@ -168,13 +261,6 @@ writeDocx opts doc@(Pandoc meta _) = do
let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img
let imageEntries = map toImageEntry imgs
- -- adjust contents to add sectPr from reference.docx
- parsedDoc <- parseXml refArchive distArchive "word/document.xml"
- let wname f qn = qPrefix qn == Just "w" && f (qName qn)
- let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc
-
- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr
-
let stdAttributes =
[("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
@@ -186,9 +272,6 @@ writeDocx opts doc@(Pandoc meta _) = do
,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
- let contents' = contents ++ [sectpr]
- let docContents = mknode "w:document" stdAttributes
- $ mknode "w:body" [] contents'
parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels"
let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
@@ -255,7 +338,7 @@ writeDocx opts doc@(Pandoc meta _) = do
[("Type",url')
,("Id",id')
,("Target",target')] ()
- let baserels = map toBaseRel
+ let baserels' = map toBaseRel
[("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering",
"rId1",
"numbering.xml")
@@ -277,8 +360,12 @@ writeDocx opts doc@(Pandoc meta _) = do
,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
"rId7",
"footnotes.xml")
- ] ++
- headers ++ footers
+ ]
+
+ let idMap = renumIdMap (length baserels' + 1) (headers ++ footers)
+ let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers
+ let renumFooters = renumIds (\q -> qName q == "Id") idMap footers
+ let baserels = baserels' ++ renumHeaders ++ renumFooters
let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
let imgrels = map toImgRel imgs
let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
@@ -288,6 +375,23 @@ writeDocx opts doc@(Pandoc meta _) = do
$ renderXml reldoc
+ -- adjust contents to add sectPr from reference.docx
+ let sectpr = case mbsectpr of
+ Just sectpr' -> let cs = renumIds
+ (\q -> qName q == "id" && qPrefix q == Just "r")
+ idMap
+ (elChildren sectpr')
+ in
+ add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs
+ Nothing -> (mknode "w:sectPr" [] ())
+
+ -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr'
+ let contents' = contents ++ [sectpr]
+ let docContents = mknode "w:document" stdAttributes
+ $ mknode "w:body" [] contents'
+
+
+
-- word/document.xml
let contentEntry = toEntry "word/document.xml" epochtime
$ renderXml docContents
@@ -302,11 +406,18 @@ writeDocx opts doc@(Pandoc meta _) = do
linkrels
-- styles
- let newstyles = styleToOpenXml $ writerHighlightStyle opts
- let stylepath = "word/styles.xml"
- styledoc <- parseXml refArchive distArchive stylepath
- let styledoc' = styledoc{ elContent = elContent styledoc ++
- [Elem x | x <- newstyles, writerHighlight opts] }
+ let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts
+ let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) }
+ where
+ modifyContent
+ | writerHighlight opts = (++ map Elem newstyles)
+ | otherwise = filter notTokStyle
+ notTokStyle (Elem el) = notStyle el || notTokId el
+ notTokStyle _ = True
+ notStyle = (/= elemName' "style") . elName
+ notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId")
+ tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok)
+ elemName' = elemName (sNameSpaces styleMaps) "w"
let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
-- construct word/numbering.xml
@@ -351,6 +462,17 @@ writeDocx opts doc@(Pandoc meta _) = do
]
let relsEntry = toEntry relsPath epochtime $ renderXml rels
+ -- we use dist archive for settings.xml, because Word sometimes
+ -- adds references to footnotes or endnotes we don't have...
+ -- we do, however, copy some settings over from reference
+ let settingsPath = "word/settings.xml"
+ settingsList = [ "w:autoHyphenation"
+ , "w:consecutiveHyphenLimit"
+ , "w:hyphenationZone"
+ , "w:doNotHyphenateCap"
+ ]
+ settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
+
let entryFromArchive arch path =
maybe (fail $ path ++ " corrupt or missing in reference docx")
return
@@ -358,9 +480,6 @@ writeDocx opts doc@(Pandoc meta _) = do
docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml"
themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml"
fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
- -- we use dist archive for settings.xml, because Word sometimes
- -- adds references to footnotes or endnotes we don't have...
- settingsEntry <- entryFromArchive distArchive "word/settings.xml"
webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
headerFooterEntries <- mapM (entryFromArchive refArchive) $
mapMaybe (fmap ("word/" ++) . extractTarget)
@@ -383,10 +502,13 @@ writeDocx opts doc@(Pandoc meta _) = do
miscRelEntries ++ otherMediaEntries
return $ fromArchive archive
-styleToOpenXml :: Style -> [Element]
-styleToOpenXml style = parStyle : map toStyle alltoktypes
+styleToOpenXml :: StyleMaps -> Style -> [Element]
+styleToOpenXml sm style =
+ maybeToList parStyle ++ mapMaybe toStyle alltoktypes
where alltoktypes = enumFromTo KeywordTok NormalTok
- toStyle toktype = mknode "w:style" [("w:type","character"),
+ toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing
+ | otherwise = Just $
+ mknode "w:style" [("w:type","character"),
("w:customStyle","1"),("w:styleId",show toktype)]
[ mknode "w:name" [("w:val",show toktype)] ()
, mknode "w:basedOn" [("w:val","VerbatimChar")] ()
@@ -407,17 +529,35 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes
tokBg toktype = maybe "auto" (drop 1 . fromColor)
$ (tokenBackground =<< lookup toktype tokStyles)
`mplus` backgroundColor style
- parStyle = mknode "w:style" [("w:type","paragraph"),
+ parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing
+ | otherwise = Just $
+ mknode "w:style" [("w:type","paragraph"),
("w:customStyle","1"),("w:styleId","SourceCode")]
[ mknode "w:name" [("w:val","Source Code")] ()
, mknode "w:basedOn" [("w:val","Normal")] ()
, mknode "w:link" [("w:val","VerbatimChar")] ()
, mknode "w:pPr" []
$ mknode "w:wordWrap" [("w:val","off")] ()
+ : mknode "w:noProof" [] ()
: ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()])
$ backgroundColor style )
]
+copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry
+copyChildren refArchive distArchive path timestamp elNames = do
+ ref <- parseXml refArchive distArchive path
+ dist <- parseXml distArchive distArchive path
+ return $ toEntry path timestamp $ renderXml dist{
+ elContent = elContent dist ++ copyContent ref
+ }
+ where
+ strName QName{qName=name, qPrefix=prefix}
+ | Just p <- prefix = p++":"++name
+ | otherwise = name
+ shouldCopy = (`elem` elNames) . strName
+ cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
+ copyContent = map cleanElem . filterChildrenName shouldCopy
+
-- this is the lowest number used for a list numId
baseListId :: Int
baseListId = 1000
@@ -495,6 +635,34 @@ mkLvl marker lvl =
getNumId :: WS Int
getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
+makeTOC :: WriterOptions -> WS [Element]
+makeTOC opts | writerTableOfContents opts = do
+ let depth = "1-"++(show (writerTOCDepth opts))
+ let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
+ tocTitle <- gets stTocTitle
+ title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
+ return $
+ [mknode "w:sdt" [] ([
+ mknode "w:sdtPr" [] (
+ mknode "w:docPartObj" [] (
+ [mknode "w:docPartGallery" [("w:val","Table of Contents")] (),
+ mknode "w:docPartUnique" [] ()]
+ ) -- w:docPartObj
+ ), -- w:sdtPr
+ mknode "w:sdtContent" [] (title++[
+ mknode "w:p" [] (
+ mknode "w:r" [] ([
+ mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (),
+ mknode "w:instrText" [("xml:space","preserve")] tocCmd,
+ mknode "w:fldChar" [("w:fldCharType","separate")] (),
+ mknode "w:fldChar" [("w:fldCharType","end")] ()
+ ]) -- w:r
+ ) -- w:p
+ ])
+ ])] -- w:sdt
+makeTOC _ = return []
+
+
-- | Convert Pandoc document to two lists of
-- OpenXML elements (the main document and footnotes).
writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element])
@@ -513,32 +681,45 @@ writeOpenXML opts (Pandoc meta blocks) = do
Just (MetaBlocks [Para xs]) -> xs
Just (MetaInlines xs) -> xs
_ -> []
- title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
- subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
- authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $
+ title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
+ subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
+ authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $
map Para auths
- date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
+ date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
abstract <- if null abstract'
then return []
- else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract'
+ else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract'
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
convertSpace xs = xs
let blocks' = bottomUp convertSpace blocks
- doc' <- blocksToOpenXML opts blocks'
+ doc' <- (setFirstPara >> blocksToOpenXML opts blocks')
notes' <- reverse `fmap` gets stFootnotes
- let meta' = title ++ subtitle ++ authors ++ date ++ abstract
+ toc <- makeTOC opts
+ let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
return (meta' ++ doc', notes')
-- | Convert a list of Pandoc blocks to OpenXML.
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
-pStyle :: String -> Element
-pStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
+pCustomStyle :: String -> Element
+pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
+
+pStyleM :: String -> WS XML.Element
+pStyleM styleName = do
+ styleMaps <- gets stStyleMaps
+ let sty' = getStyleId styleName $ sParaStyleMap styleMaps
+ return $ mknode "w:pStyle" [("w:val",sty')] ()
+
+rCustomStyle :: String -> Element
+rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
-rStyle :: String -> Element
-rStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
+rStyleM :: String -> WS XML.Element
+rStyleM styleName = do
+ styleMaps <- gets stStyleMaps
+ let sty' = getStyleId styleName $ sCharStyleMap styleMaps
+ return $ mknode "w:rStyle" [("w:val",sty')] ()
getUniqueId :: MonadIO m => m String
-- the + 20 is to ensure that there are no clashes with the rIds
@@ -552,12 +733,13 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do
let (hs, bs') = span isHeaderBlock bs
header <- blocksToOpenXML opts hs
-- We put the Bibliography style on paragraphs after the header
- rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs'
+ rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs'
return (header ++ rest)
blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs
blockToOpenXML opts (Header lev (ident,_,_) lst) = do
- paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $
- getParaProps False
+ setFirstPara
+ paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $
+ getParaProps False
contents <- inlinesToOpenXML opts lst
usedIdents <- gets stSectionIds
let bookmarkName = if null ident
@@ -569,40 +751,60 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do
,("w:name",bookmarkName)] ()
let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
-blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact")
+blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact")
$ blockToOpenXML opts (Para lst)
-- title beginning with fig: indicates that the image is a figure
blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do
+ setFirstPara
+ pushParaProp $ pCustomStyle $
+ if null alt
+ then "Figure"
+ else "FigureWithCaption"
paraProps <- getParaProps False
+ popParaProp
contents <- inlinesToOpenXML opts [Image alt (src,tit)]
- captionNode <- withParaProp (pStyle "ImageCaption")
+ captionNode <- withParaProp (pCustomStyle "ImageCaption")
$ blockToOpenXML opts (Para alt)
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
-- fixDisplayMath sometimes produces a Para [] as artifact
blockToOpenXML _ (Para []) = return []
blockToOpenXML opts (Para lst) = do
- paraProps <- getParaProps $ case lst of
- [Math DisplayMath _] -> True
- _ -> False
- contents <- inlinesToOpenXML opts lst
- return [mknode "w:p" [] (paraProps ++ contents)]
+ isFirstPara <- gets stFirstPara
+ paraProps <- getParaProps $ case lst of
+ [Math DisplayMath _] -> True
+ _ -> False
+ bodyTextStyle <- pStyleM "Body Text"
+ let paraProps' = case paraProps of
+ [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
+ [] -> [mknode "w:pPr" [] [bodyTextStyle]]
+ ps -> ps
+ modify $ \s -> s { stFirstPara = False }
+ contents <- inlinesToOpenXML opts lst
+ return [mknode "w:p" [] (paraProps' ++ contents)]
blockToOpenXML _ (RawBlock format str)
| format == Format "openxml" = return [ x | Elem x <- parseXML str ]
| otherwise = return []
-blockToOpenXML opts (BlockQuote blocks) =
- withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks
-blockToOpenXML opts (CodeBlock attrs str) =
- withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str]
-blockToOpenXML _ HorizontalRule = return [
- mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
+blockToOpenXML opts (BlockQuote blocks) = do
+ p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks
+ setFirstPara
+ return p
+blockToOpenXML opts (CodeBlock attrs str) = do
+ p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str])
+ setFirstPara
+ return p
+blockToOpenXML _ HorizontalRule = do
+ setFirstPara
+ return [
+ mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
$ mknode "v:rect" [("style","width:0;height:1.5pt"),
("o:hralign","center"),
("o:hrstd","t"),("o:hr","t")] () ]
blockToOpenXML opts (Table caption aligns widths headers rows) = do
+ setFirstPara
let captionStr = stringify caption
caption' <- if null caption
then return []
- else withParaProp (pStyle "TableCaption")
+ else withParaProp (pCustomStyle "TableCaption")
$ blockToOpenXML opts (Para caption)
let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
@@ -613,51 +815,62 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
[ mknode "w:tcBorders" []
$ mknode "w:bottom" [("w:val","single")] ()
, mknode "w:vAlign" [("w:val","bottom")] () ]
- let emptyCell = [mknode "w:p" [] [mknode "w:pPr" []
- [mknode "w:pStyle" [("w:val","Compact")] ()]]]
+ let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]]
let mkcell border contents = mknode "w:tc" []
$ [ borderProps | border ] ++
if null contents
then emptyCell
else contents
- let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells
+ let mkrow border cells = mknode "w:tr" [] $
+ [mknode "w:trPr" [] [
+ mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border]
+ ++ map (mkcell border) cells
let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
let fullrow = 5000 -- 100% specified in pct
let rowwidth = fullrow * sum widths
let mkgridcol w = mknode "w:gridCol"
[("w:w", show (floor (textwidth * w) :: Integer))] ()
+ let hasHeader = not (all null headers)
return $
- mknode "w:tbl" []
+ caption' ++
+ [mknode "w:tbl" []
( mknode "w:tblPr" []
( mknode "w:tblStyle" [("w:val","TableNormal")] () :
mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
+ mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () :
[ mknode "w:tblCaption" [("w:val", captionStr)] ()
| not (null caption) ] )
: mknode "w:tblGrid" []
(if all (==0) widths
then []
else map mkgridcol widths)
- : [ mkrow True headers' | not (all null headers) ] ++
+ : [ mkrow True headers' | hasHeader ] ++
map (mkrow False) rows'
- ) : caption'
+ )]
blockToOpenXML opts (BulletList lst) = do
let marker = BulletMarker
addList marker
numid <- getNumId
- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
+ l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
+ setFirstPara
+ return l
blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do
let marker = NumberMarker numstyle numdelim start
addList marker
numid <- getNumId
- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
-blockToOpenXML opts (DefinitionList items) =
- concat `fmap` mapM (definitionListItemToOpenXML opts) items
+ l <- asList $ concat `fmap` mapM (listItemToOpenXML opts numid) lst
+ setFirstPara
+ return l
+blockToOpenXML opts (DefinitionList items) = do
+ l <- concat `fmap` mapM (definitionListItemToOpenXML opts) items
+ setFirstPara
+ return l
definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element]
definitionListItemToOpenXML opts (term,defs) = do
- term' <- withParaProp (pStyle "DefinitionTerm")
+ term' <- withParaProp (pCustomStyle "DefinitionTerm")
$ blockToOpenXML opts (Para term)
- defs' <- withParaProp (pStyle "Definition")
+ defs' <- withParaProp (pCustomStyle "Definition")
$ concat `fmap` mapM (blocksToOpenXML opts) defs
return $ term' ++ defs'
@@ -721,6 +934,9 @@ withTextProp d p = do
popTextProp
return res
+withTextPropM :: WS Element -> WS a -> WS a
+withTextPropM = (. flip withTextProp) . (>>=)
+
getParaProps :: Bool -> WS [Element]
getParaProps displayMathPara = do
props <- gets stParaProperties
@@ -749,6 +965,9 @@ withParaProp d p = do
popParaProp
return res
+withParaPropM :: WS Element -> WS a -> WS a
+withParaPropM = (. flip withParaProp) . (>>=)
+
formattedString :: String -> WS [Element]
formattedString str = do
props <- getTextProps
@@ -758,6 +977,9 @@ formattedString str = do
[ mknode (if inDel then "w:delText" else "w:t")
[("xml:space","preserve")] str ] ]
+setFirstPara :: WS ()
+setFirstPara = modify $ \s -> s { stFirstPara = True }
+
-- | Convert an inline element to OpenXML.
inlineToOpenXML :: WriterOptions -> Inline -> WS [Element]
inlineToOpenXML _ (Str str) = formattedString str
@@ -828,25 +1050,26 @@ inlineToOpenXML opts (Math mathType str) = do
Right r -> return [r]
Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str)
inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
-inlineToOpenXML opts (Code attrs str) =
- withTextProp (rStyle "VerbatimChar")
- $ if writerHighlight opts
- then case highlight formatOpenXML attrs str of
- Nothing -> unhighlighted
- Just h -> return h
- else unhighlighted
- where unhighlighted = intercalate [br] `fmap`
- (mapM formattedString $ lines str)
- formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
- toHlTok (toktype,tok) = mknode "w:r" []
- [ mknode "w:rPr" []
- [ rStyle $ show toktype ]
- , mknode "w:t" [("xml:space","preserve")] tok ]
+inlineToOpenXML opts (Code attrs str) = do
+ let unhighlighted = intercalate [br] `fmap`
+ (mapM formattedString $ lines str)
+ formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
+ toHlTok (toktype,tok) = mknode "w:r" []
+ [ mknode "w:rPr" []
+ [ rCustomStyle (show toktype) ]
+ , mknode "w:t" [("xml:space","preserve")] tok ]
+ withTextProp (rCustomStyle "VerbatimChar")
+ $ if writerHighlight opts
+ then case highlight formatOpenXML attrs str of
+ Nothing -> unhighlighted
+ Just h -> return h
+ else unhighlighted
inlineToOpenXML opts (Note bs) = do
notes <- gets stFootnotes
notenum <- getUniqueId
+ footnoteStyle <- rStyleM "Footnote Reference"
let notemarker = mknode "w:r" []
- [ mknode "w:rPr" [] (rStyle "FootnoteRef")
+ [ mknode "w:rPr" [] footnoteStyle
, mknode "w:footnoteRef" [] () ]
let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs
@@ -856,22 +1079,22 @@ inlineToOpenXML opts (Note bs) = do
oldParaProperties <- gets stParaProperties
oldTextProperties <- gets stTextProperties
modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] }
- contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts
+ contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts
$ insertNoteRef bs
modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties,
stTextProperties = oldTextProperties }
let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents
modify $ \s -> s{ stFootnotes = newnote : notes }
return [ mknode "w:r" []
- [ mknode "w:rPr" [] (rStyle "FootnoteRef")
+ [ mknode "w:rPr" [] footnoteStyle
, mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
-- internal link:
inlineToOpenXML opts (Link txt ('#':xs,_)) = do
- contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
+ contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
-- external link:
inlineToOpenXML opts (Link txt (src,_)) = do
- contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
+ contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
extlinks <- gets stExternalLinks
id' <- case M.lookup src extlinks of
Just i -> return i
@@ -883,6 +1106,7 @@ inlineToOpenXML opts (Link txt (src,_)) = do
return [ mknode "w:hyperlink" [("r:id",id')] contents ]
inlineToOpenXML opts (Image alt (src, tit)) = do
-- first, check to see if we've already done this image
+ pageWidth <- gets stPrintWidth
imgs <- gets stImages
case M.lookup src imgs of
Just (_,_,_,elt,_) -> return [elt]
@@ -899,7 +1123,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
let size = imageSize img
let (xpt,ypt) = maybe (120,120) sizeInPoints size
-- 12700 emu = 1 pt
- let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700)
+ let (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700)
let cNvPicPr = mknode "pic:cNvPicPr" [] $
mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] ()
let nvPicPr = mknode "pic:nvPicPr" []
@@ -957,6 +1181,22 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
br :: Element
br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ]
+-- Word will insert these footnotes into the settings.xml file
+-- (whether or not they're visible in the document). If they're in the
+-- file, but not in the footnotes.xml file, it will produce
+-- problems. So we want to make sure we insert them into our document.
+defaultFootnotes :: [Element]
+defaultFootnotes = [ mknode "w:footnote"
+ [("w:type", "separator"), ("w:id", "-1")] $
+ [ mknode "w:p" [] $
+ [mknode "w:r" [] $
+ [ mknode "w:separator" [] ()]]]
+ , mknode "w:footnote"
+ [("w:type", "continuationSeparator"), ("w:id", "0")] $
+ [ mknode "w:p" [] $
+ [ mknode "w:r" [] $
+ [ mknode "w:continuationSeparator" [] ()]]]]
+
parseXml :: Archive -> Archive -> String -> IO Element
parseXml refArchive distArchive relpath =
case ((findEntryByPath relpath refArchive `mplus`
@@ -966,9 +1206,11 @@ parseXml refArchive distArchive relpath =
Nothing -> fail $ relpath ++ " corrupt or missing in reference docx"
-- | Scales the image to fit the page
-fitToPage :: (Integer, Integer) -> (Integer, Integer)
-fitToPage (x, y)
- --5440680 is the emu width size of a letter page in portrait, minus the margins
- | x > 5440680 =
- (5440680, round $ (5440680 / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y))
+-- sizes are passed in emu
+fitToPage :: (Integer, Integer) -> Integer -> (Integer, Integer)
+fitToPage (x, y) pageWidth
+ -- Fixes width to the page width and scales the height
+ | x > pageWidth =
+ (pageWidth, round $
+ ((fromIntegral pageWidth) / ((fromIntegral :: Integer -> Double) x)) * (fromIntegral y))
| otherwise = (x, y)
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 8c1d360aa..17ff8a279 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -134,7 +134,9 @@ blockToDokuWiki opts (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
let opt = if null txt
then ""
else "|" ++ if null tit then capt else tit ++ capt
- return $ "{{:" ++ src ++ opt ++ "}}\n"
+ -- Relative links fail isURI and receive a colon
+ prefix = if isURI src then "" else ":"
+ return $ "{{" ++ prefix ++ src ++ opt ++ "}}\n"
blockToDokuWiki opts (Para inlines) = do
indent <- stIndent <$> ask
@@ -178,7 +180,7 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do
blockToDokuWiki opts (BlockQuote blocks) = do
contents <- blockListToDokuWiki opts blocks
if isSimpleBlockQuote blocks
- then return $ "> " ++ contents
+ then return $ unlines $ map ("> " ++) $ lines contents
else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>"
blockToDokuWiki opts (Table capt aligns _ headers rows) = do
@@ -352,9 +354,7 @@ isPlainOrPara (Para _) = True
isPlainOrPara _ = False
isSimpleBlockQuote :: [Block] -> Bool
-isSimpleBlockQuote [BlockQuote bs] = isSimpleBlockQuote bs
-isSimpleBlockQuote [b] = isPlainOrPara b
-isSimpleBlockQuote _ = False
+isSimpleBlockQuote bs = all isPlainOrPara bs
-- | Concatenates strings with line breaks between them.
vcat :: [String] -> String
@@ -459,7 +459,7 @@ inlineToDokuWiki _ (RawInline f str)
| f == Format "html" = return $ "<html>" ++ str ++ "</html>"
| otherwise = return ""
-inlineToDokuWiki _ (LineBreak) = return "\\\\ "
+inlineToDokuWiki _ (LineBreak) = return "\\\\\n"
inlineToDokuWiki _ Space = return " "
@@ -480,7 +480,9 @@ inlineToDokuWiki opts (Image alt (source, tit)) = do
("", []) -> ""
("", _ ) -> "|" ++ alt'
(_ , _ ) -> "|" ++ tit
- return $ "{{:" ++ source ++ txt ++ "}}"
+ -- Relative links fail isURI and receive a colon
+ prefix = if isURI source then "" else ":"
+ return $ "{{" ++ prefix ++ source ++ txt ++ "}}"
inlineToDokuWiki opts (Note contents) = do
contents' <- blockListToDokuWiki opts contents
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index e4f2d1335..29ea44e02 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns #-}
+{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables, ViewPatterns, FlexibleContexts #-}
{-
Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
@@ -35,16 +35,17 @@ import Data.Maybe ( fromMaybe )
import Data.List ( isPrefixOf, isInfixOf, intercalate )
import System.Environment ( getEnv )
import Text.Printf (printf)
-import System.FilePath ( (</>), takeExtension, takeFileName )
+import System.FilePath ( takeExtension, takeFileName )
+import System.FilePath.Glob ( namesMatching )
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.SelfContained ( makeSelfContained )
import Codec.Archive.Zip ( emptyArchive, addEntryToArchive, eRelativePath, fromEntry , Entry, toEntry, fromArchive)
-import Control.Applicative ((<$>))
+import Control.Applicative ((<$>), (<$))
import Data.Time.Clock.POSIX ( getPOSIXTime )
import Data.Time (getCurrentTime,UTCTime, formatTime)
-import System.Locale ( defaultTimeLocale )
+import Text.Pandoc.Compat.Locale ( defaultTimeLocale )
import Text.Pandoc.Shared ( trimr, renderTags', safeRead, uniqueIdent, trim
, normalizeDate, readDataFile, stringify, warn
, hierarchicalize, fetchItem' )
@@ -57,14 +58,13 @@ import Text.Pandoc.Options ( WriterOptions(..)
import Text.Pandoc.Definition
import Text.Pandoc.Walk (walk, walkM)
import Control.Monad.State (modify, get, execState, State, put, evalState)
-import Control.Monad (foldM, when, mplus, liftM)
+import Control.Monad (foldM, mplus, liftM, when)
import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
, strContent, lookupAttr, Node(..), QName(..), parseXML
, onlyElems, node, ppElement)
import Text.Pandoc.UUID (getRandomUUID)
import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml)
import Data.Char ( toLower, isDigit, isAlphaNum )
-import Network.URI ( unEscapeString )
import Text.Pandoc.MIME (MimeType, getMimeType)
import qualified Control.Exception as E
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
@@ -344,7 +344,6 @@ writeEPUB opts doc@(Pandoc meta _) = do
, writerStandalone = True
, writerSectionDivs = True
, writerHtml5 = epub3
- , writerTableOfContents = False -- we always have one in epub
, writerVariables = vars
, writerHTMLMathMethod =
if epub3
@@ -359,8 +358,9 @@ writeEPUB opts doc@(Pandoc meta _) = do
Nothing -> return ([],[])
Just img -> do
let coverImage = "media/" ++ takeFileName img
- let cpContent = renderHtml $ writeHtml opts'
- (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
+ let cpContent = renderHtml $ writeHtml
+ opts'{ writerVariables = ("coverpage","true"):vars }
+ (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
imgContent <- B.readFile img
return ( [mkEntry "cover.xhtml" cpContent]
, [mkEntry coverImage imgContent] )
@@ -388,8 +388,14 @@ writeEPUB opts doc@(Pandoc meta _) = do
picEntries <- foldM readPicEntry [] pics
-- handle fonts
+ let matchingGlob f = do
+ xs <- namesMatching f
+ when (null xs) $
+ warn $ f ++ " did not match any font files."
+ return xs
let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
- fontEntries <- mapM mkFontEntry $ writerEpubFonts opts'
+ fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts')
+ fontEntries <- mapM mkFontEntry fontFiles
-- set page progression direction attribution
let progressionDirection = case epubPageDirection metadata of
@@ -489,6 +495,9 @@ writeEPUB opts doc@(Pandoc meta _) = do
[] -> "UNTITLED"
(x:_) -> titleText x
x -> stringify x
+
+ let tocTitle = fromMaybe plainTitle $
+ metaValueToString <$> lookupMeta "toc-title" meta
let uuid = case epubIdentifier metadata of
(x:_) -> identifierText x -- use first identifier as UUID
[] -> error "epubIdentifier is null" -- shouldn't happen
@@ -528,14 +537,12 @@ writeEPUB opts doc@(Pandoc meta _) = do
case lookupMeta "title" meta of
Just _ -> "yes"
Nothing -> "no")] $ ()) :
- (unode "itemref" ! [("idref", "nav")
- ,("linear", if writerTableOfContents opts
- then "yes"
- else "no")] $ ()) :
+ [unode "itemref" ! [("idref", "nav")] $ ()
+ | writerTableOfContents opts ] ++
map chapterRefNode chapterEntries)
, unode "guide" $
[ unode "reference" !
- [("type","toc"),("title",plainTitle),
+ [("type","toc"),("title", tocTitle),
("href","nav.xhtml")] $ ()
] ++
[ unode "reference" !
@@ -572,8 +579,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
let navMapFormatter :: Int -> String -> String -> [Element] -> Element
navMapFormatter n tit src subs = unode "navPoint" !
- [("id", "navPoint-" ++ show n)
- ,("playOrder", show n)] $
+ [("id", "navPoint-" ++ show n)] $
[ unode "navLabel" $ unode "text" tit
, unode "content" ! [("src", src)] $ ()
] ++ subs
@@ -598,7 +604,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
Nothing -> []
Just img -> [unode "meta" ! [("name","cover"),
("content", toId img)] $ ()]
- , unode "docTitle'" $ unode "text" $ plainTitle
+ , unode "docTitle" $ unode "text" $ plainTitle
, unode "navMap" $
tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1
]
@@ -614,17 +620,35 @@ writeEPUB opts doc@(Pandoc meta _) = do
(_:_) -> [unode "ol" ! [("class","toc")] $ subs]
let navtag = if epub3 then "nav" else "div"
- let navData = UTF8.fromStringLazy $ ppTopElement $
- unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml")
- ,("xmlns:epub","http://www.idpf.org/2007/ops")] $
- [ unode "head" $
- [ unode "title" plainTitle
- , unode "link" ! [("rel","stylesheet"),("type","text/css"),("href","stylesheet.css")] $ () ]
- , unode "body" $
- unode navtag ! [("epub:type","toc") | epub3] $
- [ unode "h1" ! [("id","toc-title")] $ plainTitle
- , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]
- ]
+ let navBlocks = [RawBlock (Format "html") $ ppElement $
+ unode navtag ! ([("epub:type","toc") | epub3] ++
+ [("id","toc")]) $
+ [ unode "h1" ! [("id","toc-title")] $ tocTitle
+ , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]]
+ let landmarks = if epub3
+ then [RawBlock (Format "html") $ ppElement $
+ unode "nav" ! [("epub:type","landmarks")
+ ,("hidden","hidden")] $
+ [ unode "ol" $
+ [ unode "li"
+ [ unode "a" ! [("href", "cover.xhtml")
+ ,("epub:type", "cover")] $
+ "Cover"] |
+ epubCoverImage metadata /= Nothing
+ ] ++
+ [ unode "li"
+ [ unode "a" ! [("href", "#toc")
+ ,("epub:type", "toc")] $
+ "Table of contents"
+ ] | writerTableOfContents opts
+ ]
+ ]
+ ]
+ else []
+ let navData = renderHtml $ writeHtml opts'
+ (Pandoc (setMeta "title"
+ (walk removeNote $ fromList $ docTitle' meta) nullMeta)
+ (navBlocks ++ landmarks))
let navEntry = mkEntry "nav.xhtml" navData
-- mimetype
@@ -767,23 +791,20 @@ metadataElement version md currentTime =
showDateTimeISO8601 :: UTCTime -> String
showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
-transformTag :: WriterOptions
- -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
+transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
-> Tag String
-> IO (Tag String)
-transformTag opts mediaRef tag@(TagOpen name attr)
+transformTag mediaRef tag@(TagOpen name attr)
| name `elem` ["video", "source", "img", "audio"] = do
let src = fromAttrib "src" tag
let poster = fromAttrib "poster" tag
- let oldsrc = maybe src (</> src) $ writerSourceURL opts
- let oldposter = maybe poster (</> poster) $ writerSourceURL opts
- newsrc <- modifyMediaRef mediaRef oldsrc
- newposter <- modifyMediaRef mediaRef oldposter
+ newsrc <- modifyMediaRef mediaRef src
+ newposter <- modifyMediaRef mediaRef poster
let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
[("src", newsrc) | not (null newsrc)] ++
[("poster", newposter) | not (null newposter)]
return $ TagOpen name attr'
-transformTag _ _ tag = return tag
+transformTag _ tag = return tag
modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath
modifyMediaRef _ "" = return ""
@@ -793,7 +814,7 @@ modifyMediaRef mediaRef oldsrc = do
Just n -> return n
Nothing -> do
let new = "media/file" ++ show (length media) ++
- takeExtension oldsrc
+ takeExtension (takeWhile (/='?') oldsrc) -- remove query
modifyIORef mediaRef ( (oldsrc, new): )
return new
@@ -801,10 +822,10 @@ transformBlock :: WriterOptions
-> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
-> Block
-> IO Block
-transformBlock opts mediaRef (RawBlock fmt raw)
+transformBlock _ mediaRef (RawBlock fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
- tags' <- mapM (transformTag opts mediaRef) tags
+ tags' <- mapM (transformTag mediaRef) tags
return $ RawBlock fmt (renderTags' tags')
transformBlock _ _ b = return b
@@ -812,19 +833,17 @@ transformInline :: WriterOptions
-> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
-> Inline
-> IO Inline
-transformInline opts mediaRef (Image lab (src,tit)) = do
- let src' = unEscapeString src
- let oldsrc = maybe src' (</> src) $ writerSourceURL opts
- newsrc <- modifyMediaRef mediaRef oldsrc
+transformInline _ mediaRef (Image lab (src,tit)) = do
+ newsrc <- modifyMediaRef mediaRef src
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
raw <- makeSelfContained opts $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
-transformInline opts mediaRef (RawInline fmt raw)
+transformInline _ mediaRef (RawInline fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
- tags' <- mapM (transformTag opts mediaRef) tags
+ tags' <- mapM (transformTag mediaRef) tags
return $ RawInline fmt (renderTags' tags')
transformInline _ _ x = return x
@@ -885,20 +904,27 @@ addIdentifiers bs = evalState (mapM go bs) []
-- was "header-1" might turn into "ch006.xhtml#header".
correlateRefs :: Int -> [Block] -> [(String,String)]
correlateRefs chapterHeaderLevel bs =
- identTable $ execState (mapM_ go bs)
+ identTable $ execState (walkM goBlock bs >>= walkM goInline)
IdentState{ chapterNumber = 0
, identTable = [] }
- where go :: Block -> State IdentState ()
- go (Header n (ident,_,_) _) = do
- when (n <= chapterHeaderLevel) $
- modify $ \s -> s{ chapterNumber = chapterNumber s + 1 }
+ where goBlock :: Block -> State IdentState Block
+ goBlock x@(Header n (ident,_,_) _) = x <$ addIdentifier (Just n) ident
+ goBlock x@(Div (ident,_,_) _) = x <$ addIdentifier Nothing ident
+ goBlock x = return x
+ goInline :: Inline -> State IdentState Inline
+ goInline x@(Span (ident,_,_) _) = x <$ addIdentifier Nothing ident
+ goInline x = return x
+ addIdentifier mbHeaderLevel ident = do
+ case mbHeaderLevel of
+ Just n | n <= chapterHeaderLevel ->
+ modify $ \s -> s{ chapterNumber = chapterNumber s + 1 }
+ _ -> return ()
st <- get
let chapterid = showChapter (chapterNumber st) ++
- if n <= chapterHeaderLevel
- then ""
- else '#' : ident
+ case mbHeaderLevel of
+ Just n | n <= chapterHeaderLevel -> ""
+ _ -> '#' : ident
modify $ \s -> s{ identTable = (ident, chapterid) : identTable st }
- go _ = return ()
-- Replace internal link references using the table produced
-- by correlateRefs.
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 233b8b32b..31fa4bee8 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -85,7 +85,7 @@ writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do
(imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s)
let body' = replaceImagesWithAlt missing body
let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
- return $ xml_head ++ (showContent fb2_xml)
+ return $ xml_head ++ (showContent fb2_xml) ++ "\n"
where
xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
fb2_attrs =
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 9ead604d7..53dc931cc 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE OverloadedStrings, CPP #-}
+{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns, ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -49,7 +49,10 @@ import Data.String ( fromString )
import Data.Maybe ( catMaybes, fromMaybe )
import Control.Monad.State
import Text.Blaze.Html hiding(contents)
+#if MIN_VERSION_blaze_markup(0,6,3)
+#else
import Text.Blaze.Internal(preEscapedString)
+#endif
#if MIN_VERSION_blaze_html(0,5,1)
import qualified Text.Blaze.XHtml5 as H5
#else
@@ -60,6 +63,8 @@ import qualified Text.Blaze.XHtml1.Transitional.Attributes as A
import Text.Blaze.Renderer.String (renderHtml)
import Text.TeXMath
import Text.XML.Light.Output
+import Text.XML.Light (unode, elChildren, unqual)
+import qualified Text.XML.Light as XML
import System.FilePath (takeExtension)
import Data.Monoid
import Data.Aeson (Value)
@@ -71,11 +76,13 @@ data WriterState = WriterState
, stQuotes :: Bool -- ^ <q> tag is used
, stHighlighting :: Bool -- ^ Syntax highlighting is used
, stSecNum :: [Int] -- ^ Number of current section
+ , stElement :: Bool -- ^ Processing an Element
}
defaultWriterState :: WriterState
defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
- stHighlighting = False, stSecNum = []}
+ stHighlighting = False, stSecNum = [],
+ stElement = False}
-- Helpers to render HTML with the appropriate function.
@@ -155,6 +162,10 @@ pandocToHtml opts (Pandoc meta blocks) = do
H.script ! A.src (toValue url)
! A.type_ "text/javascript"
$ mempty
+ KaTeX js css ->
+ (H.script ! A.src (toValue js) $ mempty) <>
+ (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <>
+ (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX)
_ -> case lookup "mathml-script" (writerVariables opts) of
Just s | not (writerHtml5 opts) ->
H.script ! A.type_ "text/javascript"
@@ -274,7 +285,13 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen
let titleSlide = slide && level < slideLevel
header' <- if title' == [Str "\0"] -- marker for hrule
then return mempty
- else blockToHtml opts (Header level' (id',classes,keyvals) title')
+ else do
+ modify (\st -> st{ stElement = True})
+ res <- blockToHtml opts
+ (Header level' (id',classes,keyvals) title')
+ modify (\st -> st{ stElement = False})
+ return res
+
let isSec (Sec _ _ _ _ _) = True
isSec (Blk _) = False
let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."]
@@ -342,10 +359,10 @@ parseMailto s = do
_ -> fail "not a mailto: URL"
-- | Obfuscate a "mailto:" link.
-obfuscateLink :: WriterOptions -> String -> String -> Html
+obfuscateLink :: WriterOptions -> Html -> String -> Html
obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation =
- H.a ! A.href (toValue s) $ toHtml txt
-obfuscateLink opts txt s =
+ H.a ! A.href (toValue s) $ txt
+obfuscateLink opts (renderHtml -> txt) s =
let meth = writerEmailObfuscation opts
s' = map toLower (take 7 s) ++ drop 7 s
in case parseMailto s' of
@@ -424,24 +441,30 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do
then H5.figure $ mconcat
[nl opts, img, capt, nl opts]
else H.div ! A.class_ "figure" $ mconcat
- [nl opts, img, capt, nl opts]
+ [nl opts, img, nl opts, capt, nl opts]
blockToHtml opts (Para lst) = do
contents <- inlineListToHtml opts lst
return $ H.p contents
blockToHtml opts (Div attr@(_,classes,_) bs) = do
- contents <- blockListToHtml opts bs
+ let speakerNotes = "notes" `elem` classes
+ -- we don't want incremental output inside speaker notes, see #1394
+ let opts' = if speakerNotes then opts{ writerIncremental = False } else opts
+ contents <- blockListToHtml opts' bs
let contents' = nl opts >> contents >> nl opts
return $
- if "notes" `elem` classes
- then let opts' = opts{ writerIncremental = False } in
- -- we don't want incremental output inside speaker notes
- case writerSlideVariant opts of
+ if speakerNotes
+ then case writerSlideVariant opts of
RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents'
NoSlides -> addAttrs opts' attr $ H.div $ contents'
_ -> mempty
else addAttrs opts attr $ H.div $ contents'
-blockToHtml _ (RawBlock f str)
+blockToHtml opts (RawBlock f str)
| f == Format "html" = return $ preEscapedString str
+ | f == Format "latex" =
+ case writerHTMLMathMethod opts of
+ MathJax _ -> do modify (\st -> st{ stMath = True })
+ return $ toHtml str
+ _ -> return mempty
| otherwise = return mempty
blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr
blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
@@ -485,7 +508,7 @@ blockToHtml opts (BlockQuote blocks) =
else do
contents <- blockListToHtml opts blocks
return $ H.blockquote $ nl opts >> contents >> nl opts
-blockToHtml opts (Header level (_,classes,_) lst) = do
+blockToHtml opts (Header level attr@(_,classes,_) lst) = do
contents <- inlineListToHtml opts lst
secnum <- liftM stSecNum get
let contents' = if writerNumberSections opts && not (null secnum)
@@ -493,7 +516,9 @@ blockToHtml opts (Header level (_,classes,_) lst) = do
then (H.span ! A.class_ "header-section-number" $ toHtml
$ showSecNum secnum) >> strToHtml " " >> contents
else contents
- return $ case level of
+ inElement <- gets stElement
+ return $ (if inElement then id else addAttrs opts attr)
+ $ case level of
1 -> H.h1 contents'
2 -> H.h2 contents'
3 -> H.h3 contents'
@@ -506,7 +531,9 @@ blockToHtml opts (BulletList lst) = do
return $ unordList opts contents
blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
contents <- mapM (blockListToHtml opts) lst
- let numstyle' = camelCaseToHyphenated $ show numstyle
+ let numstyle' = case numstyle of
+ Example -> "decimal"
+ _ -> camelCaseToHyphenated $ show numstyle
let attribs = (if startnum /= 1
then [A.start $ toValue startnum]
else []) ++
@@ -615,13 +642,28 @@ inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
inlineListToHtml opts lst =
mapM (inlineToHtml opts) lst >>= return . mconcat
+-- | Annotates a MathML expression with the tex source
+annotateMML :: XML.Element -> String -> XML.Element
+annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)])
+ where
+ cs = case elChildren e of
+ [] -> unode "mrow" ()
+ [x] -> x
+ xs -> unode "mrow" xs
+ math childs = XML.Element q as [XML.Elem childs] l
+ where
+ (XML.Element q as _ l) = e
+ annotAttrs = [XML.Attr (unqual "encoding") "application/x-tex"]
+
+
-- | Convert Pandoc inline element to HTML.
inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
inlineToHtml opts inline =
case inline of
(Str str) -> return $ strToHtml str
(Space) -> return $ strToHtml " "
- (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br
+ (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br)
+ <> strToHtml "\n"
(Span (id',classes,kvs) ils)
-> inlineListToHtml opts ils >>=
return . addAttrs opts attr' . H.span
@@ -669,69 +711,78 @@ inlineToHtml opts inline =
H.q `fmap` inlineListToHtml opts lst
else (\x -> leftQuote >> x >> rightQuote)
`fmap` inlineListToHtml opts lst
- (Math t str) -> modify (\st -> st {stMath = True}) >>
- (case writerHTMLMathMethod opts of
- LaTeXMathML _ ->
- -- putting LaTeXMathML in container with class "LaTeX" prevents
- -- non-math elements on the page from being treated as math by
- -- the javascript
- return $ H.span ! A.class_ "LaTeX" $
- case t of
- InlineMath -> toHtml ("$" ++ str ++ "$")
- DisplayMath -> toHtml ("$$" ++ str ++ "$$")
- JsMath _ -> do
- let m = preEscapedString str
- return $ case t of
- InlineMath -> H.span ! A.class_ "math" $ m
- DisplayMath -> H.div ! A.class_ "math" $ m
- WebTeX url -> do
- let imtag = if writerHtml5 opts then H5.img else H.img
- let m = imtag ! A.style "vertical-align:middle"
- ! A.src (toValue $ url ++ urlEncode str)
- ! A.alt (toValue str)
- ! A.title (toValue str)
- let brtag = if writerHtml5 opts then H5.br else H.br
- return $ case t of
- InlineMath -> m
- DisplayMath -> brtag >> m >> brtag
- GladTeX ->
- return $ case t of
- InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>"
- DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>"
- MathML _ -> do
- let dt = if t == InlineMath
- then DisplayInline
- else DisplayBlock
- let conf = useShortEmptyTags (const False)
- defaultConfigPP
- case writeMathML dt <$> readTeX str of
- Right r -> return $ preEscapedString $
- ppcElement conf r
- Left _ -> inlineListToHtml opts
- (texMathToInlines t str) >>=
- return . (H.span ! A.class_ "math")
- MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $
- case t of
- InlineMath -> "\\(" ++ str ++ "\\)"
- DisplayMath -> "\\[" ++ str ++ "\\]"
- PlainMath -> do
- x <- inlineListToHtml opts (texMathToInlines t str)
- let m = H.span ! A.class_ "math" $ x
- let brtag = if writerHtml5 opts then H5.br else H.br
- return $ case t of
- InlineMath -> m
- DisplayMath -> brtag >> m >> brtag )
+ (Math t str) -> do
+ modify (\st -> st {stMath = True})
+ let mathClass = toValue $ ("math " :: String) ++
+ if t == InlineMath then "inline" else "display"
+ case writerHTMLMathMethod opts of
+ LaTeXMathML _ ->
+ -- putting LaTeXMathML in container with class "LaTeX" prevents
+ -- non-math elements on the page from being treated as math by
+ -- the javascript
+ return $ H.span ! A.class_ "LaTeX" $
+ case t of
+ InlineMath -> toHtml ("$" ++ str ++ "$")
+ DisplayMath -> toHtml ("$$" ++ str ++ "$$")
+ JsMath _ -> do
+ let m = preEscapedString str
+ return $ case t of
+ InlineMath -> H.span ! A.class_ mathClass $ m
+ DisplayMath -> H.div ! A.class_ mathClass $ m
+ WebTeX url -> do
+ let imtag = if writerHtml5 opts then H5.img else H.img
+ let m = imtag ! A.style "vertical-align:middle"
+ ! A.src (toValue $ url ++ urlEncode str)
+ ! A.alt (toValue str)
+ ! A.title (toValue str)
+ let brtag = if writerHtml5 opts then H5.br else H.br
+ return $ case t of
+ InlineMath -> m
+ DisplayMath -> brtag >> m >> brtag
+ GladTeX ->
+ return $ case t of
+ InlineMath -> preEscapedString $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>"
+ DisplayMath -> preEscapedString $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>"
+ MathML _ -> do
+ let dt = if t == InlineMath
+ then DisplayInline
+ else DisplayBlock
+ let conf = useShortEmptyTags (const False)
+ defaultConfigPP
+ case writeMathML dt <$> readTeX str of
+ Right r -> return $ preEscapedString $
+ ppcElement conf (annotateMML r str)
+ Left _ -> inlineListToHtml opts
+ (texMathToInlines t str) >>=
+ return . (H.span ! A.class_ mathClass)
+ MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $
+ case t of
+ InlineMath -> "\\(" ++ str ++ "\\)"
+ DisplayMath -> "\\[" ++ str ++ "\\]"
+ KaTeX _ _ -> return $ H.span ! A.class_ mathClass $
+ toHtml (case t of
+ InlineMath -> str
+ DisplayMath -> "\\displaystyle " ++ str)
+ PlainMath -> do
+ x <- inlineListToHtml opts (texMathToInlines t str)
+ let m = H.span ! A.class_ mathClass $ x
+ let brtag = if writerHtml5 opts then H5.br else H.br
+ return $ case t of
+ InlineMath -> m
+ DisplayMath -> brtag >> m >> brtag
(RawInline f str)
| f == Format "latex" ->
case writerHTMLMathMethod opts of
LaTeXMathML _ -> do modify (\st -> st {stMath = True})
return $ toHtml str
+ MathJax _ -> do modify (\st -> st {stMath = True})
+ return $ toHtml str
_ -> return mempty
| f == Format "html" -> return $ preEscapedString str
| otherwise -> return mempty
(Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do
linkText <- inlineListToHtml opts txt
- return $ obfuscateLink opts (renderHtml linkText) s
+ return $ obfuscateLink opts linkText s
(Link txt (s,tit)) -> do
linkText <- inlineListToHtml opts txt
let s' = case s of
@@ -746,22 +797,15 @@ inlineToHtml opts inline =
then link'
else link' ! A.title (toValue tit)
(Image txt (s,tit)) | treatAsImage s -> do
- let alternate' = stringify txt
let attributes = [A.src $ toValue s] ++
- (if null tit
- then []
- else [A.title $ toValue tit]) ++
- if null txt
- then []
- else [A.alt $ toValue alternate']
+ [A.title $ toValue tit | not $ null tit] ++
+ [A.alt $ toValue $ stringify txt]
let tag = if writerHtml5 opts then H5.img else H.img
return $ foldl (!) tag attributes
-- note: null title included, as in Markdown.pl
(Image _ (s,tit)) -> do
let attributes = [A.src $ toValue s] ++
- (if null tit
- then []
- else [A.title $ toValue tit])
+ [A.title $ toValue tit | not $ null tit]
return $ foldl (!) H5.embed attributes
-- note: null title included, as in Markdown.pl
(Note contents)
@@ -815,3 +859,14 @@ blockListToNote opts ref blocks =
Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote"
_ -> noteItem
return $ nl opts >> noteItem'
+
+-- Javascript snippet to render all KaTeX elements
+renderKaTeX :: String
+renderKaTeX = unlines [
+ "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");"
+ , "for (var i=0; i < mathElements.length; i++)"
+ , "{"
+ , " var texText = mathElements[i].firstChild"
+ , " katex.render(texText.data, mathElements[i])"
+ , "}}"
+ ]
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
index ae20efd4b..6af4b7aa3 100644
--- a/src/Text/Pandoc/Writers/ICML.hs
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, FlexibleContexts #-}
{- |
Module : Text.Pandoc.Writers.ICML
@@ -70,7 +70,6 @@ linkName = "Link"
-- block element names (appear in InDesign's paragraph styles pane)
paragraphName :: String
codeBlockName :: String
-rawBlockName :: String
blockQuoteName :: String
orderedListName :: String
bulletListName :: String
@@ -93,7 +92,6 @@ subListParName :: String
footnoteName :: String
paragraphName = "Paragraph"
codeBlockName = "CodeBlock"
-rawBlockName = "Rawblock"
blockQuoteName = "Blockquote"
orderedListName = "NumList"
bulletListName = "BulList"
@@ -278,7 +276,9 @@ blockToICML :: WriterOptions -> Style -> Block -> WS Doc
blockToICML opts style (Plain lst) = parStyle opts style lst
blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst
blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str]
-blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str]
+blockToICML _ _ (RawBlock f str)
+ | f == Format "icml" = return $ text str
+ | otherwise = return empty
blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks
blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst
blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst
@@ -399,12 +399,14 @@ inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:styl
inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst
inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"]
inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"]
-inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst]
+inlineToICML opts style (Cite _ lst) = inlinesToICML opts style lst
inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str
inlineToICML _ style Space = charStyle style space
inlineToICML _ style LineBreak = charStyle style $ text lineSeparator
inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math
-inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str
+inlineToICML _ _ (RawInline f str)
+ | f == Format "icml" = return $ text str
+ | otherwise = return empty
inlineToICML opts style (Link lst (url, title)) = do
content <- inlinesToICML opts (linkName:style) lst
state $ \st ->
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 0fa1e4857..49bc27b58 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -42,6 +42,7 @@ import Data.List ( (\\), isSuffixOf, isInfixOf, stripPrefix,
isPrefixOf, intercalate, intersperse )
import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
import Data.Maybe ( fromMaybe )
+import Data.Aeson.Types ( (.:), parseMaybe, withObject )
import Control.Applicative ((<|>))
import Control.Monad.State
import Text.Pandoc.Pretty
@@ -54,6 +55,7 @@ data WriterState =
WriterState { stInNote :: Bool -- true if we're in a note
, stInQuote :: Bool -- true if in a blockquote
, stInMinipage :: Bool -- true if in minipage
+ , stInHeading :: Bool -- true if in a section heading
, stNotes :: [Doc] -- notes in a minipage
, stOLLevel :: Int -- level of ordered list nesting
, stOptions :: WriterOptions -- writer options, so they don't have to be parameter
@@ -76,9 +78,9 @@ writeLaTeX :: WriterOptions -> Pandoc -> String
writeLaTeX options document =
evalState (pandocToLaTeX options document) $
WriterState { stInNote = False, stInQuote = False,
- stInMinipage = False, stNotes = [],
- stOLLevel = 1, stOptions = options,
- stVerbInNote = False,
+ stInMinipage = False, stInHeading = False,
+ stNotes = [], stOLLevel = 1,
+ stOptions = options, stVerbInNote = False,
stTable = False, stStrikeout = False,
stUrl = False, stGraphics = False,
stLHS = False, stBook = writerChapters options,
@@ -101,8 +103,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do
modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' }
let template = writerTemplate options
-- set stBook depending on documentclass
+ let colwidth = if writerWrapText options
+ then Just $ writerColumns options
+ else Nothing
+ metadata <- metaToJSON options
+ (fmap (render colwidth) . blockListToLaTeX)
+ (fmap (render colwidth) . inlineListToLaTeX)
+ meta
let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
- case lookup "documentclass" (writerVariables options) of
+ case lookup "documentclass" (writerVariables options) `mplus`
+ parseMaybe (withObject "object" (.: "documentclass")) metadata of
Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True}
| otherwise -> return ()
Nothing | any (\x -> "\\documentclass" `isPrefixOf` x &&
@@ -113,13 +123,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
-- \enquote{...} for smart quotes:
when ("{csquotes}" `isInfixOf` template) $
modify $ \s -> s{stCsquotes = True}
- let colwidth = if writerWrapText options
- then Just $ writerColumns options
- else Nothing
- metadata <- metaToJSON options
- (fmap (render colwidth) . blockListToLaTeX)
- (fmap (render colwidth) . inlineListToLaTeX)
- meta
let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then
(blocks', [])
else case last blocks' of
@@ -179,7 +182,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do
elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc
elementToLaTeX _ (Blk block) = blockToLaTeX block
elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do
+ modify $ \s -> s{stInHeading = True}
header' <- sectionHeader ("unnumbered" `elem` classes) id' level title'
+ modify $ \s -> s{stInHeading = False}
innerContents <- mapM (elementToLaTeX opts) elements
return $ vsep (header' : innerContents)
@@ -203,7 +208,7 @@ stringToLaTeX ctx (x:xs) = do
'€' -> "\\euro{}" ++ rest
'{' -> "\\{" ++ rest
'}' -> "\\}" ++ rest
- '$' -> "\\$" ++ rest
+ '$' | not isUrl -> "\\$" ++ rest
'%' -> "\\%" ++ rest
'&' -> "\\&" ++ rest
'_' | not isUrl -> "\\_" ++ rest
@@ -237,7 +242,7 @@ toLabel z = go `fmap` stringToLaTeX URLString z
where go [] = ""
go (x:xs)
| (isLetter x || isDigit x) && isAscii x = x:go xs
- | elem x "-+=:;." = x:go xs
+ | elem x ("-+=:;." :: String) = x:go xs
| otherwise = "ux" ++ printf "%x" (ord x) ++ go xs
-- | Puts contents into LaTeX command.
@@ -466,8 +471,11 @@ blockToLaTeX (DefinitionList lst) = do
"\\end{description}"
blockToLaTeX HorizontalRule = return $
"\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}"
-blockToLaTeX (Header level (id',classes,_) lst) =
- sectionHeader ("unnumbered" `elem` classes) id' level lst
+blockToLaTeX (Header level (id',classes,_) lst) = do
+ modify $ \s -> s{stInHeading = True}
+ hdr <- sectionHeader ("unnumbered" `elem` classes) id' level lst
+ modify $ \s -> s{stInHeading = False}
+ return hdr
blockToLaTeX (Table caption aligns widths heads rows) = do
headers <- if all null heads
then return empty
@@ -539,10 +547,16 @@ fixLineBreaks' ils = case splitBy (== LineBreak) ils of
where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++
[RawInline "tex" "}"]
+-- We also change display math to inline math, since display
+-- math breaks in simple tables.
+displayMathToInline :: Inline -> Inline
+displayMathToInline (Math DisplayMath x) = Math InlineMath x
+displayMathToInline x = x
+
tableCellToLaTeX :: Bool -> (Double, Alignment, [Block])
-> State WriterState Doc
tableCellToLaTeX _ (0, _, blocks) =
- blockListToLaTeX $ walk fixLineBreaks blocks
+ blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
tableCellToLaTeX header (width, align, blocks) = do
modify $ \st -> st{ stInMinipage = True, stNotes = [] }
cellContents <- blockListToLaTeX blocks
@@ -607,6 +621,7 @@ sectionHeader :: Bool -- True for unnumbered
sectionHeader unnumbered ref level lst = do
txt <- inlineListToLaTeX lst
lab <- text `fmap` toLabel ref
+ plain <- stringToLaTeX TextString $ foldl (++) "" $ map stringify lst
let noNote (Note _) = Str ""
noNote x = x
let lstNoNotes = walk noNote lst
@@ -619,7 +634,12 @@ sectionHeader unnumbered ref level lst = do
then return empty
else do
return $ brackets txtNoNotes
- let stuffing = star <> optional <> braces txt
+ let contents = if render Nothing txt == plain
+ then braces txt
+ else braces (text "\\texorpdfstring"
+ <> braces txt
+ <> braces (text plain))
+ let stuffing = star <> optional <> contents
book <- gets stBook
opts <- gets stOptions
let level' = if book || writerChapters opts then level - 1 else level
@@ -663,7 +683,7 @@ sectionHeader unnumbered ref level lst = do
inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
-> State WriterState Doc
inlineListToLaTeX lst =
- mapM inlineToLaTeX (fixLineInitialSpaces lst)
+ mapM inlineToLaTeX (fixBreaks $ fixLineInitialSpaces lst)
>>= return . hcat
-- nonbreaking spaces (~) in LaTeX don't work after line breaks,
-- so we turn nbsps after hard breaks to \hspace commands.
@@ -675,6 +695,14 @@ inlineListToLaTeX lst =
fixNbsps s = let (ys,zs) = span (=='\160') s
in replicate (length ys) hspace ++ [Str zs]
hspace = RawInline "latex" "\\hspace*{0.333em}"
+ -- linebreaks after blank lines cause problems:
+ fixBreaks [] = []
+ fixBreaks ys@(LineBreak : LineBreak : _) =
+ case span (== LineBreak) ys of
+ (lbs, rest) -> RawInline "latex"
+ ("\\\\[" ++ show (length lbs) ++
+ "\\baselineskip]") : fixBreaks rest
+ fixBreaks (y:ys) = y : fixBreaks ys
isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True
@@ -724,22 +752,27 @@ inlineToLaTeX (Cite cits lst) = do
inlineToLaTeX (Code (_,classes,_) str) = do
opts <- gets stOptions
+ inHeading <- gets stInHeading
case () of
- _ | writerListings opts -> listingsCode
+ _ | writerListings opts && not inHeading -> listingsCode
| writerHighlight opts && not (null classes) -> highlightCode
- | otherwise -> rawCode
+ | otherwise -> rawCode
where listingsCode = do
inNote <- gets stInNote
when inNote $ modify $ \s -> s{ stVerbInNote = True }
- let chr = ((enumFromTo '!' '~') \\ str) !! 0
+ let chr = case "!\"&'()*,-./:;?@_" \\ str of
+ (c:_) -> c
+ [] -> '!'
return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr]
highlightCode = do
case highlight formatLaTeXInline ("",classes,[]) str of
Nothing -> rawCode
Just h -> modify (\st -> st{ stHighlighting = True }) >>
return (text h)
- rawCode = liftM (text . (\s -> "\\texttt{" ++ s ++ "}"))
+ rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}"))
$ stringToLaTeX CodeString str
+ where
+ escapeSpaces = concatMap (\c -> if c == ' ' then "\\ " else [c])
inlineToLaTeX (Quoted qt lst) = do
contents <- inlineListToLaTeX lst
csquotes <- liftM stCsquotes get
@@ -772,7 +805,7 @@ inlineToLaTeX (RawInline f str)
| f == Format "latex" || f == Format "tex"
= return $ text str
| otherwise = return empty
-inlineToLaTeX (LineBreak) = return "\\\\"
+inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr
inlineToLaTeX Space = return space
inlineToLaTeX (Link txt ('#':ident, _)) = do
contents <- inlineListToLaTeX txt
@@ -801,7 +834,10 @@ inlineToLaTeX (Image _ (source, _)) = do
then source
else unEscapeString source
source'' <- stringToLaTeX URLString source'
- return $ "\\includegraphics" <> braces (text source'')
+ inHeading <- gets stInHeading
+ return $
+ (if inHeading then "\\protect\\includegraphics" else "\\includegraphics")
+ <> braces (text source'')
inlineToLaTeX (Note contents) = do
inMinipage <- gets stInMinipage
modify (\s -> s{stInNote = True})
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index f06f1d6cc..dee4d56a4 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -57,12 +57,15 @@ import qualified Data.Text as T
type Notes = [[Block]]
type Refs = [([Inline], Target)]
-data WriterState = WriterState { stNotes :: Notes
- , stRefs :: Refs
- , stIds :: [String]
- , stPlain :: Bool }
+data WriterState = WriterState { stNotes :: Notes
+ , stRefs :: Refs
+ , stRefShortcutable :: Bool
+ , stInList :: Bool
+ , stIds :: [String]
+ , stPlain :: Bool }
instance Default WriterState
- where def = WriterState{ stNotes = [], stRefs = [], stIds = [], stPlain = False }
+ where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True,
+ stInList = False, stIds = [], stPlain = False }
-- | Convert Pandoc to Markdown.
writeMarkdown :: WriterOptions -> Pandoc -> String
@@ -323,9 +326,9 @@ blockToMarkdown opts (Plain inlines) = do
then Just $ writerColumns opts
else Nothing
let rendered = render colwidth contents
- let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs
- | otherwise = x : escapeDelimiter xs
- escapeDelimiter [] = []
+ let escapeDelimiter (x:xs) | x `elem` (".()" :: String) = '\\':x:xs
+ | otherwise = x : escapeDelimiter xs
+ escapeDelimiter [] = []
let contents' = if isEnabled Ext_all_symbols_escapable opts &&
not (stPlain st) && beginsWithOrderedListMarker rendered
then text $ escapeDelimiter rendered
@@ -453,7 +456,7 @@ blockToMarkdown opts t@(Table caption aligns widths headers rows) = do
$ Pandoc nullMeta [t]
return $ nst $ tbl $$ blankline $$ caption'' $$ blankline
blockToMarkdown opts (BulletList items) = do
- contents <- mapM (bulletListItemToMarkdown opts) items
+ contents <- inList $ mapM (bulletListItemToMarkdown opts) items
return $ cat contents <> blankline
blockToMarkdown opts (OrderedList (start,sty,delim) items) = do
let start' = if isEnabled Ext_startnum opts then start else 1
@@ -464,13 +467,22 @@ blockToMarkdown opts (OrderedList (start,sty,delim) items) = do
let markers' = map (\m -> if length m < 3
then m ++ replicate (3 - length m) ' '
else m) markers
- contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
+ contents <- inList $
+ mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
zip markers' items
return $ cat contents <> blankline
blockToMarkdown opts (DefinitionList items) = do
- contents <- mapM (definitionListItemToMarkdown opts) items
+ contents <- inList $ mapM (definitionListItemToMarkdown opts) items
return $ cat contents <> blankline
+inList :: State WriterState a -> State WriterState a
+inList p = do
+ oldInList <- gets stInList
+ modify $ \st -> st{ stInList = True }
+ res <- p
+ modify $ \st -> st{ stInList = oldInList }
+ return res
+
addMarkdownAttribute :: String -> String
addMarkdownAttribute s =
case span isTagText $ reverse $ parseTags s of
@@ -497,7 +509,12 @@ pipeTable headless aligns rawHeaders rawRows = do
AlignCenter -> ':':replicate w '-' ++ ":"
AlignRight -> replicate (w + 1) '-' ++ ":"
AlignDefault -> replicate (w + 2) '-'
- let header = if headless then empty else torow rawHeaders
+ -- note: pipe tables can't completely lack a
+ -- header; for a headerless table, we need a header of empty cells.
+ -- see jgm/pandoc#1996.
+ let header = if headless
+ then torow (replicate (length aligns) empty)
+ else torow rawHeaders
let border = nowrap $ text "|" <> hcat (intersperse (text "|") $
map toborder $ zip aligns widths) <> text "|"
let body = vcat $ map torow rawRows
@@ -677,12 +694,53 @@ getReference label (src, tit) = do
-- | Convert list of Pandoc inline elements to markdown.
inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
-inlineListToMarkdown opts lst =
- mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat
- where avoidBadWraps [] = []
- avoidBadWraps (Space:Str (c:cs):xs)
- | c `elem` "-*+>" = Str (' ':c:cs) : avoidBadWraps xs
- avoidBadWraps (x:xs) = x : avoidBadWraps xs
+inlineListToMarkdown opts lst = do
+ inlist <- gets stInList
+ go (if inlist then avoidBadWrapsInList lst else lst)
+ where go [] = return empty
+ go (i:is) = case i of
+ (Link _ _) -> case is of
+ -- If a link is followed by another link or '[' we don't shortcut
+ (Link _ _):_ -> unshortcutable
+ Space:(Link _ _):_ -> unshortcutable
+ Space:(Str('[':_)):_ -> unshortcutable
+ Space:(RawInline _ ('[':_)):_ -> unshortcutable
+ Space:(Cite _ _):_ -> unshortcutable
+ (Cite _ _):_ -> unshortcutable
+ Str ('[':_):_ -> unshortcutable
+ (RawInline _ ('[':_)):_ -> unshortcutable
+ (RawInline _ (' ':'[':_)):_ -> unshortcutable
+ _ -> shortcutable
+ _ -> shortcutable
+ where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
+ unshortcutable = do
+ iMark <- withState (\s -> s { stRefShortcutable = False })
+ (inlineToMarkdown opts i)
+ modify (\s -> s {stRefShortcutable = True })
+ fmap (iMark <>) (go is)
+
+avoidBadWrapsInList :: [Inline] -> [Inline]
+avoidBadWrapsInList [] = []
+avoidBadWrapsInList (Space:Str ('>':cs):xs) =
+ Str (' ':'>':cs) : avoidBadWrapsInList xs
+avoidBadWrapsInList (Space:Str [c]:[])
+ | c `elem` ['-','*','+'] = Str [' ', c] : []
+avoidBadWrapsInList (Space:Str [c]:Space:xs)
+ | c `elem` ['-','*','+'] = Str [' ', c] : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList (Space:Str cs:Space:xs)
+ | isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList (Space:Str cs:[])
+ | isOrderedListMarker cs = Str (' ':cs) : []
+avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
+
+isOrderedListMarker :: String -> Bool
+isOrderedListMarker xs = (last xs `elem` ['.',')']) &&
+ isRight (runParserT (anyOrderedListMarker >> eof)
+ defaultParserState "" xs)
+
+isRight :: Either a b -> Bool
+isRight (Right _) = True
+isRight (Left _) = False
escapeSpaces :: Inline -> Inline
escapeSpaces (Str s) = Str $ substitute " " "\\ " s
@@ -692,8 +750,10 @@ escapeSpaces x = x
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
inlineToMarkdown opts (Span attrs ils) = do
+ plain <- gets stPlain
contents <- inlineListToMarkdown opts ils
- return $ if isEnabled Ext_raw_html opts
+ return $ if not plain &&
+ (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
then tagWithAttrs "span" attrs <> contents <> text "</span>"
else contents
inlineToMarkdown opts (Emph lst) = do
@@ -726,13 +786,14 @@ inlineToMarkdown opts (Subscript lst) = do
else "<sub>" <> contents <> "</sub>"
inlineToMarkdown opts (SmallCaps lst) = do
plain <- gets stPlain
- if plain
- then inlineListToMarkdown opts $ capitalize lst
- else do
+ if not plain &&
+ (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
+ then do
contents <- inlineListToMarkdown opts lst
return $ tagWithAttrs "span"
- ("",[],[("style","font-variant:small-caps;")])
+ ("",[],[("style","font-variant:small-caps;")])
<> contents <> text "</span>"
+ else inlineListToMarkdown opts $ capitalize lst
inlineToMarkdown opts (Quoted SingleQuote lst) = do
contents <- inlineListToMarkdown opts lst
return $ "‘" <> contents <> "’"
@@ -821,8 +882,8 @@ inlineToMarkdown opts (Cite (c:cs) lst)
sdoc <- inlineListToMarkdown opts sinlines
let k' = text (modekey m ++ "@" ++ k)
r = case sinlines of
- Str (y:_):_ | y `elem` ",;]@" -> k' <> sdoc
- _ -> k' <+> sdoc
+ Str (y:_):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
+ _ -> k' <+> sdoc
return $ pdoc <+> r
modekey SuppressAuthor = "-"
modekey _ = ""
@@ -838,6 +899,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
[Str s] | escapeURI s == srcSuffix -> True
_ -> False
let useRefLinks = writerReferenceLinks opts && not useAuto
+ shortcutable <- gets stRefShortcutable
+ let useShortcutRefLinks = shortcutable &&
+ isEnabled Ext_shortcut_reference_links opts
ref <- if useRefLinks then getReference txt (src, tit) else return []
reftext <- inlineListToMarkdown opts ref
return $ if useAuto
@@ -847,7 +911,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
else if useRefLinks
then let first = "[" <> linktext <> "]"
second = if txt == ref
- then "[]"
+ then if useShortcutRefLinks
+ then ""
+ else "[]"
else "[" <> reftext <> "]"
in first <> second
else if plain
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index 3f392a5d0..b49c60867 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -107,7 +107,7 @@ blockToMediaWiki (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
let opt = if null txt
then ""
else "|alt=" ++ if null tit then capt else tit ++ capt
- return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n"
+ return $ "[[File:" ++ src ++ "|frame|none" ++ opt ++ "]]\n"
blockToMediaWiki (Para inlines) = do
tags <- asks useTags
@@ -375,14 +375,14 @@ inlineToMediaWiki (RawInline f str)
| f == Format "html" = return str
| otherwise = return ""
-inlineToMediaWiki (LineBreak) = return "<br />"
+inlineToMediaWiki (LineBreak) = return "<br />\n"
inlineToMediaWiki Space = return " "
inlineToMediaWiki (Link txt (src, _)) = do
label <- inlineListToMediaWiki txt
case txt of
- [Str s] | escapeURI s == src -> return src
+ [Str s] | isURI src && escapeURI s == src -> return src
_ -> return $ if isURI src
then "[" ++ src ++ " " ++ label ++ "]"
else "[[" ++ src' ++ "|" ++ label ++ "]]"
@@ -397,7 +397,7 @@ inlineToMediaWiki (Image alt (source, tit)) = do
then ""
else '|' : alt'
else '|' : tit
- return $ "[[Image:" ++ source ++ txt ++ "]]"
+ return $ "[[File:" ++ source ++ txt ++ "]]"
inlineToMediaWiki (Note contents) = do
contents' <- blockListToMediaWiki contents
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 03f8e8ba4..81bbdaf3f 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -41,7 +41,7 @@ import Control.Applicative ((<$>))
import Text.Pandoc.Options ( WriterOptions(..) )
import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem', warn )
import Text.Pandoc.ImageSize ( imageSize, sizeInPoints )
-import Text.Pandoc.MIME ( getMimeType )
+import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType )
import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Writers.Shared ( fixDisplayMath )
@@ -51,7 +51,7 @@ import Text.Pandoc.XML
import Text.Pandoc.Pretty
import qualified Control.Exception as E
import Data.Time.Clock.POSIX ( getPOSIXTime )
-import System.FilePath ( takeExtension, takeDirectory )
+import System.FilePath ( takeExtension, takeDirectory, (<.>))
-- | Produce an ODT file from a Pandoc document.
writeODT :: WriterOptions -- ^ Writer options
@@ -127,23 +127,27 @@ writeODT opts doc@(Pandoc meta _) = do
return $ fromArchive archive''
transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
-transformPicMath opts entriesRef (Image lab (src,_)) = do
+transformPicMath opts entriesRef (Image lab (src,t)) = do
res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
case res of
Left (_ :: E.SomeException) -> do
warn $ "Could not find image `" ++ src ++ "', skipping..."
return $ Emph lab
- Right (img, _) -> do
+ Right (img, mbMimeType) -> do
let size = imageSize img
let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size
let tit' = show w ++ "x" ++ show h
entries <- readIORef entriesRef
- let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src
+ let extension = fromMaybe (takeExtension $ takeWhile (/='?') src)
+ (mbMimeType >>= extensionFromMimeType)
+ let newsrc = "Pictures/" ++ show (length entries) <.> extension
let toLazy = B.fromChunks . (:[])
epochtime <- floor `fmap` getPOSIXTime
let entry = toEntry newsrc epochtime $ toLazy img
modifyIORef entriesRef (entry:)
- return $ Image lab (newsrc, tit')
+ let fig | "fig:" `isPrefixOf` t = "fig:"
+ | otherwise = ""
+ return $ Image lab (newsrc, fig++tit')
transformPicMath _ entriesRef (Math t math) = do
entries <- readIORef entriesRef
let dt = if t == InlineMath then DisplayInline else DisplayBlock
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
index dd359f3f5..5c8ef8c45 100644
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -38,7 +38,7 @@ import Text.Pandoc.Writers.HTML (writeHtmlString)
import Text.Pandoc.Writers.Markdown (writeMarkdown)
import Text.Pandoc.Pretty
import Data.Time
-import System.Locale (defaultTimeLocale)
+import Text.Pandoc.Compat.Locale (defaultTimeLocale)
import qualified Text.Pandoc.Builder as B
-- | Convert Pandoc document to string in OPML format.
@@ -87,4 +87,3 @@ elementToOPML opts (Sec _ _num _ title elements) =
| not (null blocks)]
in inTags True "outline" attrs $
vcat (map (elementToOPML opts) rest)
-
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 773d142f4..aee656413 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, OverloadedStrings #-}
+{-# LANGUAGE PatternGuards, OverloadedStrings, FlexibleContexts #-}
{-
Copyright (C) 2008-2014 Andrea Rossato <andrea.rossato@ing.unitn.it>
and John MacFarlane.
@@ -288,6 +288,8 @@ blockToOpenDocument o bs
| Plain b <- bs = if null b
then return empty
else inParagraphTags =<< inlinesToOpenDocument o b
+ | Para [Image c (s,'f':'i':'g':':':t)] <- bs
+ = figure c s t
| Para b <- bs = if null b
then return empty
else inParagraphTags =<< inlinesToOpenDocument o b
@@ -334,7 +336,7 @@ blockToOpenDocument o bs
mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles
captionDoc <- if null c
then return empty
- else withParagraphStyle o "Caption" [Para c]
+ else withParagraphStyle o "TableCaption" [Para c]
th <- if all null h
then return empty
else colHeadsToOpenDocument o name (map fst paraHStyles) h
@@ -342,6 +344,12 @@ blockToOpenDocument o bs
return $ inTags True "table:table" [ ("table:name" , name)
, ("table:style-name", name)
] (vcat columns $$ th $$ vcat tr) $$ captionDoc
+ figure caption source title | null caption =
+ withParagraphStyle o "Figure" [Para [Image caption (source,title)]]
+ | otherwise = do
+ imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image caption (source,title)]]
+ captionDoc <- withParagraphStyle o "FigureCaption" [Para caption]
+ return $ imageDoc $$ captionDoc
colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc
colHeadsToOpenDocument o tn ns hs =
@@ -370,7 +378,7 @@ inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc
inlineToOpenDocument o ils
| Space <- ils = inTextStyle space
| Span _ xs <- ils = inlinesToOpenDocument o xs
- | LineBreak <- ils = return $ selfClosingTag "text:line-break" []
+ | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] <> cr
| Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s
| Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l
| Strong l <- ils = withTextStyle Bold $ inlinesToOpenDocument o l
@@ -553,4 +561,3 @@ textStyleAttr s
,("style:font-name-asian" ,"Courier New")
,("style:font-name-complex" ,"Courier New")]
| otherwise = []
-
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 57ebfc360..2dd899680 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -48,11 +48,13 @@ import Data.Char (isSpace, toLower)
type Refs = [([Inline], Target)]
data WriterState =
- WriterState { stNotes :: [[Block]]
- , stLinks :: Refs
- , stImages :: [([Inline], (String, String, Maybe String))]
- , stHasMath :: Bool
- , stOptions :: WriterOptions
+ WriterState { stNotes :: [[Block]]
+ , stLinks :: Refs
+ , stImages :: [([Inline], (String, String, Maybe String))]
+ , stHasMath :: Bool
+ , stHasRawTeX :: Bool
+ , stOptions :: WriterOptions
+ , stTopLevel :: Bool
}
-- | Convert Pandoc to RST.
@@ -60,7 +62,8 @@ writeRST :: WriterOptions -> Pandoc -> String
writeRST opts document =
let st = WriterState { stNotes = [], stLinks = [],
stImages = [], stHasMath = False,
- stOptions = opts }
+ stHasRawTeX = False, stOptions = opts,
+ stTopLevel = True}
in evalState (pandocToRST document) st
-- | Return RST representation of document.
@@ -78,23 +81,32 @@ pandocToRST (Pandoc meta blocks) = do
(fmap (render colwidth) . blockListToRST)
(fmap (trimr . render colwidth) . inlineListToRST)
$ deleteMeta "title" $ deleteMeta "subtitle" meta
- body <- blockListToRST blocks
+ body <- blockListToRST' True $ normalizeHeadings 1 blocks
notes <- liftM (reverse . stNotes) get >>= notesToRST
-- note that the notes may contain refs, so we do them first
refs <- liftM (reverse . stLinks) get >>= refsToRST
pics <- liftM (reverse . stImages) get >>= pictRefsToRST
hasMath <- liftM stHasMath get
+ rawTeX <- liftM stHasRawTeX get
let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics]
let context = defField "body" main
$ defField "toc" (writerTableOfContents opts)
- $ defField "toc-depth" (writerTOCDepth opts)
+ $ defField "toc-depth" (show $ writerTOCDepth opts)
$ defField "math" hasMath
$ defField "title" (render Nothing title :: String)
$ defField "math" hasMath
+ $ defField "rawtex" rawTeX
$ metadata
if writerStandalone opts
then return $ renderTemplate' (writerTemplate opts) context
else return main
+ where
+ normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs'
+ where (cont,bs') = break (headerLtEq l) bs
+ headerLtEq level (Header l' _ _) = l' <= level
+ headerLtEq _ _ = False
+ normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs
+ normalizeHeadings _ [] = []
-- | Return RST representation of reference key table.
refsToRST :: Refs -> State WriterState Doc
@@ -105,7 +117,7 @@ keyToRST :: ([Inline], (String, String))
-> State WriterState Doc
keyToRST (label, (src, _)) = do
label' <- inlineListToRST label
- let label'' = if ':' `elem` (render Nothing label')
+ let label'' = if ':' `elem` ((render Nothing label') :: String)
then char '`' <> label' <> char '`'
else label'
return $ nowrap $ ".. _" <> label'' <> ": " <> text src
@@ -173,11 +185,11 @@ blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
capt <- inlineListToRST txt
let fig = "figure:: " <> text src
let alt = ":alt: " <> if null tit then capt else text tit
- return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline
+ return $ hang 3 ".. " (fig $$ alt $+$ capt) $$ blankline
blockToRST (Para inlines)
| LineBreak `elem` inlines = do -- use line block if LineBreaks
lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines
- return $ (vcat $ map (text "| " <>) lns) <> blankline
+ return $ (vcat $ map (hang 2 (text "| ")) lns) <> blankline
| otherwise = do
contents <- inlineListToRST inlines
return $ contents <> blankline
@@ -188,11 +200,21 @@ blockToRST (RawBlock f@(Format f') str)
(nest 3 $ text str) $$ blankline
blockToRST HorizontalRule =
return $ blankline $$ "--------------" $$ blankline
-blockToRST (Header level _ inlines) = do
+blockToRST (Header level (name,classes,_) inlines) = do
contents <- inlineListToRST inlines
- let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
- let border = text $ replicate (offset contents) headerChar
- return $ nowrap $ contents $$ border $$ blankline
+ isTopLevel <- gets stTopLevel
+ if isTopLevel
+ then do
+ let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
+ let border = text $ replicate (offset contents) headerChar
+ return $ nowrap $ contents $$ border $$ blankline
+ else do
+ let rub = "rubric:: " <> contents
+ let name' | null name = empty
+ | otherwise = ":name: " <> text name
+ let cls | null classes = empty
+ | otherwise = ":class: " <> text (unwords classes)
+ return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline
blockToRST (CodeBlock (_,classes,kvs) str) = do
opts <- stOptions <$> get
let tabstop = writerTabStop opts
@@ -239,8 +261,7 @@ blockToRST (Table caption _ widths headers rows) = do
middle = hcat $ intersperse sep' blocks
let makeRow = hpipeBlocks . zipWith lblock widthsInChars
let head' = makeRow headers'
- rows' <- mapM (\row -> do cols <- mapM blockListToRST row
- return $ makeRow cols) rows
+ let rows' = map makeRow rawRows
let border ch = char '+' <> char ch <>
(hcat $ intersperse (char ch <> char '+' <> char ch) $
map (\l -> text $ replicate l ch) widthsInChars) <>
@@ -253,7 +274,7 @@ blockToRST (Table caption _ widths headers rows) = do
blockToRST (BulletList items) = do
contents <- mapM bulletListItemToRST items
-- ensure that sublists have preceding blank line
- return $ blankline $$ vcat contents $$ blankline
+ return $ blankline $$ chomp (vcat contents) $$ blankline
blockToRST (OrderedList (start, style', delim) items) = do
let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
then take (length items) $ repeat "#."
@@ -265,11 +286,11 @@ blockToRST (OrderedList (start, style', delim) items) = do
contents <- mapM (\(item, num) -> orderedListItemToRST item num) $
zip markers' items
-- ensure that sublists have preceding blank line
- return $ blankline $$ vcat contents $$ blankline
+ return $ blankline $$ chomp (vcat contents) $$ blankline
blockToRST (DefinitionList items) = do
contents <- mapM definitionListItemToRST items
-- ensure that sublists have preceding blank line
- return $ blankline $$ vcat contents $$ blankline
+ return $ blankline $$ chomp (vcat contents) $$ blankline
-- | Convert bullet list item (list of blocks) to RST.
bulletListItemToRST :: [Block] -> State WriterState Doc
@@ -295,9 +316,19 @@ definitionListItemToRST (label, defs) = do
return $ label' $$ nest tabstop (nestle contents <> cr)
-- | Convert list of Pandoc block elements to RST.
+blockListToRST' :: Bool
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToRST' topLevel blocks = do
+ tl <- gets stTopLevel
+ modify (\s->s{stTopLevel=topLevel})
+ res <- vcat `fmap` mapM blockToRST blocks
+ modify (\s->s{stTopLevel=tl})
+ return res
+
blockListToRST :: [Block] -- ^ List of block elements
-> State WriterState Doc
-blockListToRST blocks = mapM blockToRST blocks >>= return . vcat
+blockListToRST = blockListToRST' False
-- | Convert list of Pandoc inline elements to RST.
inlineListToRST :: [Inline] -> State WriterState Doc
@@ -334,12 +365,12 @@ inlineListToRST lst =
okAfterComplex :: Inline -> Bool
okAfterComplex Space = True
okAfterComplex LineBreak = True
- okAfterComplex (Str (c:_)) = isSpace c || c `elem` "-.,:;!?\\/'\")]}>–—"
+ okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String)
okAfterComplex _ = False
okBeforeComplex :: Inline -> Bool
okBeforeComplex Space = True
okBeforeComplex LineBreak = True
- okBeforeComplex (Str (c:_)) = isSpace c || c `elem` "-:/'\"<([{–—"
+ okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String)
okBeforeComplex _ = False
isComplex :: Inline -> Bool
isComplex (Emph _) = True
@@ -393,6 +424,9 @@ inlineToRST (Math t str) = do
else blankline $$ (".. math:: " <> text str) $$ blankline
inlineToRST (RawInline f x)
| f == "rst" = return $ text x
+ | f == "latex" || f == "tex" = do
+ modify $ \st -> st{ stHasRawTeX = True }
+ return $ ":raw-latex:`" <> text x <> "`"
| otherwise = return empty
inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para)
inlineToRST Space = return space
@@ -427,7 +461,7 @@ inlineToRST (Image alternate (source, tit)) = do
return $ "|" <> label <> "|"
inlineToRST (Note contents) = do
-- add to notes in state
- notes <- get >>= return . stNotes
+ notes <- gets stNotes
modify $ \st -> st { stNotes = contents:notes }
let ref = show $ (length notes) + 1
return $ " [" <> text ref <> "]_"
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 43405ce3c..dfad4b0e2 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -106,7 +106,9 @@ writeRTF options (Pandoc meta@(Meta metamap) blocks) =
$ metadata
in if writerStandalone options
then renderTemplate' (writerTemplate options) context
- else body
+ else case reverse body of
+ ('\n':_) -> body
+ _ -> body ++ "\n"
-- | Construct table of contents from list of header blocks.
tableOfContents :: [Block] -> String
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 8ac717bab..792718e95 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -368,7 +368,7 @@ inlineListForNode = return . text . stringToTexinfo .
-- periods, commas, colons, and parentheses are disallowed in node names
disallowedInNode :: Char -> Bool
-disallowedInNode c = c `elem` ".,:()"
+disallowedInNode c = c `elem` (".,:()" :: String)
-- | Convert inline element to Texinfo
inlineToTexinfo :: Inline -- ^ Inline to convert
@@ -421,8 +421,8 @@ inlineToTexinfo (RawInline f str)
return $ text "@tex" $$ text str $$ text "@end tex"
| f == "texinfo" = return $ text str
| otherwise = return empty
-inlineToTexinfo (LineBreak) = return $ text "@*"
-inlineToTexinfo Space = return $ char ' '
+inlineToTexinfo (LineBreak) = return $ text "@*" <> cr
+inlineToTexinfo Space = return space
inlineToTexinfo (Link txt (src@('#':_), _)) = do
contents <- escapeCommas $ inlineListToTexinfo txt