aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc.hs38
-rw-r--r--src/Text/Pandoc/Error.hs64
-rw-r--r--src/Text/Pandoc/ImageSize.hs68
-rw-r--r--src/Text/Pandoc/MediaBag.hs6
-rw-r--r--src/Text/Pandoc/Options.hs33
-rw-r--r--src/Text/Pandoc/PDF.hs22
-rw-r--r--src/Text/Pandoc/Parsing.hs80
-rw-r--r--src/Text/Pandoc/Pretty.hs18
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs119
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs31
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs27
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs80
-rw-r--r--src/Text/Pandoc/Readers/Docx/StyleMap.hs106
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs26
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs37
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs21
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs8
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs225
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs752
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs20
-rw-r--r--src/Text/Pandoc/Readers/Native.hs40
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs48
-rw-r--r--src/Text/Pandoc/Readers/Org.hs488
-rw-r--r--src/Text/Pandoc/Readers/RST.hs8
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs5
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs3
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs5
-rw-r--r--src/Text/Pandoc/Shared.hs16
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs6
-rw-r--r--src/Text/Pandoc/Writers/CommonMark.hs178
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs3
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs278
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs7
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs20
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs25
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs108
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs6
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs11
-rw-r--r--src/Text/Pandoc/Writers/RST.hs61
39 files changed, 1956 insertions, 1141 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index d2bb85699..dd361f8d7 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -37,10 +37,12 @@ inline links:
> module Main where
> import Text.Pandoc
+> import Text.Pandoc.Error (handleError)
>
> markdownToRST :: String -> String
-> markdownToRST =
-> (writeRST def {writerReferenceLinks = True}) . readMarkdown def
+> markdownToRST = handleError .
+> writeRST def {writerReferenceLinks = True} .
+> readMarkdown def
>
> main = getContents >>= putStrLn . markdownToRST
@@ -66,6 +68,7 @@ module Text.Pandoc
, mkStringReader
, readDocx
, readMarkdown
+ , readCommonMark
, readMediaWiki
, readRST
, readOrg
@@ -109,6 +112,7 @@ module Text.Pandoc
, writeOrg
, writeAsciiDoc
, writeHaddock
+ , writeCommonMark
, writeCustom
-- * Rendering templates and default templates
, module Text.Pandoc.Templates
@@ -124,6 +128,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.JSON
import Text.Pandoc.Readers.Markdown
+import Text.Pandoc.Readers.CommonMark
import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.Org
@@ -161,11 +166,13 @@ import Text.Pandoc.Writers.Textile
import Text.Pandoc.Writers.Org
import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.Haddock
+import Text.Pandoc.Writers.CommonMark
import Text.Pandoc.Writers.Custom
import Text.Pandoc.Templates
import Text.Pandoc.Options
-import Text.Pandoc.Shared (safeRead, warn)
+import Text.Pandoc.Shared (safeRead, warn, mapLeft)
import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.Error
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.List (intercalate)
@@ -201,19 +208,22 @@ parseFormatSpec = parse formatSpec ""
'-' -> Set.delete ext
_ -> Set.insert ext
-data Reader = StringReader (ReaderOptions -> String -> IO Pandoc)
- | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag))
-mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader
+data Reader = StringReader (ReaderOptions -> String -> IO (Either PandocError Pandoc))
+ | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Either PandocError (Pandoc,MediaBag)))
+
+mkStringReader :: (ReaderOptions -> String -> (Either PandocError Pandoc)) -> Reader
mkStringReader r = StringReader (\o s -> return $ r o s)
-mkStringReaderWithWarnings :: (ReaderOptions -> String -> (Pandoc, [String])) -> Reader
+mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader
mkStringReaderWithWarnings r = StringReader $ \o s -> do
- let (doc, warnings) = r o s
- mapM_ warn warnings
- return doc
+ case r o s of
+ Left err -> return $ Left err
+ Right (doc, warnings) -> do
+ mapM_ warn warnings
+ return (Right doc)
-mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader
+mkBSReader :: (ReaderOptions -> BL.ByteString -> (Either PandocError (Pandoc, MediaBag))) -> Reader
mkBSReader r = ByteStringReader (\o s -> return $ r o s)
-- | Association list of formats and readers.
@@ -225,6 +235,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s)
,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings)
,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings)
+ ,("commonmark" , mkStringReader readCommonMark)
,("rst" , mkStringReaderWithWarnings readRSTWithWarnings )
,("mediawiki" , mkStringReader readMediaWiki)
,("docbook" , mkStringReader readDocBook)
@@ -296,6 +307,7 @@ writers = [
,("org" , PureStringWriter writeOrg)
,("asciidoc" , PureStringWriter writeAsciiDoc)
,("haddock" , PureStringWriter writeHaddock)
+ ,("commonmark" , PureStringWriter writeCommonMark)
]
getDefaultExtensions :: String -> Set Extension
@@ -357,8 +369,8 @@ class ToJSONFilter a => ToJsonFilter a
where toJsonFilter :: a -> IO ()
toJsonFilter = toJSONFilter
-readJSON :: ReaderOptions -> String -> Pandoc
-readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
+readJSON :: ReaderOptions -> String -> Either PandocError Pandoc
+readJSON _ = mapLeft ParseFailure . eitherDecode' . UTF8.fromStringLazy
writeJSON :: WriterOptions -> Pandoc -> String
writeJSON _ = UTF8.toStringLazy . encode
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
new file mode 100644
index 000000000..73d1e8f08
--- /dev/null
+++ b/src/Text/Pandoc/Error.hs
@@ -0,0 +1,64 @@
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+{- |
+ Module : Text.Pandoc.Error
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This module provides a standard way to deal with possible errors encounted
+during parsing.
+
+-}
+module Text.Pandoc.Error (PandocError(..), handleError) where
+
+import Text.Parsec.Error
+import Text.Parsec.Pos hiding (Line)
+import Text.Pandoc.Compat.Except
+
+type Input = String
+
+data PandocError = -- | Generic parse failure
+ ParseFailure String
+ -- | Error thrown by a Parsec parser
+ | ParsecError Input ParseError
+ deriving (Show)
+
+
+instance Error PandocError where
+ strMsg = ParseFailure
+
+
+-- | An unsafe method to handle `PandocError`s.
+handleError :: Either PandocError a -> a
+handleError (Right r) = r
+handleError (Left err) =
+ case err of
+ ParseFailure string -> error string
+ ParsecError input err' ->
+ let errPos = errorPos err'
+ errLine = sourceLine errPos
+ errColumn = sourceColumn errPos
+ theline = (lines input ++ [""]) !! (errLine - 1)
+ in error $ "\nError at " ++ show err' ++ "\n" ++
+ theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++
+ "^"
+
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 68b34dcf3..8f0a991ba 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
{-
Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
@@ -38,8 +39,11 @@ import Control.Monad
import Data.Bits
import Data.Binary
import Data.Binary.Get
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, hush)
import qualified Data.Map as M
+import Text.Pandoc.Compat.Except
+import Control.Monad.Trans
+import Data.Maybe (fromMaybe)
-- quick and dirty functions to get image sizes
-- algorithms borrowed from wwwis.pl
@@ -64,7 +68,7 @@ imageType img = case B.take 4 img of
"%!PS"
| (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
-> return Eps
- _ -> fail "Unknown image type"
+ _ -> (hush . Left) "Unknown image type"
imageSize :: ByteString -> Maybe ImageSize
imageSize img = do
@@ -114,7 +118,7 @@ pngSize img = do
([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return
((shift w1 24) + (shift w2 16) + (shift w3 8) + w4,
(shift h1 24) + (shift h2 16) + (shift h3 8) + h4)
- _ -> fail "PNG parse error"
+ _ -> (hush . Left) "PNG parse error"
let (dpix, dpiy) = findpHYs rest''
return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy }
@@ -143,7 +147,7 @@ gifSize img = do
dpiX = 72,
dpiY = 72
}
- _ -> fail "GIF parse error"
+ _ -> (hush . Left) "GIF parse error"
jpegSize :: ByteString -> Maybe ImageSize
jpegSize img = do
@@ -174,36 +178,37 @@ findJfifSize bs = do
Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do
case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of
[h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2)
- _ -> fail "JPEG parse error"
+ _ -> (hush . Left) "JPEG parse error"
Just (_,bs'') -> do
case map fromIntegral $ unpack $ B.take 2 bs'' of
[c1,c2] -> do
let len = shift c1 8 + c2
-- skip variables
findJfifSize $ B.drop len bs''
- _ -> fail "JPEG parse error"
- Nothing -> fail "Did not find length record"
+ _ -> (hush . Left) "JPEG parse error"
+ Nothing -> (hush . Left) "Did not find length record"
exifSize :: ByteString -> Maybe ImageSize
-exifSize bs = runGet (Just <$> exifHeader bl) bl
+exifSize bs = hush . runGet header $ bl
where bl = BL.fromChunks [bs]
+ header = runExceptT $ exifHeader bl
-- NOTE: It would be nicer to do
-- runGet ((Just <$> exifHeader) <|> return Nothing)
-- which would prevent pandoc from raising an error when an exif header can't
-- be parsed. But we only get an Alternative instance for Get in binary 0.6,
-- and binary 0.5 ships with ghc 7.6.
-exifHeader :: BL.ByteString -> Get ImageSize
+exifHeader :: BL.ByteString -> ExceptT String Get ImageSize
exifHeader hdr = do
- _app1DataSize <- getWord16be
- exifHdr <- getWord32be
- unless (exifHdr == 0x45786966) $ fail "Did not find exif header"
- zeros <- getWord16be
- unless (zeros == 0) $ fail "Expected zeros after exif header"
+ _app1DataSize <- lift getWord16be
+ exifHdr <- lift getWord32be
+ unless (exifHdr == 0x45786966) $ throwError "Did not find exif header"
+ zeros <- lift getWord16be
+ unless (zeros == 0) $ throwError "Expected zeros after exif header"
-- beginning of tiff header -- we read whole thing to use
-- in getting data from offsets:
let tiffHeader = BL.drop 8 hdr
- byteAlign <- getWord16be
+ byteAlign <- lift getWord16be
let bigEndian = byteAlign == 0x4d4d
let (getWord16, getWord32, getWord64) =
if bigEndian
@@ -213,17 +218,17 @@ exifHeader hdr = do
num <- getWord32
den <- getWord32
return $ fromIntegral num / fromIntegral den
- tagmark <- getWord16
- unless (tagmark == 0x002a) $ fail "Failed alignment sanity check"
- ifdOffset <- getWord32
- skip (fromIntegral ifdOffset - 8) -- skip to IDF
- numentries <- getWord16
- let ifdEntry = do
- tag <- getWord16 >>= \t ->
- maybe (return UnknownTagType) return
- (M.lookup t tagTypeTable)
- dataFormat <- getWord16
- numComponents <- getWord32
+ tagmark <- lift getWord16
+ unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check"
+ ifdOffset <- lift getWord32
+ lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF
+ numentries <- lift getWord16
+ let ifdEntry :: ExceptT String Get (TagType, DataFormat)
+ ifdEntry = do
+ tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable
+ <$> lift getWord16
+ dataFormat <- lift getWord16
+ numComponents <- lift getWord32
(fmt, bytesPerComponent) <-
case dataFormat of
1 -> return (UnsignedByte . runGet getWord8, 1)
@@ -238,9 +243,10 @@ exifHeader hdr = do
10 -> return (SignedRational . runGet getRational, 8)
11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4)
12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8)
- _ -> fail $ "Unknown data format " ++ show dataFormat
+ _ -> throwError $ "Unknown data format " ++ show dataFormat
let totalBytes = fromIntegral $ numComponents * bytesPerComponent
- payload <- if totalBytes <= 4 -- data is right here
+ payload <- lift $
+ if totalBytes <= 4 -- data is right here
then fmt <$>
(getLazyByteString (fromIntegral totalBytes) <*
skip (4 - totalBytes))
@@ -252,9 +258,9 @@ exifHeader hdr = do
entries <- sequence $ replicate (fromIntegral numentries) ifdEntry
subentries <- case lookup ExifOffset entries of
Just (UnsignedLong offset) -> do
- pos <- bytesRead
- skip (fromIntegral offset - (fromIntegral pos - 8))
- numsubentries <- getWord16
+ pos <- lift bytesRead
+ lift $ skip (fromIntegral offset - (fromIntegral pos - 8))
+ numsubentries <- lift getWord16
sequence $
replicate (fromIntegral numsubentries) ifdEntry
_ -> return []
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index a55d5417e..1246cdc8f 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}
{-
Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu>
@@ -46,13 +46,15 @@ import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Maybe (fromMaybe)
import System.IO (stderr)
+import Data.Data (Data)
+import Data.Typeable (Typeable)
-- | A container for a collection of binary resources, with names and
-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty'
-- can be used for an empty 'MediaBag', and '<>' can be used to append
-- two 'MediaBag's.
newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString))
- deriving (Monoid)
+ deriving (Monoid, Data, Typeable)
instance Show MediaBag where
show bag = "MediaBag " ++ show (mediaDirectory bag)
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 24e31fbb6..1776d14e5 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE DeriveDataTypeable #-}
{-
Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
@@ -51,6 +52,8 @@ import Data.Default
import Text.Pandoc.Highlighting (Style, pygments)
import Text.Pandoc.MediaBag (MediaBag)
import Data.Monoid
+import Data.Data (Data)
+import Data.Typeable (Typeable)
-- | Individually selectable syntax extensions.
data Extension =
@@ -74,7 +77,7 @@ data Extension =
| Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only)
| Ext_fenced_code_blocks -- ^ Parse fenced code blocks
| Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks
- | Ext_backtick_code_blocks -- ^ Github style ``` code blocks
+ | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks
| Ext_inline_code_attributes -- ^ Allow attributes on inline code
| Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
| Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
@@ -109,7 +112,8 @@ data Extension =
| Ext_implicit_header_references -- ^ Implicit reference links for headers
| Ext_line_blocks -- ^ RST style line blocks
| Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
- deriving (Show, Read, Enum, Eq, Ord, Bounded)
+ | Ext_shortcut_reference_links -- ^ Shortcut reference links
+ deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable)
pandocExtensions :: Set Extension
pandocExtensions = Set.fromList
@@ -151,6 +155,7 @@ pandocExtensions = Set.fromList
, Ext_header_attributes
, Ext_implicit_header_references
, Ext_line_blocks
+ , Ext_shortcut_reference_links
]
phpMarkdownExtraExtensions :: Set Extension
@@ -164,6 +169,7 @@ phpMarkdownExtraExtensions = Set.fromList
, Ext_intraword_underscores
, Ext_header_attributes
, Ext_abbreviations
+ , Ext_shortcut_reference_links
]
githubMarkdownExtensions :: Set Extension
@@ -180,6 +186,7 @@ githubMarkdownExtensions = Set.fromList
, Ext_strikeout
, Ext_hard_line_breaks
, Ext_lists_without_preceding_blankline
+ , Ext_shortcut_reference_links
]
multimarkdownExtensions :: Set Extension
@@ -202,7 +209,9 @@ multimarkdownExtensions = Set.fromList
strictExtensions :: Set Extension
strictExtensions = Set.fromList
- [ Ext_raw_html ]
+ [ Ext_raw_html
+ , Ext_shortcut_reference_links
+ ]
data ReaderOptions = ReaderOptions{
readerExtensions :: Set Extension -- ^ Syntax extensions
@@ -220,7 +229,7 @@ data ReaderOptions = ReaderOptions{
, readerDefaultImageExtension :: String -- ^ Default extension for images
, readerTrace :: Bool -- ^ Print debugging info
, readerTrackChanges :: TrackChanges
-} deriving (Show, Read)
+} deriving (Show, Read, Data, Typeable)
instance Default ReaderOptions
where def = ReaderOptions{
@@ -242,7 +251,7 @@ instance Default ReaderOptions
-- Writer options
--
-data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read)
+data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable)
data HTMLMathMethod = PlainMath
| LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
@@ -252,18 +261,18 @@ data HTMLMathMethod = PlainMath
| MathML (Maybe String) -- url of MathMLinHTML.js
| MathJax String -- url of MathJax.js
| KaTeX String String -- url of stylesheet and katex.js
- deriving (Show, Read, Eq)
+ deriving (Show, Read, Eq, Data, Typeable)
data CiteMethod = Citeproc -- use citeproc to render them
| Natbib -- output natbib cite commands
| Biblatex -- output biblatex cite commands
- deriving (Show, Read, Eq)
+ deriving (Show, Read, Eq, Data, Typeable)
-- | Methods for obfuscating email addresses in HTML.
data ObfuscationMethod = NoObfuscation
| ReferenceObfuscation
| JavascriptObfuscation
- deriving (Show, Read, Eq)
+ deriving (Show, Read, Eq, Data, Typeable)
-- | Varieties of HTML slide shows.
data HTMLSlideVariant = S5Slides
@@ -272,13 +281,13 @@ data HTMLSlideVariant = S5Slides
| DZSlides
| RevealJsSlides
| NoSlides
- deriving (Show, Read, Eq)
+ deriving (Show, Read, Eq, Data, Typeable)
-- | Options for accepting or rejecting MS Word track-changes.
data TrackChanges = AcceptChanges
| RejectChanges
| AllChanges
- deriving (Show, Read, Eq)
+ deriving (Show, Read, Eq, Data, Typeable)
-- | Options for writers
data WriterOptions = WriterOptions
@@ -324,7 +333,8 @@ data WriterOptions = WriterOptions
, writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified
, writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader
, writerVerbose :: Bool -- ^ Verbose debugging output
- } deriving Show
+ , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine
+ } deriving (Show, Data, Typeable)
instance Default WriterOptions where
def = WriterOptions { writerStandalone = False
@@ -368,6 +378,7 @@ instance Default WriterOptions where
, writerReferenceDocx = Nothing
, writerMediaBag = mempty
, writerVerbose = False
+ , writerLaTeXArgs = []
}
-- | Returns True if the given extension is enabled.
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index ea6699ac4..2d602a0df 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -71,7 +71,8 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex)
makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
doc' <- handleImages opts tmpdir doc
let source = writer opts doc'
- tex2pdf' (writerVerbose opts) tmpdir program source
+ args = writerLaTeXArgs opts
+ tex2pdf' (writerVerbose opts) args tmpdir program source
handleImages :: WriterOptions
-> FilePath -- ^ temp dir to store images
@@ -107,8 +108,7 @@ convertImages tmpdir (Image ils (src, tit)) = do
img <- convertImage tmpdir src
newPath <-
case img of
- Left e -> src <$
- warn ("Unable to convert image `" ++ src ++ "':\n" ++ e)
+ Left e -> src <$ warn e
Right fp -> return fp
return (Image ils (newPath, tit))
convertImages _ x = return x
@@ -122,7 +122,8 @@ convertImage tmpdir fname =
Just "application/pdf" -> doNothing
_ -> JP.readImage fname >>= \res ->
case res of
- Left msg -> return $ Left msg
+ Left _ -> return $ Left $ "Unable to convert `" ++
+ fname ++ "' for use with pdflatex."
Right img ->
E.catch (Right fileOut <$ JP.savePngImage fileOut img) $
\(e :: E.SomeException) -> return (Left (show e))
@@ -132,15 +133,16 @@ convertImage tmpdir fname =
doNothing = return (Right fname)
tex2pdf' :: Bool -- ^ Verbose output
+ -> [String] -- ^ Arguments to the latex-engine
-> FilePath -- ^ temp directory for output
-> String -- ^ tex program
-> String -- ^ tex source
-> IO (Either ByteString ByteString)
-tex2pdf' verbose tmpDir program source = do
+tex2pdf' verbose args tmpDir program source = do
let numruns = if "\\tableofcontents" `isInfixOf` source
then 3 -- to get page numbers
else 2 -- 1 run won't give you PDF bookmarks
- (exit, log', mbPdf) <- runTeXProgram verbose program 1 numruns tmpDir source
+ (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source
case (exit, mbPdf) of
(ExitFailure _, _) -> do
let logmsg = extractMsg log'
@@ -173,9 +175,9 @@ extractMsg log' = do
-- Run a TeX program on an input bytestring and return (exit code,
-- contents of stdout, contents of produced PDF if any). Rerun
-- a fixed number of times to resolve references.
-runTeXProgram :: Bool -> String -> Int -> Int -> FilePath -> String
+runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String
-> IO (ExitCode, ByteString, Maybe ByteString)
-runTeXProgram verbose program runNumber numRuns tmpDir source = do
+runTeXProgram verbose program args runNumber numRuns tmpDir source = do
let file = tmpDir </> "input.tex"
exists <- doesFileExist file
unless exists $ UTF8.writeFile file source
@@ -188,7 +190,7 @@ runTeXProgram verbose program runNumber numRuns tmpDir source = do
let file' = file
#endif
let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
- "-output-directory", tmpDir', file']
+ "-output-directory", tmpDir', file'] ++ args
env' <- getEnvironment
let sep = searchPathSeparator:[]
let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++)
@@ -212,7 +214,7 @@ runTeXProgram verbose program runNumber numRuns tmpDir source = do
B.hPutStr stderr err
putStr "\n"
if runNumber <= numRuns
- then runTeXProgram verbose program (runNumber + 1) numRuns tmpDir source
+ then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source
else do
let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
pdfExists <- doesFileExist pdfFile
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index facf4d3b9..33120e55d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -65,7 +65,8 @@ module Text.Pandoc.Parsing ( anyLine,
widthsFromIndices,
gridTableWith,
readWith,
- readWithWarnings,
+ returnWarnings,
+ returnState,
readWithM,
testStringWith,
guardEnabled,
@@ -104,11 +105,8 @@ module Text.Pandoc.Parsing ( anyLine,
applyMacros',
Parser,
ParserT,
- F(..),
- runF,
- askF,
- asksF,
token,
+ generalize,
-- * Re-exports from Text.Pandoc.Parsec
Stream,
runParser,
@@ -163,7 +161,8 @@ module Text.Pandoc.Parsing ( anyLine,
setSourceColumn,
setSourceLine,
newPos,
- addWarning
+ addWarning,
+ (<+?>)
)
where
@@ -188,30 +187,16 @@ import Data.Default
import qualified Data.Set as Set
import Control.Monad.Reader
import Control.Monad.Identity
-import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative)
+import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$))
import Data.Monoid
import Data.Maybe (catMaybes)
+import Text.Pandoc.Error
+
type Parser t s = Parsec t s
type ParserT = ParsecT
-newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor)
-
-runF :: F a -> ParserState -> a
-runF = runReader . unF
-
-askF :: F ParserState
-askF = F ask
-
-asksF :: (ParserState -> a) -> F a
-asksF f = F $ asks f
-
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = liftM mconcat . sequence
-
-- | Parse any line of text
anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char]
anyLine = do
@@ -863,36 +848,30 @@ readWithM :: (Monad m, Functor m)
=> ParserT [Char] st m a -- ^ parser
-> st -- ^ initial state
-> String -- ^ input
- -> m a
+ -> m (Either PandocError a)
readWithM parser state input =
- handleError <$> (runParserT parser state "source" input)
- where
- handleError (Left err') =
- let errPos = errorPos err'
- errLine = sourceLine errPos
- errColumn = sourceColumn errPos
- theline = (lines input ++ [""]) !! (errLine - 1)
- in error $ "\nError at " ++ show err' ++ "\n" ++
- theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++
- "^"
- handleError (Right result) = result
+ mapLeft (ParsecError input) <$> runParserT parser state "source" input
+
-- | Parse a string with a given parser and state
readWith :: Parser [Char] st a
-> st
-> String
- -> a
+ -> Either PandocError a
readWith p t inp = runIdentity $ readWithM p t inp
-readWithWarnings :: Parser [Char] ParserState a
- -> ParserState
- -> String
- -> (a, [String])
-readWithWarnings p = readWith $ do
+returnWarnings :: (Stream s m c)
+ => ParserT s ParserState m a
+ -> ParserT s ParserState m (a, [String])
+returnWarnings p = do
doc <- p
warnings <- stateWarnings <$> getState
return (doc, warnings)
+-- | Return the final internal state with the result of a parser
+returnState :: (Stream s m c) => ParsecT s st m a -> ParsecT s st m (a, st)
+returnState p = (,) <$> p <*> getState
+
-- | Parse a string with @parser@ (for testing).
testStringWith :: (Show a, Stream [Char] Identity Char)
=> ParserT [Char] ParserState Identity a
@@ -914,7 +893,6 @@ data ParserState = ParserState
stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
stateMeta :: Meta, -- ^ Document metadata
- stateMeta' :: F Meta, -- ^ Document metadata
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links)
stateIdentifiers :: [String], -- ^ List of header identifiers used
@@ -929,7 +907,8 @@ data ParserState = ParserState
stateCaption :: Maybe Inlines, -- ^ Caption in current environment
stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed
stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context
- stateWarnings :: [String] -- ^ Warnings generated by the parser
+ stateWarnings :: [String], -- ^ Warnings generated by the parser
+ stateInFootnote :: Bool -- ^ True if in a footnote block.
}
instance Default ParserState where
@@ -1011,7 +990,6 @@ defaultParserState =
stateNotes = [],
stateNotes' = [],
stateMeta = nullMeta,
- stateMeta' = return nullMeta,
stateHeaderTable = [],
stateHeaders = M.empty,
stateIdentifiers = [],
@@ -1024,7 +1002,8 @@ defaultParserState =
stateCaption = Nothing,
stateInHtmlBlock = Nothing,
stateMarkdownAttribute = False,
- stateWarnings = []}
+ stateWarnings = [],
+ stateInFootnote = False }
-- | Succeed only if the extension is enabled.
guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
@@ -1063,7 +1042,7 @@ data QuoteContext
type NoteTable = [(String, String)]
-type NoteTable' = [(String, F Blocks)] -- used in markdown reader
+type NoteTable' = [(String, Blocks)] -- used in markdown reader
newtype Key = Key String deriving (Show, Read, Eq, Ord)
@@ -1259,8 +1238,15 @@ applyMacros' target = do
else return target
-- | Append a warning to the log.
-addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState ()
+addWarning :: (Stream s m c) => Maybe SourcePos -> String -> ParserT s ParserState m ()
addWarning mbpos msg =
updateState $ \st -> st{
stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) :
stateWarnings st }
+
+generalize :: (Monad m) => Parser s st a -> ParserT s st m a
+generalize m = mkPT (\ s -> (return $ (return . runIdentity) <$> runIdentity (runParsecT m s)))
+
+infixr 5 <+?>
+(<+?>) :: (Monoid a, Monad m) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
+a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>)
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 2f2656086..9a97dfc21 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -250,6 +250,11 @@ renderDoc :: (IsString a, Monoid a)
=> Doc -> DocState a
renderDoc = renderList . toList . unDoc
+data IsBlock = IsBlock Int [String]
+
+-- This would be nicer with a pattern synonym
+-- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..)
+
renderList :: (IsString a, Monoid a)
=> [D] -> DocState a
renderList [] = return ()
@@ -323,11 +328,11 @@ renderList (BreakingSpace : xs) = do
outp 1 " "
renderList xs'
-renderList (b1@Block{} : b2@Block{} : xs) =
- renderList (mergeBlocks False b1 b2 : xs)
+renderList (Block i1 s1 : Block i2 s2 : xs) =
+ renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs)
-renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) =
- renderList (mergeBlocks True b1 b2 : xs)
+renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) =
+ renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs)
renderList (Block width lns : xs) = do
st <- get
@@ -339,15 +344,14 @@ renderList (Block width lns : xs) = do
modify $ \s -> s{ prefix = oldPref }
renderList xs
-mergeBlocks :: Bool -> D -> D -> D
-mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) =
+mergeBlocks :: Bool -> IsBlock -> IsBlock -> D
+mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) =
Block (w1 + w2 + if addSpace then 1 else 0) $
zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties)
where empties = replicate (abs $ length lns1 - length lns2) ""
pad n s = s ++ replicate (n - realLength s) ' '
sp "" = ""
sp xs = if addSpace then (' ' : xs) else xs
-mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!"
blockToDoc :: Int -> [String] -> Doc
blockToDoc _ lns = text $ intercalate "\n" lns
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
new file mode 100644
index 000000000..51a35c8ad
--- /dev/null
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -0,0 +1,119 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.CommonMark
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of CommonMark-formatted plain text to 'Pandoc' document.
+
+CommonMark is a strongly specified variant of Markdown: http://commonmark.org.
+-}
+module Text.Pandoc.Readers.CommonMark (readCommonMark)
+where
+
+import CMark
+import Data.Text (unpack, pack)
+import Data.List (groupBy)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Error
+
+-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
+readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc
+readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack
+ where opts' = if readerSmart opts
+ then [optNormalize, optSmart]
+ else [optNormalize]
+
+nodeToPandoc :: Node -> Pandoc
+nodeToPandoc (Node _ DOCUMENT nodes) =
+ Pandoc nullMeta $ foldr addBlock [] nodes
+nodeToPandoc n = -- shouldn't happen
+ Pandoc nullMeta $ foldr addBlock [] [n]
+
+addBlocks :: [Node] -> [Block]
+addBlocks = foldr addBlock []
+
+addBlock :: Node -> [Block] -> [Block]
+addBlock (Node _ PARAGRAPH nodes) =
+ (Para (addInlines nodes) :)
+addBlock (Node _ HRULE _) =
+ (HorizontalRule :)
+addBlock (Node _ BLOCK_QUOTE nodes) =
+ (BlockQuote (addBlocks nodes) :)
+addBlock (Node _ (HTML t) _) =
+ (RawBlock (Format "html") (unpack t) :)
+addBlock (Node _ (CODE_BLOCK info t) _) =
+ (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
+addBlock (Node _ (HEADER lev) nodes) =
+ (Header lev ("",[],[]) (addInlines nodes) :)
+addBlock (Node _ (LIST listAttrs) nodes) =
+ (constructor (map (setTightness . addBlocks . children) nodes) :)
+ where constructor = case listType listAttrs of
+ BULLET_LIST -> BulletList
+ ORDERED_LIST -> OrderedList
+ (start, DefaultStyle, delim)
+ start = listStart listAttrs
+ setTightness = if listTight listAttrs
+ then map paraToPlain
+ else id
+ paraToPlain (Para xs) = Plain (xs)
+ paraToPlain x = x
+ delim = case listDelim listAttrs of
+ PERIOD_DELIM -> Period
+ PAREN_DELIM -> OneParen
+addBlock (Node _ ITEM _) = id -- handled in LIST
+addBlock _ = id
+
+children :: Node -> [Node]
+children (Node _ _ ns) = ns
+
+addInlines :: [Node] -> [Inline]
+addInlines = foldr addInline []
+
+addInline :: Node -> [Inline] -> [Inline]
+addInline (Node _ (TEXT t) _) = (map toinl clumps ++)
+ where raw = unpack t
+ clumps = groupBy samekind raw
+ samekind ' ' ' ' = True
+ samekind ' ' _ = False
+ samekind _ ' ' = False
+ samekind _ _ = True
+ toinl (' ':_) = Space
+ toinl xs = Str xs
+addInline (Node _ LINEBREAK _) = (LineBreak :)
+addInline (Node _ SOFTBREAK _) = (Space :)
+addInline (Node _ (INLINE_HTML t) _) =
+ (RawInline (Format "html") (unpack t) :)
+addInline (Node _ (CODE t) _) =
+ (Code ("",[],[]) (unpack t) :)
+addInline (Node _ EMPH nodes) =
+ (Emph (addInlines nodes) :)
+addInline (Node _ STRONG nodes) =
+ (Strong (addInlines nodes) :)
+addInline (Node _ (LINK url title) nodes) =
+ (Link (addInlines nodes) (unpack url, unpack title) :)
+addInline (Node _ (IMAGE url title) nodes) =
+ (Image (addInlines nodes) (unpack url, unpack title) :)
+addInline _ = id
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 663960a87..98a142840 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -15,6 +15,9 @@ import Control.Applicative ((<$>))
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import Text.TeXMath (readMathML, writeTeX)
+import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.Compat.Except
+import Data.Default
{-
@@ -497,7 +500,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] ?asciidoc-br? - line break from asciidoc docbook output
-}
-type DB = State DBState
+type DB = ExceptT PandocError (State DBState)
data DBState = DBState{ dbSectionLevel :: Int
, dbQuoteType :: QuoteType
@@ -507,16 +510,18 @@ data DBState = DBState{ dbSectionLevel :: Int
, dbFigureTitle :: Inlines
} deriving Show
-readDocBook :: ReaderOptions -> String -> Pandoc
-readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs)
- where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp')
- DBState{ dbSectionLevel = 0
- , dbQuoteType = DoubleQuote
- , dbMeta = mempty
- , dbAcceptsMeta = False
- , dbBook = False
- , dbFigureTitle = mempty
- }
+instance Default DBState where
+ def = DBState{ dbSectionLevel = 0
+ , dbQuoteType = DoubleQuote
+ , dbMeta = mempty
+ , dbAcceptsMeta = False
+ , dbBook = False
+ , dbFigureTitle = mempty }
+
+
+readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc
+readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs
+ where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp'
inp' = handleInstructions inp
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
@@ -863,7 +868,9 @@ parseBlock (Elem e) =
parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
sect n = do isbook <- gets dbBook
let n' = if isbook || n == 0 then n + 1 else n
- headerText <- case filterChild (named "title") e of
+ headerText <- case filterChild (named "title") e `mplus`
+ (filterChild (named "info") e >>=
+ filterChild (named "title")) of
Just t -> getInlines t
Nothing -> return mempty
modify $ \st -> st{ dbSectionLevel = n }
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index d4680cb7e..67a97ae85 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -96,14 +96,17 @@ import Control.Applicative ((<$>))
import Data.Sequence (ViewL(..), viewl)
import qualified Data.Sequence as Seq (null)
+import Text.Pandoc.Error
+import Text.Pandoc.Compat.Except
+
readDocx :: ReaderOptions
-> B.ByteString
- -> (Pandoc, MediaBag)
+ -> Either PandocError (Pandoc, MediaBag)
readDocx opts bytes =
case archiveToDocx (toArchive bytes) of
- Right docx -> (Pandoc meta blks, mediaBag) where
- (meta, blks, mediaBag) = (docxToOutput opts docx)
- Left _ -> error $ "couldn't parse docx file"
+ Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag))
+ <$> (docxToOutput opts docx)
+ Left _ -> Left (ParseFailure "couldn't parse docx file")
data DState = DState { docxAnchorMap :: M.Map String String
, docxMediaBag :: MediaBag
@@ -122,10 +125,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions
instance Default DEnv where
def = DEnv def False
-type DocxContext = ReaderT DEnv (State DState)
+type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState))
-evalDocxContext :: DocxContext a -> DEnv -> DState -> a
-evalDocxContext ctx env st = evalState (runReaderT ctx env) st
+evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a
+evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx
-- This is empty, but we put it in for future-proofing.
spansToKeep :: [String]
@@ -277,7 +280,13 @@ runToInlines :: Run -> DocxContext Inlines
runToInlines (Run rs runElems)
| Just (s, _) <- rStyle rs
, s `elem` codeStyles =
- return $ code $ concatMap runElemToString runElems
+ let rPr = resolveDependentRunStyle rs
+ codeString = code $ concatMap runElemToString runElems
+ in
+ return $ case rVertAlign rPr of
+ Just SupScrpt -> superscript codeString
+ Just SubScrpt -> subscript codeString
+ _ -> codeString
| otherwise = do
let ils = concatReduce (map runElemToInlines runElems)
return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
@@ -545,7 +554,7 @@ bodyToOutput (Body bps) = do
blks',
mediaBag)
-docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
+docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag)
docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 671d2acf3..cce80fb48 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp)
+import Text.Pandoc.Readers.Docx.Util
import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes
@@ -108,8 +109,6 @@ mapD f xs =
in
concatMapM handler xs
-type NameSpaces = [(String, String)]
-
data Docx = Docx Document
deriving Show
@@ -158,6 +157,7 @@ data ParagraphStyle = ParagraphStyle { pStyle :: [String]
, indentation :: Maybe ParIndentation
, dropCap :: Bool
, pHeading :: Maybe (String, Int)
+ , pNumInfo :: Maybe (String, String)
, pBlockQuote :: Maybe Bool
}
deriving Show
@@ -167,6 +167,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
, indentation = Nothing
, dropCap = False
, pHeading = Nothing
+ , pNumInfo = Nothing
, pBlockQuote = Nothing
}
@@ -224,6 +225,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
, isBlockQuote :: Maybe Bool
+ , numInfo :: Maybe (String, String)
, psStyle :: Maybe ParStyle}
deriving Show
@@ -246,10 +248,6 @@ type ChangeId = String
type Author = String
type ChangeDate = String
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _ = Nothing
-
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = do
let notes = archiveToNotes archive
@@ -266,7 +264,7 @@ archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
+ let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
body <- elemToBody namespaces bodyElem
return $ Document namespaces body
@@ -285,7 +283,7 @@ archiveToStyles zf =
case stylesElem of
Nothing -> (M.empty, M.empty)
Just styElem ->
- let namespaces = mapMaybe attrToNSPair (elAttribs styElem)
+ let namespaces = elemToNameSpaces styElem
in
( M.fromList $ buildBasedOnList namespaces styElem
(Nothing :: Maybe CharStyle),
@@ -353,10 +351,10 @@ archiveToNotes zf =
enElem = findEntryByPath "word/endnotes.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
fn_namespaces = case fnElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
en_namespaces = case enElem of
- Just e -> mapMaybe attrToNSPair (elAttribs e)
+ Just e -> elemToNameSpaces e
Nothing -> []
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
fn = fnElem >>= (elemToNotes ns "footnote")
@@ -456,7 +454,7 @@ archiveToNumbering' zf = do
Nothing -> Just $ Numbering [] [] []
Just entry -> do
numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
- let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem)
+ let namespaces = elemToNameSpaces numberingElem
numElems = findChildren
(QName "num" (lookup "w" namespaces) (Just "w"))
numberingElem
@@ -485,15 +483,6 @@ elemToNotes _ _ _ = Nothing
---------------------------------------------
---------------------------------------------
-elemName :: NameSpaces -> String -> String -> QName
-elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix))
-
-isElem :: NameSpaces -> String -> String -> Element -> Bool
-isElem ns prefix name element =
- qName (elName element) == name &&
- qURI (elName element) == (lookup prefix ns)
-
-
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
let cols = findChildren (elemName ns "w" "gridCol") element
@@ -546,20 +535,6 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
stringToInteger}
elemToParIndentation _ _ = Nothing
-
-elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
-elemToNumInfo ns element | isElem ns "w" "p" element = do
- let pPr = findChild (elemName ns "w" "pPr") element
- numPr = pPr >>= findChild (elemName ns "w" "numPr")
- lvl <- numPr >>=
- findChild (elemName ns "w" "ilvl") >>=
- findAttr (elemName ns "w" "val")
- numId <- numPr >>=
- findChild (elemName ns "w" "numId") >>=
- findAttr (elemName ns "w" "val")
- return (numId, lvl)
-elemToNumInfo _ _ = Nothing
-
testBitMask :: String -> Int -> Bool
testBitMask bitMaskS n =
case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
@@ -578,20 +553,28 @@ elemToBodyPart ns element
return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
- , Just (numId, lvl) <- elemToNumInfo ns element = do
+ , Just (numId, lvl) <- getNumInfo ns element = do
sty <- asks envParStyles
let parstyle = elemToParagraphStyle ns element sty
parparts <- mapD (elemToParPart ns) (elChildren element)
num <- asks envNumbering
case lookupLevel numId lvl num of
- Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
- Nothing -> throwError WrongElem
+ Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
+ Nothing -> throwError WrongElem
elemToBodyPart ns element
| isElem ns "w" "p" element = do
- sty <- asks envParStyles
- let parstyle = elemToParagraphStyle ns element sty
- parparts <- mapD (elemToParPart ns) (elChildren element)
- return $ Paragraph parstyle parparts
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
+ parparts <- mapD (elemToParPart ns) (elChildren element)
+ case pNumInfo parstyle of
+ Just (numId, lvl) -> do
+ num <- asks envNumbering
+ case lookupLevel numId lvl num of
+ Just levelInfo ->
+ return $ ListItem parstyle numId lvl levelInfo parparts
+ Nothing ->
+ throwError WrongElem
+ Nothing -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChild (elemName ns "w" "tblPr") element
@@ -771,6 +754,7 @@ elemToParagraphStyle ns element sty
Just _ -> True
Nothing -> False
, pHeading = getParStyleField headingLev sty style
+ , pNumInfo = getParStyleField numInfo sty style
, pBlockQuote = getParStyleField isBlockQuote sty style
}
elemToParagraphStyle _ _ _ = defaultParagraphStyle
@@ -857,12 +841,26 @@ getBlockQuote ns element
, styleName `elem` blockQuoteStyleNames = Just True
getBlockQuote _ _ = Nothing
+getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo ns element = do
+ let numPr = findChild (elemName ns "w" "pPr") element >>=
+ findChild (elemName ns "w" "numPr")
+ lvl = fromMaybe "0" (numPr >>=
+ findChild (elemName ns "w" "ilvl") >>=
+ findAttr (elemName ns "w" "val"))
+ numId <- numPr >>=
+ findChild (elemName ns "w" "numId") >>=
+ findAttr (elemName ns "w" "val")
+ return (numId, lvl)
+
+
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
elemToParStyleData ns element parentStyle =
ParStyleData
{
headingLev = getHeaderLevel ns element
, isBlockQuote = getBlockQuote ns element
+ , numInfo = getNumInfo ns element
, psStyle = parentStyle
}
diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
new file mode 100644
index 000000000..2901ea2a3
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs
@@ -0,0 +1,106 @@
+module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..)
+ , defaultStyleMaps
+ , getStyleMaps
+ , getStyleId
+ , hasStyleName
+ ) where
+
+import Text.XML.Light
+import Text.Pandoc.Readers.Docx.Util
+import Control.Monad.State
+import Data.Char (toLower)
+import qualified Data.Map as M
+
+newtype ParaStyleMap = ParaStyleMap ( M.Map String String )
+newtype CharStyleMap = CharStyleMap ( M.Map String String )
+
+class StyleMap a where
+ alterMap :: (M.Map String String -> M.Map String String) -> a -> a
+ getMap :: a -> M.Map String String
+
+instance StyleMap ParaStyleMap where
+ alterMap f (ParaStyleMap m) = ParaStyleMap $ f m
+ getMap (ParaStyleMap m) = m
+
+instance StyleMap CharStyleMap where
+ alterMap f (CharStyleMap m) = CharStyleMap $ f m
+ getMap (CharStyleMap m) = m
+
+insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a
+insert (Just k) (Just v) m = alterMap (M.insert k v) m
+insert _ _ m = m
+
+getStyleId :: (StyleMap a) => String -> a -> String
+getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap
+
+hasStyleName :: (StyleMap a) => String -> a -> Bool
+hasStyleName styleName = M.member (map toLower styleName) . getMap
+
+data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces
+ , sParaStyleMap :: ParaStyleMap
+ , sCharStyleMap :: CharStyleMap
+ }
+
+data StyleType = ParaStyle | CharStyle
+
+defaultStyleMaps :: StyleMaps
+defaultStyleMaps = StyleMaps { sNameSpaces = []
+ , sParaStyleMap = ParaStyleMap M.empty
+ , sCharStyleMap = CharStyleMap M.empty
+ }
+
+type StateM a = State StyleMaps a
+
+getStyleMaps :: Element -> StyleMaps
+getStyleMaps docElem = execState genStyleMap state'
+ where
+ state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem}
+ genStyleItem e = do
+ styleType <- getStyleType e
+ styleId <- getAttrStyleId e
+ nameValLowercase <- fmap (map toLower) `fmap` getNameVal e
+ case styleType of
+ Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId
+ Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId
+ _ -> return ()
+ genStyleMap = do
+ style <- elemName' "style"
+ let styles = findChildren style docElem
+ forM_ styles genStyleItem
+
+modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM ()
+modParaStyleMap f = modify $ \s ->
+ s {sParaStyleMap = f $ sParaStyleMap s}
+
+modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM ()
+modCharStyleMap f = modify $ \s ->
+ s {sCharStyleMap = f $ sCharStyleMap s}
+
+getStyleType :: Element -> StateM (Maybe StyleType)
+getStyleType e = do
+ styleTypeStr <- getAttrType e
+ case styleTypeStr of
+ Just "paragraph" -> return $ Just ParaStyle
+ Just "character" -> return $ Just CharStyle
+ _ -> return Nothing
+
+getAttrType :: Element -> StateM (Maybe String)
+getAttrType el = do
+ name <- elemName' "type"
+ return $ findAttr name el
+
+getAttrStyleId :: Element -> StateM (Maybe String)
+getAttrStyleId el = do
+ name <- elemName' "styleId"
+ return $ findAttr name el
+
+getNameVal :: Element -> StateM (Maybe String)
+getNameVal el = do
+ name <- elemName' "name"
+ val <- elemName' "val"
+ return $ findChild name el >>= findAttr val
+
+elemName' :: String -> StateM QName
+elemName' name = do
+ namespaces <- gets sNameSpaces
+ return $ elemName namespaces "w" name
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
new file mode 100644
index 000000000..891f107b0
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -0,0 +1,26 @@
+module Text.Pandoc.Readers.Docx.Util (
+ NameSpaces
+ , elemName
+ , isElem
+ , elemToNameSpaces
+ ) where
+
+import Text.XML.Light
+import Data.Maybe (mapMaybe)
+
+type NameSpaces = [(String, String)]
+
+elemToNameSpaces :: Element -> NameSpaces
+elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+
+attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair _ = Nothing
+
+elemName :: NameSpaces -> String -> String -> QName
+elemName ns prefix name = QName name (lookup prefix ns) (Just prefix)
+
+isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem ns prefix name element =
+ qName (elName element) == name &&
+ qURI (elName element) == lookup prefix ns
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index b061d8683..338540533 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -35,18 +35,20 @@ import Control.DeepSeq.Generics (deepseq, NFData)
import Debug.Trace (trace)
+import Text.Pandoc.Error
+
type Items = M.Map String (FilePath, MimeType)
-readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)
+readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag)
readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes)
-runEPUB :: Except String a -> a
-runEPUB = either error id . runExcept
+runEPUB :: Except PandocError a -> Either PandocError a
+runEPUB = runExcept
-- Note that internal reference are aggresively normalised so that all ids
-- are of the form "filename#id"
--
-archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
+archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag)
archiveToEPUB os archive = do
-- root is path to folder with manifest file in
(root, content) <- getManifest archive
@@ -64,19 +66,20 @@ archiveToEPUB os archive = do
return $ (ast, mediaBag)
where
os' = os {readerParseRaw = True}
- parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc
+ parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
when (readerTrace os) (traceM path)
doc <- mimeToReader mime r path
let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
return $ docSpan <> doc
- mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc
+ mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc
mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do
fname <- findEntryByPathE (root </> path) archive
- return $ fixInternalReferences path .
+ html <- either throwError return .
readHtml os' .
UTF8.toStringLazy $
fromEntry fname
+ return $ fixInternalReferences path html
mimeToReader s _ path
| s `elem` imageMimes = return $ imageToPandoc path
| otherwise = return $ mempty
@@ -114,7 +117,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"]
type CoverImage = FilePath
-parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items)
+parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items)
parseManifest content = do
manifest <- findElementE (dfName "manifest") content
let items = findChildren (dfName "item") manifest
@@ -130,7 +133,7 @@ parseManifest content = do
mime <- findAttrE (emptyName "media-type") e
return (uid, (href, mime))
-parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)]
+parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
spine <- findElementE (dfName "spine") e
let itemRefs = findChildren (dfName "itemref") spine
@@ -141,7 +144,7 @@ parseSpine is e = do
guard linear
findAttr (emptyName "idref") ref
-parseMeta :: MonadError String m => Element -> m Meta
+parseMeta :: MonadError PandocError m => Element -> m Meta
parseMeta content = do
meta <- findElementE (dfName "metadata") content
let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True
@@ -159,7 +162,7 @@ renameMeta :: String -> String
renameMeta "creator" = "author"
renameMeta s = s
-getManifest :: MonadError String m => Archive -> m (String, Element)
+getManifest :: MonadError PandocError m => Archive -> m (String, Element)
getManifest archive = do
metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
@@ -266,18 +269,18 @@ emptyName s = QName s Nothing Nothing
-- Convert Maybe interface to Either
-findAttrE :: MonadError String m => QName -> Element -> m String
+findAttrE :: MonadError PandocError m => QName -> Element -> m String
findAttrE q e = mkE "findAttr" $ findAttr q e
-findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry
+findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry
findEntryByPathE (normalise -> path) a =
mkE ("No entry on path: " ++ path) $ findEntryByPath path a
-parseXMLDocE :: MonadError String m => String -> m Element
+parseXMLDocE :: MonadError PandocError m => String -> m Element
parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
-findElementE :: MonadError String m => QName -> Element -> m Element
+findElementE :: MonadError PandocError m => QName -> Element -> m Element
findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
-mkE :: MonadError String m => String -> Maybe a -> m a
-mkE s = maybe (throwError s) return
+mkE :: MonadError PandocError m => String -> Maybe a -> m a
+mkE s = maybe (throwError . ParseFailure $ s) return
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 02ff07e73..52358e51e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses,
+ViewPatterns#-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -43,7 +44,7 @@ import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, renderTags'
- , escapeURI, safeRead )
+ , escapeURI, safeRead, mapLeft )
import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
, Extension (Ext_epub_html_exts,
Ext_native_divs, Ext_native_spans))
@@ -62,15 +63,18 @@ import Text.TeXMath (readMathML, writeTeX)
import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
+import Text.Pandoc.Error
+
+import Text.Parsec.Error
+
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readHtml opts inp =
- case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of
- Left err' -> error $ "\nError at " ++ show err'
- Right result -> result
+ mapLeft (ParseFailure . getError) . flip runReader def $
+ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
@@ -78,6 +82,9 @@ readHtml opts inp =
meta <- stateMeta . parserState <$> getState
bs' <- replaceNotes (B.toList blocks)
return $ Pandoc meta bs'
+ getError (errorMessages -> ms) = case ms of
+ [] -> ""
+ (m:_) -> messageString m
replaceNotes :: [Block] -> TagParser [Block]
replaceNotes = walkM replaceNotes'
@@ -880,7 +887,7 @@ htmlTag :: Monad m
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
- lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
+ lookAhead $ char '<' >> ((oneOf "/!?" >> nonspaceChar) <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
guard $ f next
-- advance the parser
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index c03382c17..aa2534afc 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -26,15 +26,17 @@ import Documentation.Haddock.Parser
import Documentation.Haddock.Types
import Debug.Trace (trace)
+import Text.Pandoc.Error
+
-- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
- -> Pandoc
+ -> Either PandocError Pandoc
readHaddock opts =
#if MIN_VERSION_haddock_library(1,2,0)
- B.doc . docHToBlocks . trace' . _doc . parseParas
+ Right . B.doc . docHToBlocks . trace' . _doc . parseParas
#else
- B.doc . docHToBlocks . trace' . parseParas
+ Right . B.doc . docHToBlocks . trace' . parseParas
#endif
where trace' x = if readerTrace opts
then trace (show x) x
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 942b9f3b3..08aa0b20e 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -42,26 +42,25 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding ((<|>), many, optional, space,
mathDisplay, mathInline)
import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Char ( chr, ord )
+import Data.Char ( chr, ord, isLetter, isAlphaNum )
import Control.Monad.Trans (lift)
import Control.Monad
import Text.Pandoc.Builder
-import Data.Char (isLetter, isAlphaNum)
import Control.Applicative
import Data.Monoid
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, maybeToList)
import System.Environment (getEnv)
-import System.FilePath (replaceExtension, (</>))
-import Data.List (intercalate, intersperse)
+import System.FilePath (replaceExtension, (</>), takeExtension, addExtension)
+import Data.List (intercalate)
import qualified Data.Map as M
import qualified Control.Exception as E
-import System.FilePath (takeExtension, addExtension)
import Text.Pandoc.Highlighting (fromListingsLanguage)
+import Text.Pandoc.Error
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts }
parseLaTeX :: LP Pandoc
@@ -73,17 +72,16 @@ parseLaTeX = do
let (Pandoc _ bs') = doc bs
return $ Pandoc meta bs'
-type LP = Parser [Char] ParserState
+type LP = Parser String ParserState
anyControlSeq :: LP String
anyControlSeq = do
char '\\'
next <- option '\n' anyChar
- name <- case next of
- '\n' -> return ""
- c | isLetter c -> (c:) <$> (many letter <* optional sp)
- | otherwise -> return [c]
- return name
+ case next of
+ '\n' -> return ""
+ c | isLetter c -> (c:) <$> (many letter <* optional sp)
+ | otherwise -> return [c]
controlSeq :: String -> LP String
controlSeq name = try $ do
@@ -103,7 +101,7 @@ dimenarg = try $ do
sp :: LP ()
sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
- <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline)
+ <|> try (newline <* lookAhead anyChar <* notFollowedBy blankline)
isLowerHex :: Char -> Bool
isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
@@ -161,30 +159,28 @@ mathInline :: LP String -> LP Inlines
mathInline p = math <$> (try p >>= applyMacros')
mathChars :: LP String
-mathChars = concat <$>
- many ( many1 (satisfy (\c -> c /= '$' && c /='\\'))
- <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar)
- )
+mathChars = (concat <$>) $
+ many $
+ many1 (satisfy (\c -> c /= '$' && c /='\\'))
+ <|> (\c -> ['\\',c]) <$> try (char '\\' *> anyChar)
quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines
quoted' f starter ender = do
startchs <- starter
try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs
-double_quote :: LP Inlines
-double_quote =
- ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
+doubleQuote :: LP Inlines
+doubleQuote =
+ quoted' doubleQuoted (try $ string "``") (void $ try $ string "''")
<|> quoted' doubleQuoted (string "“") (void $ char '”')
-- the following is used by babel for localized quotes:
<|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'")
<|> quoted' doubleQuoted (string "\"") (void $ char '"')
- )
-single_quote :: LP Inlines
-single_quote =
- ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
+singleQuote :: LP Inlines
+singleQuote =
+ quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter)
<|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter)
- )
inline :: LP Inlines
inline = (mempty <$ comment)
@@ -194,17 +190,17 @@ inline = (mempty <$ comment)
<|> inlineEnvironment
<|> inlineGroup
<|> (char '-' *> option (str "-")
- ((char '-') *> option (str "–") (str "—" <$ char '-')))
- <|> double_quote
- <|> single_quote
+ (char '-' *> option (str "–") (str "—" <$ char '-')))
+ <|> doubleQuote
+ <|> singleQuote
<|> (str "”" <$ try (string "''"))
<|> (str "”" <$ char '”')
<|> (str "’" <$ char '\'')
<|> (str "’" <$ char '’')
<|> (str "\160" <$ char '~')
- <|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
- <|> (mathInline $ char '$' *> mathChars <* char '$')
- <|> (superscript <$> (char '^' *> tok))
+ <|> mathDisplay (string "$$" *> mathChars <* string "$$")
+ <|> mathInline (char '$' *> mathChars <* char '$')
+ <|> try (superscript <$> (char '^' *> tok))
<|> (subscript <$> (char '_' *> tok))
<|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb)
<|> (str . (:[]) <$> tildeEscape)
@@ -238,20 +234,32 @@ block = (mempty <$ comment)
blocks :: LP Blocks
blocks = mconcat <$> many block
+getRawCommand :: String -> LP String
+getRawCommand name' = do
+ rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced)
+ return $ '\\' : name' ++ snd rawargs
+
+lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
+lookupListDefault d = (fromMaybe d .) . lookupList
+ where
+ lookupList l m = msum $ map (`M.lookup` m) l
+
blockCommand :: LP Blocks
blockCommand = try $ do
name <- anyControlSeq
guard $ name /= "begin" && name /= "end"
star <- option "" (string "*" <* optional sp)
let name' = name ++ star
- case M.lookup name' blockCommands of
- Just p -> p
- Nothing -> case M.lookup name blockCommands of
- Just p -> p
- Nothing -> mzero
+ let raw = do
+ rawcommand <- getRawCommand name'
+ transformed <- applyMacros' rawcommand
+ guard $ transformed /= rawcommand
+ notFollowedBy $ parseFromString inlines transformed
+ parseFromString blocks transformed
+ lookupListDefault raw [name',name] blockCommands
inBrackets :: Inlines -> Inlines
-inBrackets x = (str "[") <> x <> (str "]")
+inBrackets x = str "[" <> x <> str "]"
-- eat an optional argument and one or more arguments in braces
ignoreInlines :: String -> (String, LP Inlines)
@@ -259,19 +267,21 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawInline "latex" . (contseq ++) . snd) <$>
- (getOption readerParseRaw >>= guard >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> withRaw optargs)
ignoreBlocks :: String -> (String, LP Blocks)
ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
where optargs = skipopts *> skipMany (try $ optional sp *> braced)
contseq = '\\':name
doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
- (getOption readerParseRaw >>= guard >> (withRaw optargs))
+ (getOption readerParseRaw >>= guard >> withRaw optargs)
blockCommands :: M.Map String (LP Blocks)
blockCommands = M.fromList $
[ ("par", mempty <$ skipopts)
- , ("title", mempty <$ (skipopts *> tok >>= addMeta "title"))
+ , ("title", mempty <$ (skipopts *>
+ (grouped inline >>= addMeta "title")
+ <|> (grouped block >>= addMeta "title")))
, ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
, ("author", mempty <$ (skipopts *> authors))
-- -- in letter class, temp. store address & sig as title, author
@@ -302,7 +312,7 @@ blockCommands = M.fromList $
--
, ("hrule", pure horizontalRule)
, ("rule", skipopts *> tok *> tok *> pure horizontalRule)
- , ("item", skipopts *> loose_item)
+ , ("item", skipopts *> looseItem)
, ("documentclass", skipopts *> braced *> preamble)
, ("centerline", (para . trimInlines) <$> (skipopts *> tok))
, ("caption", skipopts *> setCaption)
@@ -341,7 +351,7 @@ setCaption :: LP Blocks
setCaption = do
ils <- tok
mblabel <- option Nothing $
- try $ spaces >> controlSeq "label" >> (Just <$> tok)
+ try $ spaces' >> controlSeq "label" >> (Just <$> tok)
let ils' = case mblabel of
Just lab -> ils <> spanWith
("",[],[("data-label", stringify lab)]) mempty
@@ -369,7 +379,7 @@ section (ident, classes, kvs) lvl = do
let lvl' = if hasChapters then lvl + 1 else lvl
skipopts
contents <- grouped inline
- lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced)
+ lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced)
attr' <- registerHeader (lab, classes, kvs) contents
return $ headerWith attr' lvl' contents
@@ -382,25 +392,21 @@ inlineCommand = try $ do
star <- option "" (string "*")
let name' = name ++ star
let raw = do
- rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced)
- let rawcommand = '\\' : name ++ star ++ snd rawargs
+ rawcommand <- getRawCommand name'
transformed <- applyMacros' rawcommand
if transformed /= rawcommand
then parseFromString inlines transformed
else if parseRaw
then return $ rawInline "latex" rawcommand
else return mempty
- case M.lookup name' inlineCommands of
- Just p -> p <|> raw
- Nothing -> case M.lookup name inlineCommands of
- Just p -> p <|> raw
- Nothing -> raw
+ lookupListDefault mzero [name',name] inlineCommands
+ <|> raw
unlessParseRaw :: LP ()
unlessParseRaw = getOption readerParseRaw >>= guard . not
isBlockCommand :: String -> Bool
-isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
+isBlockCommand s = s `M.member` blockCommands
inlineEnvironments :: M.Map String (LP Inlines)
@@ -446,7 +452,7 @@ inlineCommands = M.fromList $
, ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty
, ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
, ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
- , ("ensuremath", mathInline $ braced)
+ , ("ensuremath", mathInline braced)
, ("texorpdfstring", (\_ x -> x) <$> tok <*> tok)
, ("P", lit "¶")
, ("S", lit "§")
@@ -495,7 +501,7 @@ inlineCommands = M.fromList $
, ("v", option (str "v") $ try $ tok >>= accent hacek)
, ("u", option (str "u") $ try $ tok >>= accent breve)
, ("i", lit "i")
- , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp))
+ , ("\\", linebreak <$ (optional (bracketed inline) *> spaces'))
, (",", pure mempty)
, ("@", pure mempty)
, (" ", lit "\160")
@@ -508,7 +514,7 @@ inlineCommands = M.fromList $
, ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
, ("verb", doverb)
- , ("lstinline", doverb)
+ , ("lstinline", skipopts *> doverb)
, ("Verb", doverb)
, ("texttt", (code . stringify . toList) <$> tok)
, ("url", (unescapeURL <$> braced) >>= \url ->
@@ -618,7 +624,7 @@ lit = pure . str
accent :: (Char -> String) -> Inlines -> LP Inlines
accent f ils =
case toList ils of
- (Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys)
+ (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys)
[] -> mzero
_ -> return ils
@@ -807,7 +813,7 @@ breve 'u' = "ŭ"
breve c = [c]
tok :: LP Inlines
-tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
+tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar
opt :: LP Inlines
opt = bracketed inline <* optional sp
@@ -825,17 +831,14 @@ environment :: LP Blocks
environment = do
controlSeq "begin"
name <- braced
- case M.lookup name environments of
- Just p -> p <|> rawEnv name
- Nothing -> rawEnv name
+ M.findWithDefault mzero name environments
+ <|> rawEnv name
inlineEnvironment :: LP Inlines
inlineEnvironment = try $ do
controlSeq "begin"
name <- braced
- case M.lookup name inlineEnvironments of
- Just p -> p
- Nothing -> mzero
+ M.findWithDefault mzero name inlineEnvironments
rawEnv :: String -> LP Blocks
rawEnv name = do
@@ -848,15 +851,11 @@ rawEnv name = do
----
-type IncludeParser = ParserT [Char] [String] IO String
+type IncludeParser = ParserT String [String] IO String
-- | Replace "include" commands with file contents.
-handleIncludes :: String -> IO String
-handleIncludes s = do
- res <- runParserT includeParser' [] "input" s
- case res of
- Right s' -> return s'
- Left e -> error $ show e
+handleIncludes :: String -> IO (Either PandocError String)
+handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s
includeParser' :: IncludeParser
includeParser' =
@@ -912,7 +911,7 @@ include' = do
<|> try (string "input")
<|> string "usepackage"
-- skip options
- skipMany $ try $ char '[' *> (manyTill anyChar (char ']'))
+ skipMany $ try $ char '[' *> manyTill anyChar (char ']')
fs <- (map trim . splitBy (==',')) <$> braced'
return $ if name == "usepackage"
then map (maybeAddExtension ".sty") fs
@@ -985,14 +984,14 @@ keyvals = try $ char '[' *> manyTill keyval (char ']')
alltt :: String -> LP Blocks
alltt t = walk strToCode <$> parseFromString blocks
(substitute " " "\\ " $ substitute "%" "\\%" $
- concat $ intersperse "\\\\\n" $ lines t)
+ intercalate "\\\\\n" $ lines t)
where strToCode (Str s) = Code nullAttr s
strToCode x = x
-rawLaTeXBlock :: Parser [Char] ParserState String
+rawLaTeXBlock :: LP String
rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand))
-rawLaTeXInline :: Parser [Char] ParserState Inline
+rawLaTeXInline :: LP Inline
rawLaTeXInline = do
raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand)
RawInline "latex" <$> applyMacros' raw
@@ -1001,24 +1000,24 @@ addImageCaption :: Blocks -> LP Blocks
addImageCaption = walkM go
where go (Image alt (src,tit)) = do
mbcapt <- stateCaption <$> getState
- case mbcapt of
- Just ils -> return (Image (toList ils) (src, "fig:"))
- Nothing -> return (Image alt (src,tit))
+ return $ case mbcapt of
+ Just ils -> Image (toList ils) (src, "fig:")
+ Nothing -> Image alt (src,tit)
go x = return x
addTableCaption :: Blocks -> LP Blocks
addTableCaption = walkM go
where go (Table c als ws hs rs) = do
mbcapt <- stateCaption <$> getState
- case mbcapt of
- Just ils -> return (Table (toList ils) als ws hs rs)
- Nothing -> return (Table c als ws hs rs)
+ return $ case mbcapt of
+ Just ils -> Table (toList ils) als ws hs rs
+ Nothing -> Table c als ws hs rs
go x = return x
environments :: M.Map String (LP Blocks)
environments = M.fromList
[ ("document", env "document" blocks <* skipMany anyChar)
- , ("letter", env "letter" letter_contents)
+ , ("letter", env "letter" letterContents)
, ("figure", env "figure" $
resetCaption *> skipopts *> blocks >>= addImageCaption)
, ("center", env "center" blocks)
@@ -1031,12 +1030,12 @@ environments = M.fromList
, ("verse", blockQuote <$> env "verse" blocks)
, ("itemize", bulletList <$> listenv "itemize" (many item))
, ("description", definitionList <$> listenv "description" (many descItem))
- , ("enumerate", ordered_list)
+ , ("enumerate", orderedList')
, ("alltt", alltt =<< verbEnv "alltt")
, ("code", guardEnabled Ext_literate_haskell *>
(codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
verbEnv "code"))
- , ("verbatim", codeBlock <$> (verbEnv "verbatim"))
+ , ("verbatim", codeBlock <$> verbEnv "verbatim")
, ("Verbatim", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
@@ -1044,17 +1043,17 @@ environments = M.fromList
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
let attr = ("",classes,kvs)
- codeBlockWith attr <$> (verbEnv "Verbatim"))
+ codeBlockWith attr <$> verbEnv "Verbatim")
, ("lstlisting", do options <- option [] keyvals
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
let classes = [ "numberLines" |
lookup "numbers" options == Just "left" ]
- ++ maybe [] (:[]) (lookup "language" options
+ ++ maybeToList (lookup "language" options
>>= fromListingsLanguage)
let attr = (fromMaybe "" (lookup "label" options),classes,kvs)
- codeBlockWith attr <$> (verbEnv "lstlisting"))
+ codeBlockWith attr <$> verbEnv "lstlisting")
, ("minted", do options <- option [] keyvals
lang <- grouped (many1 $ satisfy (/='}'))
let kvs = [ (if k == "firstnumber"
@@ -1064,7 +1063,7 @@ environments = M.fromList
[ "numberLines" |
lookup "linenos" options == Just "true" ]
let attr = ("",classes,kvs)
- codeBlockWith attr <$> (verbEnv "minted"))
+ codeBlockWith attr <$> verbEnv "minted")
, ("obeylines", parseFromString
(para . trimInlines . mconcat <$> many inline) =<<
intercalate "\\\\\n" . lines <$> verbEnv "obeylines")
@@ -1083,8 +1082,8 @@ environments = M.fromList
, ("alignat*", mathEnv para (Just "aligned") "alignat*")
]
-letter_contents :: LP Blocks
-letter_contents = do
+letterContents :: LP Blocks
+letterContents = do
bs <- blocks
st <- getState
-- add signature (author) and address (title)
@@ -1111,8 +1110,8 @@ closing = do
item :: LP Blocks
item = blocks *> controlSeq "item" *> skipopts *> blocks
-loose_item :: LP Blocks
-loose_item = do
+looseItem :: LP Blocks
+looseItem = do
ctx <- stateParserContext `fmap` getState
if ctx == ListItemState
then mzero
@@ -1155,8 +1154,8 @@ verbEnv name = do
res <- manyTill anyChar endEnv
return $ stripTrailingNewlines res
-ordered_list :: LP Blocks
-ordered_list = do
+orderedList' :: LP Blocks
+orderedList' = do
optional sp
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ char '[' *> anyOrderedListMarker <* char ']'
@@ -1168,7 +1167,7 @@ ordered_list = do
optional sp
num <- grouped (many1 digit)
spaces
- return $ (read num + 1 :: Int)
+ return (read num + 1 :: Int)
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
@@ -1182,14 +1181,14 @@ paragraph = do
preamble :: LP Blocks
preamble = mempty <$> manyTill preambleBlock beginDoc
where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
- preambleBlock = (void comment)
- <|> (void sp)
- <|> (void blanklines)
- <|> (void macro)
- <|> (void blockCommand)
- <|> (void anyControlSeq)
- <|> (void braced)
- <|> (void anyChar)
+ preambleBlock = void comment
+ <|> void sp
+ <|> void blanklines
+ <|> void macro
+ <|> void blockCommand
+ <|> void anyControlSeq
+ <|> void braced
+ <|> void anyChar
-------
@@ -1265,7 +1264,7 @@ complexNatbibCitation mode = try $ do
suff <- ils
skipSpaces
optional $ char ';'
- return $ addPrefix pref $ addSuffix suff $ cits'
+ return $ addPrefix pref $ addSuffix suff cits'
(c:cits, raw) <- withRaw $ grouped parseOne
return $ cite (c{ citationMode = mode }:cits)
(rawInline "latex" $ "\\citetext" ++ raw)
@@ -1275,7 +1274,7 @@ complexNatbibCitation mode = try $ do
parseAligns :: LP [Alignment]
parseAligns = try $ do
char '{'
- let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}")
+ let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced)
maybeBar
let cAlign = AlignCenter <$ char 'c'
let lAlign = AlignLeft <$ char 'l'
@@ -1289,13 +1288,13 @@ parseAligns = try $ do
return aligns'
hline :: LP ()
-hline = () <$ (try $ spaces >> controlSeq "hline")
+hline = () <$ try (spaces' *> controlSeq "hline" <* spaces')
lbreak :: LP ()
-lbreak = () <$ (try $ spaces *> controlSeq "\\")
+lbreak = () <$ try (spaces' *> controlSeq "\\" <* spaces')
amp :: LP ()
-amp = () <$ (try $ spaces *> char '&')
+amp = () <$ try (spaces' *> char '&')
parseTableRow :: Int -- ^ number of columns
-> LP [Blocks]
@@ -1308,20 +1307,22 @@ parseTableRow cols = try $ do
guard $ cells' /= [mempty]
-- note: a & b in a three-column table leaves an empty 3rd cell:
let cells'' = cells' ++ replicate (cols - numcells) mempty
- spaces
+ spaces'
return cells''
+spaces' :: LP ()
+spaces' = spaces *> skipMany (comment *> spaces)
+
simpTable :: Bool -> LP Blocks
simpTable hasWidthParameter = try $ do
- when hasWidthParameter $ () <$ (spaces >> tok)
- spaces
+ when hasWidthParameter $ () <$ (spaces' >> tok)
+ skipopts
aligns <- parseAligns
let cols = length aligns
optional hline
header' <- option [] $ try (parseTableRow cols <* lbreak <* hline)
rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline)
- spaces
- skipMany (comment *> spaces)
+ spaces'
let header'' = if null header'
then replicate cols mempty
else header'
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 187b479c3..5e0cef4f8 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
+{-# LANGUAGE ScopedTypeVariables #-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -31,11 +32,11 @@ Conversion of markdown-formatted plain text to 'Pandoc' document.
module Text.Pandoc.Readers.Markdown ( readMarkdown,
readMarkdownWithWarnings ) where
-import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate )
+import Data.List ( transpose, sortBy, intersperse, intercalate, elemIndex)
import qualified Data.Map as M
import Data.Scientific (coefficient, base10Exponent)
import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum, toLower )
+import Data.Char ( isSpace, isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
import qualified Data.Text as T
@@ -55,34 +56,41 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
import Data.Monoid (mconcat, mempty)
-import Control.Applicative ((<$>), (<*), (*>), (<$))
+import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>))
import Control.Monad
+import Control.Monad.Reader
import System.FilePath (takeExtension, addExtension)
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)
import qualified Data.Set as Set
import Text.Printf (printf)
import Debug.Trace (trace)
+import Text.Pandoc.Error
-type MarkdownParser = Parser [Char] ParserState
+type MarkdownParser a = ParserT [Char] ParserState (Reader ParserState) a
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readMarkdown opts s =
- (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
+ runMarkdown opts s parseMarkdown
-- | Read markdown from an input string and return a pair of a Pandoc document
-- and a list of warnings.
readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> (Pandoc, [String])
-readMarkdownWithWarnings opts s =
- (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
+ -> Either PandocError (Pandoc, [String])
+readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown)
+
+runMarkdown :: forall a . ReaderOptions -> String -> MarkdownParser a -> Either PandocError a
+runMarkdown opts inp p = fst <$> res
+ where
+ imd = readWithM (returnState p) def{ stateOptions = opts } (inp ++ "\n\n")
+ res :: Either PandocError (a, ParserState)
+ res = runReader imd s
+ s :: ParserState
+ s = either def snd res
--
-- Constants and data structure definitions
@@ -119,10 +127,10 @@ inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
-isNull :: F Inlines -> Bool
-isNull ils = B.isNull $ runF ils def
+isNull :: Inlines -> Bool
+isNull = B.isNull
-spnl :: Parser [Char] st ()
+spnl :: Monad m => ParserT [Char] st m ()
spnl = try $ do
skipSpaces
optional newline
@@ -162,9 +170,9 @@ litChar = escapedChar'
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
-inlinesInBalancedBrackets :: MarkdownParser (F Inlines)
+inlinesInBalancedBrackets :: MarkdownParser Inlines
inlinesInBalancedBrackets = charsInBalancedBrackets >>=
- parseFromString (trimInlinesF . mconcat <$> many inline)
+ parseFromString (trimInlines . mconcat <$> many inline)
charsInBalancedBrackets :: MarkdownParser [Char]
charsInBalancedBrackets = do
@@ -181,16 +189,16 @@ charsInBalancedBrackets = do
-- document structure
--
-titleLine :: MarkdownParser (F Inlines)
+titleLine :: MarkdownParser Inlines
titleLine = try $ do
char '%'
skipSpaces
res <- many $ (notFollowedBy newline >> inline)
<|> try (endline >> whitespace)
newline
- return $ trimInlinesF $ mconcat res
+ return $ trimInlines $ mconcat res
-authorsLine :: MarkdownParser (F [Inlines])
+authorsLine :: MarkdownParser [Inlines]
authorsLine = try $ do
char '%'
skipSpaces
@@ -199,13 +207,13 @@ authorsLine = try $ do
(char ';' <|>
try (newline >> notFollowedBy blankline >> spaceChar))
newline
- return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors
+ return $ filter (not . isNull) $ map (trimInlines . mconcat) authors
-dateLine :: MarkdownParser (F Inlines)
+dateLine :: MarkdownParser Inlines
dateLine = try $ do
char '%'
skipSpaces
- trimInlinesF . mconcat <$> manyTill inline newline
+ trimInlines . mconcat <$> manyTill inline newline
titleBlock :: MarkdownParser ()
titleBlock = pandocTitleBlock <|> mmdTitleBlock
@@ -215,20 +223,16 @@ pandocTitleBlock = try $ do
guardEnabled Ext_pandoc_title_block
lookAhead (char '%')
title <- option mempty titleLine
- author <- option (return []) authorsLine
+ author <- option [] authorsLine
date <- option mempty dateLine
optional blanklines
- let meta' = do title' <- title
- author' <- author
- date' <- date
- return $
- (if B.isNull title' then id else B.setMeta "title" title')
- . (if null author' then id else B.setMeta "author" author')
- . (if B.isNull date' then id else B.setMeta "date" date')
- $ nullMeta
- updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-
-yamlMetaBlock :: MarkdownParser (F Blocks)
+ let meta' = (if B.isNull title then id else B.setMeta "title" title)
+ . (if null author then id else B.setMeta "author" author)
+ . (if B.isNull date then id else B.setMeta "date" date)
+ $ nullMeta
+ updateState $ \st -> st{ stateMeta = stateMeta st <> meta' }
+
+yamlMetaBlock :: MarkdownParser Blocks
yamlMetaBlock = try $ do
guardEnabled Ext_yaml_metadata_block
pos <- getPosition
@@ -241,17 +245,18 @@ yamlMetaBlock = try $ do
optional blanklines
opts <- stateOptions <$> getState
meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
- Right (Yaml.Object hashmap) -> return $ return $
+ Right (Yaml.Object hashmap) -> return $
H.foldrWithKey (\k v m ->
if ignorable k
then m
- else B.setMeta (T.unpack k)
- (yamlToMeta opts v) m)
+ else case yamlToMeta opts v of
+ Left _ -> m
+ Right v' -> B.setMeta (T.unpack k) v' m)
nullMeta hashmap
- Right Yaml.Null -> return $ return nullMeta
+ Right Yaml.Null -> return nullMeta
Right _ -> do
addWarning (Just pos) "YAML header is not an object"
- return $ return nullMeta
+ return nullMeta
Left err' -> do
case err' of
InvalidYaml (Just YamlParseException{
@@ -270,41 +275,50 @@ yamlMetaBlock = try $ do
_ -> addWarning (Just pos)
$ "Could not parse YAML header: " ++
show err'
- return $ return nullMeta
- updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+ return nullMeta
+ updateState $ \st -> st{ stateMeta = stateMeta st <> meta' }
return mempty
-- ignore fields ending with _
ignorable :: Text -> Bool
-ignorable t = (T.pack "_") `T.isSuffixOf` t
-
-toMetaValue :: ReaderOptions -> Text -> MetaValue
-toMetaValue opts x =
- case readMarkdown opts (T.unpack x) of
- Pandoc _ [Plain xs] -> MetaInlines xs
- Pandoc _ [Para xs]
+ignorable t = T.pack "_" `T.isSuffixOf` t
+
+toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue
+toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x)
+ where
+ toMeta p =
+ case p of
+ Pandoc _ [Plain xs] -> MetaInlines xs
+ Pandoc _ [Para xs]
| endsWithNewline x -> MetaBlocks [Para xs]
| otherwise -> MetaInlines xs
- Pandoc _ bs -> MetaBlocks bs
- where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t
-
-yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue
+ Pandoc _ bs -> MetaBlocks bs
+ endsWithNewline t = T.pack "\n" `T.isSuffixOf` t
+ opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts}
+ meta_exts = Set.fromList [ Ext_pandoc_title_block
+ , Ext_mmd_title_block
+ , Ext_yaml_metadata_block
+ ]
+
+yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue
yamlToMeta opts (Yaml.String t) = toMetaValue opts t
yamlToMeta _ (Yaml.Number n)
-- avoid decimal points for numbers that don't need them:
- | base10Exponent n >= 0 = MetaString $ show
+ | base10Exponent n >= 0 = return $ MetaString $ show
$ coefficient n * (10 ^ base10Exponent n)
- | otherwise = MetaString $ show n
-yamlToMeta _ (Yaml.Bool b) = MetaBool b
-yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts)
- $ V.toList xs
-yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m ->
+ | otherwise = return $ MetaString $ show n
+yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b
+yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts)
+ (V.toList xs)
+yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m ->
if ignorable k
then m
- else M.insert (T.unpack k)
- (yamlToMeta opts v) m)
- M.empty o
-yamlToMeta _ _ = MetaString ""
+ else (do
+ v' <- yamlToMeta opts v
+ m' <- m
+ return (M.insert (T.unpack k) v' m')))
+ (return M.empty) o
+yamlToMeta _ _ = return $ MetaString ""
stopLine :: MarkdownParser ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
@@ -314,17 +328,21 @@ mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
kvPairs <- many1 kvPair
blanklines
- updateState $ \st -> st{ stateMeta' = stateMeta' st <>
- return (Meta $ M.fromList kvPairs) }
+ updateState $ \st -> st{ stateMeta = stateMeta st <>
+ (Meta $ M.fromList kvPairs) }
kvPair :: MarkdownParser (String, MetaValue)
kvPair = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
+ skipMany1 spaceNoNewline
val <- manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
+ guard $ not . null . trim $ val
let key' = concat $ words $ map toLower key
let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val
return (key',val')
+ where
+ spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r'))
parseMarkdown :: MarkdownParser Pandoc
parseMarkdown = do
@@ -335,11 +353,11 @@ parseMarkdown = do
optional titleBlock
blocks <- parseBlocks
st <- getState
- let meta = runF (stateMeta' st) st
- let Pandoc _ bs = B.doc $ runF blocks st
+ let meta = stateMeta st
+ let Pandoc _ bs = B.doc blocks
return $ Pandoc meta bs
-referenceKey :: MarkdownParser (F Blocks)
+referenceKey :: MarkdownParser Blocks
referenceKey = try $ do
pos <- getPosition
skipNonindentSpaces
@@ -366,7 +384,7 @@ referenceKey = try $ do
Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'"
Nothing -> return ()
updateState $ \s -> s { stateKeys = M.insert key target oldkeys }
- return $ return mempty
+ return mempty
referenceTitle :: MarkdownParser String
referenceTitle = try $ do
@@ -386,7 +404,7 @@ quotedTitle c = try $ do
-- | PHP Markdown Extra style abbreviation key. Currently
-- we just skip them, since Pandoc doesn't have an element for
-- an abbreviation.
-abbrevKey :: MarkdownParser (F Blocks)
+abbrevKey :: MarkdownParser Blocks
abbrevKey = do
guardEnabled Ext_abbreviations
try $ do
@@ -395,7 +413,7 @@ abbrevKey = do
char ':'
skipMany (satisfy (/= '\n'))
blanklines
- return $ return mempty
+ return mempty
noteMarker :: MarkdownParser String
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
@@ -413,7 +431,7 @@ rawLines = do
rest <- many rawLine
return $ unlines (first:rest)
-noteBlock :: MarkdownParser (F Blocks)
+noteBlock :: MarkdownParser Blocks
noteBlock = try $ do
pos <- getPosition
skipNonindentSpaces
@@ -425,7 +443,7 @@ noteBlock = try $ do
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
let raw = unlines (first:rest) ++ "\n"
optional blanklines
- parsed <- parseFromString parseBlocks raw
+ parsed <- parseFromString (inFootnote parseBlocks) raw
let newnote = (ref, parsed)
oldnotes <- stateNotes' <$> getState
case lookup ref oldnotes of
@@ -434,32 +452,40 @@ noteBlock = try $ do
updateState $ \s -> s { stateNotes' = newnote : oldnotes }
return mempty
+inFootnote :: MarkdownParser a -> MarkdownParser a
+inFootnote p = do
+ st <- stateInFootnote <$> getState
+ updateState (\s -> s { stateInFootnote = True } )
+ r <- p
+ updateState (\s -> s { stateInFootnote = st } )
+ return r
+
--
-- parsing blocks
--
-parseBlocks :: MarkdownParser (F Blocks)
+parseBlocks :: MarkdownParser Blocks
parseBlocks = mconcat <$> manyTill block eof
-block :: MarkdownParser (F Blocks)
+block :: MarkdownParser Blocks
block = do
tr <- getOption readerTrace
pos <- getPosition
res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
- , guardEnabled Ext_latex_macros *> (macro >>= return . return)
+ , guardEnabled Ext_latex_macros *> macro
-- note: bulletList needs to be before header because of
-- the possibility of empty list items: -
, bulletList
, header
, lhsCodeBlock
- , rawTeXBlock
, divHtml
, htmlBlock
, table
- , lineBlock
, codeBlockIndented
+ , rawTeXBlock
+ , lineBlock
, blockQuote
, hrule
, orderedList
@@ -470,29 +496,28 @@ block = do
, para
, plain
] <?> "block"
- when tr $ do
- st <- getState
+ when tr $
trace (printf "line %d: %s" (sourceLine pos)
- (take 60 $ show $ B.toList $ runF res st)) (return ())
+ (take 60 . show . B.toList $ res)) (return ())
return res
--
-- header blocks
--
-header :: MarkdownParser (F Blocks)
+header :: MarkdownParser Blocks
header = setextHeader <|> atxHeader <?> "header"
-atxHeader :: MarkdownParser (F Blocks)
+atxHeader :: MarkdownParser Blocks
atxHeader = try $ do
- level <- many1 (char '#') >>= return . length
+ level <- length <$> many1 (char '#')
notFollowedBy $ guardEnabled Ext_fancy_lists >>
(char '.' <|> char ')') -- this would be a list
skipSpaces
- text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
+ text <- trimInlines . mconcat <$> many (notFollowedBy atxClosing >> inline)
attr <- atxClosing
- attr' <- registerHeader attr (runF text defaultParserState)
- return $ B.headerWith attr' level <$> text
+ attr' <- registerHeader attr text
+ return $ B.headerWith attr' level text
atxClosing :: MarkdownParser Attr
atxClosing = try $ do
@@ -519,25 +544,25 @@ mmdHeaderIdentifier = do
skipSpaces
return (ident,[],[])
-setextHeader :: MarkdownParser (F Blocks)
+setextHeader :: MarkdownParser Blocks
setextHeader = try $ do
-- This lookahead prevents us from wasting time parsing Inlines
-- unless necessary -- it gives a significant performance boost.
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
- text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
+ text <- trimInlines . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
attr <- setextHeaderEnd
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
- let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- attr' <- registerHeader attr (runF text defaultParserState)
- return $ B.headerWith attr' level <$> text
+ let level = (fromMaybe 0 $ elemIndex underlineChar setextHChars) + 1
+ attr' <- registerHeader attr text
+ return $ B.headerWith attr' level text
--
-- hrule block
--
-hrule :: Parser [Char] st (F Blocks)
+hrule :: Monad m => ParserT [Char] st m Blocks
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -545,24 +570,24 @@ hrule = try $ do
skipMany (spaceChar <|> char start)
newline
optional blanklines
- return $ return B.horizontalRule
+ return B.horizontalRule
--
-- code blocks
--
indentedLine :: MarkdownParser String
-indentedLine = indentSpaces >> anyLine >>= return . (++ "\n")
+indentedLine = indentSpaces >> ((++ "\n") <$> anyLine)
-blockDelimiter :: (Char -> Bool)
+blockDelimiter :: Monad m
+ => (Char -> Bool)
-> Maybe Int
- -> Parser [Char] st Int
+ -> ParserT [Char] st m Int
blockDelimiter f len = try $ do
c <- lookAhead (satisfy f)
case len of
Just l -> count l (char c) >> many (char c) >> return l
- Nothing -> count 3 (char c) >> many (char c) >>=
- return . (+ 3) . length
+ Nothing -> count 3 (char c) >> ((+ 3) . length <$> many (char c))
attributes :: MarkdownParser Attr
attributes = try $ do
@@ -607,7 +632,7 @@ specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
-codeBlockFenced :: MarkdownParser (F Blocks)
+codeBlockFenced :: MarkdownParser Blocks
codeBlockFenced = try $ do
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
@@ -619,7 +644,7 @@ codeBlockFenced = try $ do
blankline
contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
blanklines
- return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
+ return $ B.codeBlockWith attr $ intercalate "\n" contents
-- correctly handle github language identifiers
toLanguageId :: String -> String
@@ -628,7 +653,7 @@ toLanguageId = map toLower . go
go "objective-c" = "objectivec"
go x = x
-codeBlockIndented :: MarkdownParser (F Blocks)
+codeBlockIndented :: MarkdownParser Blocks
codeBlockIndented = do
contents <- many1 (indentedLine <|>
try (do b <- blanklines
@@ -636,15 +661,15 @@ codeBlockIndented = do
return $ b ++ l))
optional blanklines
classes <- getOption readerIndentedCodeClasses
- return $ return $ B.codeBlockWith ("", classes, []) $
+ return $ B.codeBlockWith ("", classes, []) $
stripTrailingNewlines $ concat contents
-lhsCodeBlock :: MarkdownParser (F Blocks)
+lhsCodeBlock :: MarkdownParser Blocks
lhsCodeBlock = do
guardEnabled Ext_literate_haskell
- (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ (B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
(lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
- <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
+ <|> (B.codeBlockWith ("",["sourceCode","haskell"],[]) <$>
lhsCodeBlockInverseBird)
lhsCodeBlockLaTeX :: MarkdownParser String
@@ -673,7 +698,7 @@ lhsCodeBlockBirdWith c = try $ do
blanklines
return $ intercalate "\n" lns'
-birdTrackLine :: Char -> Parser [Char] st String
+birdTrackLine :: Monad m => Char -> ParserT [Char] st m String
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@@ -701,12 +726,12 @@ emailBlockQuote = try $ do
optional blanklines
return raw
-blockQuote :: MarkdownParser (F Blocks)
+blockQuote :: MarkdownParser Blocks
blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
- return $ B.blockQuote <$> contents
+ contents <- parseFromString parseBlocks $ intercalate "\n" raw ++ "\n\n"
+ return $ B.blockQuote contents
--
-- list blocks
@@ -749,7 +774,7 @@ anyOrderedListStart = try $ do
return res
listStart :: MarkdownParser ()
-listStart = bulletListStart <|> (anyOrderedListStart >> return ())
+listStart = bulletListStart <|> void anyOrderedListStart
listLine :: MarkdownParser String
listLine = try $ do
@@ -804,7 +829,7 @@ listContinuationLine = try $ do
return $ result ++ "\n"
listItem :: MarkdownParser a
- -> MarkdownParser (F Blocks)
+ -> MarkdownParser Blocks
listItem start = try $ do
first <- rawListItem start
continuations <- many listContinuation
@@ -820,14 +845,14 @@ listItem start = try $ do
updateState (\st -> st {stateParserContext = oldContext})
return contents
-orderedList :: MarkdownParser (F Blocks)
+orderedList :: MarkdownParser Blocks
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
unless (style `elem` [DefaultStyle, Decimal, Example] &&
delim `elem` [DefaultDelim, Period]) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
- items <- fmap sequence $ many1 $ listItem
+ items <- many1 $ listItem
( try $ do
optional newline -- if preceded by Plain block in a list
startpos <- sourceColumn <$> getPosition
@@ -839,12 +864,12 @@ orderedList = try $ do
atMostSpaces (tabStop - (endpos - startpos))
return res )
start' <- option 1 $ guardEnabled Ext_startnum >> return start
- return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items
+ return $ B.orderedListWith (start', style, delim) (compactify' items)
-bulletList :: MarkdownParser (F Blocks)
+bulletList :: MarkdownParser Blocks
bulletList = do
- items <- fmap sequence $ many1 $ listItem bulletListStart
- return $ B.bulletList <$> fmap compactify' items
+ items <- many1 $ listItem bulletListStart
+ return $ B.bulletList (compactify' items)
-- definition lists
@@ -859,14 +884,14 @@ defListMarker = do
else mzero
return ()
-definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks]))
+definitionListItem :: Bool -> MarkdownParser (Inlines, [Blocks])
definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
- term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine'
+ term <- parseFromString (trimInlines . mconcat <$> many inline) rawLine'
contents <- mapM (parseFromString parseBlocks) raw
optional blanklines
- return $ liftM2 (,) term (sequence contents)
+ return (term, contents)
defRawBlock :: Bool -> MarkdownParser String
defRawBlock compact = try $ do
@@ -889,32 +914,32 @@ defRawBlock compact = try $ do
return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++
if hasBlank || not (null cont) then "\n\n" else ""
-definitionList :: MarkdownParser (F Blocks)
+definitionList :: MarkdownParser Blocks
definitionList = try $ do
lookAhead (anyLine >> optional blankline >> defListMarker)
compactDefinitionList <|> normalDefinitionList
-compactDefinitionList :: MarkdownParser (F Blocks)
+compactDefinitionList :: MarkdownParser Blocks
compactDefinitionList = do
guardEnabled Ext_compact_definition_lists
- items <- fmap sequence $ many1 $ definitionListItem True
- return $ B.definitionList <$> fmap compactify'DL items
+ items <- many1 $ definitionListItem True
+ return $ B.definitionList (compactify'DL items)
-normalDefinitionList :: MarkdownParser (F Blocks)
+normalDefinitionList :: MarkdownParser Blocks
normalDefinitionList = do
guardEnabled Ext_definition_lists
- items <- fmap sequence $ many1 $ definitionListItem False
- return $ B.definitionList <$> items
+ items <- many1 $ definitionListItem False
+ return $ B.definitionList items
--
-- paragraph block
--
-para :: MarkdownParser (F Blocks)
+para :: MarkdownParser Blocks
para = try $ do
exts <- getOption readerExtensions
- result <- trimInlinesF . mconcat <$> many1 inline
- option (B.plain <$> result)
+ result <- trimInlines . mconcat <$> many1 inline
+ option (B.plain result)
$ try $ do
newline
(blanklines >> return mempty)
@@ -931,18 +956,17 @@ para = try $ do
Just "div" -> () <$
lookAhead (htmlTag (~== TagClose "div"))
_ -> mzero
- return $ do
- result' <- result
- case B.toList result' of
+ return $
+ case B.toList result of
[Image alt (src,tit)]
| Ext_implicit_figures `Set.member` exts ->
-- the fig: at beginning of title indicates a figure
- return $ B.para $ B.singleton
+ B.para $ B.singleton
$ Image alt (src,'f':'i':'g':':':tit)
- _ -> return $ B.para result'
+ _ -> B.para result
-plain :: MarkdownParser (F Blocks)
-plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
+plain :: MarkdownParser Blocks
+plain = B.plain . trimInlines . mconcat <$> many1 inline
--
-- raw html
@@ -953,13 +977,13 @@ htmlElement = rawVerbatimBlock
<|> strictHtmlBlock
<|> liftM snd (htmlTag isBlockTag)
-htmlBlock :: MarkdownParser (F Blocks)
+htmlBlock :: MarkdownParser Blocks
htmlBlock = do
guardEnabled Ext_raw_html
try (do
(TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
(guard (t `elem` ["pre","style","script"]) >>
- (return . B.rawBlock "html") <$> rawVerbatimBlock)
+ B.rawBlock "html" <$> rawVerbatimBlock)
<|> (do guardEnabled Ext_markdown_attribute
oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
markdownAttribute <-
@@ -978,35 +1002,35 @@ htmlBlock = do
<|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
<|> htmlBlock'
-htmlBlock' :: MarkdownParser (F Blocks)
+htmlBlock' :: MarkdownParser Blocks
htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
- return $ return $ B.rawBlock "html" first
+ return $ B.rawBlock "html" first
strictHtmlBlock :: MarkdownParser String
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
rawVerbatimBlock :: MarkdownParser String
rawVerbatimBlock = try $ do
- (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem
- ["pre", "style", "script"])
- (const True))
+ (TagOpen tag _, open) <-
+ htmlTag (tagOpen (`elem` ["pre", "style", "script"])
+ (const True))
contents <- manyTill anyChar (htmlTag (~== TagClose tag))
return $ open ++ contents ++ renderTags' [TagClose tag]
-rawTeXBlock :: MarkdownParser (F Blocks)
+rawTeXBlock :: MarkdownParser Blocks
rawTeXBlock = do
guardEnabled Ext_raw_tex
result <- (B.rawBlock "latex" . concat <$>
- rawLaTeXBlock `sepEndBy1` blankline)
+ generalize rawLaTeXBlock `sepEndBy1` blankline)
<|> (B.rawBlock "context" . concat <$>
rawConTeXtEnvironment `sepEndBy1` blankline)
spaces
- return $ return result
+ return result
-rawHtmlBlocks :: MarkdownParser (F Blocks)
+rawHtmlBlocks :: MarkdownParser Blocks
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
-- try to find closing tag
@@ -1018,10 +1042,10 @@ rawHtmlBlocks = do
contents <- mconcat <$> many (notFollowedBy' closer >> block)
result <-
(closer >>= \(_, rawcloser) -> return (
- return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
+ (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
contents <>
- return (B.rawBlock "html" rawcloser)))
- <|> return (return (B.rawBlock "html" raw) <> contents)
+ (B.rawBlock "html" rawcloser)))
+ <|> return (B.rawBlock "html" raw <> contents)
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
return result
@@ -1036,12 +1060,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
-- line block
--
-lineBlock :: MarkdownParser (F Blocks)
+lineBlock :: MarkdownParser Blocks
lineBlock = try $ do
guardEnabled Ext_line_blocks
lines' <- lineBlockLines >>=
- mapM (parseFromString (trimInlinesF . mconcat <$> many inline))
- return $ B.para <$> (mconcat $ intersperse (return B.linebreak) lines')
+ mapM (parseFromString (trimInlines . mconcat <$> many inline))
+ return $ B.para (mconcat $ intersperse B.linebreak lines')
--
-- Tables
@@ -1049,17 +1073,19 @@ lineBlock = try $ do
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
-dashedLine :: Char
- -> Parser [Char] st (Int, Int)
+dashedLine :: Monad m => Char
+ -> ParserT [Char] st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
- return $ (length dashes, length $ dashes ++ sp)
+ let lengthDashes = length dashes
+ lengthSp = length sp
+ return (lengthDashes, lengthDashes + lengthSp)
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
simpleTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+ -> MarkdownParser ([Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -1078,9 +1104,8 @@ simpleTableHeader headless = try $ do
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
- heads <- fmap sequence
- $ mapM (parseFromString (mconcat <$> many plain))
- $ map trim rawHeads'
+ heads <-
+ mapM (parseFromString (mconcat <$> many plain) . trim) rawHeads'
return (heads, aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
@@ -1121,30 +1146,30 @@ rawTableLine indices = do
-- Parse a table line and return a list of lists of blocks (columns).
tableLine :: [Int]
- -> MarkdownParser (F [Blocks])
+ -> MarkdownParser [Blocks]
tableLine indices = rawTableLine indices >>=
- fmap sequence . mapM (parseFromString (mconcat <$> many plain))
+ mapM (parseFromString (mconcat <$> many plain))
-- Parse a multiline table row and return a list of blocks (columns).
multilineRow :: [Int]
- -> MarkdownParser (F [Blocks])
+ -> MarkdownParser [Blocks]
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map unlines $ transpose colLines
- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols
+ mapM (parseFromString (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
-tableCaption :: MarkdownParser (F Inlines)
+tableCaption :: MarkdownParser Inlines
tableCaption = try $ do
guardEnabled Ext_table_captions
skipNonindentSpaces
string ":" <|> string "Table:"
- trimInlinesF . mconcat <$> many1 inline <* blanklines
+ trimInlines . mconcat <$> many1 inline <* blanklines
-- Parse a simple table with '---' header and one line per row.
simpleTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+ -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
simpleTable headless = do
(aligns, _widths, heads', lines') <-
tableWith (simpleTableHeader headless) tableLine
@@ -1158,12 +1183,12 @@ simpleTable headless = do
-- which may be multiline, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
multilineTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+ -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
multilineTable headless =
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
multilineTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+ -> MarkdownParser ([Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
unless headless $
tableSep >> notFollowedBy blankline
@@ -1185,7 +1210,7 @@ multilineTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else map (unlines . map trim) rawHeadsList
- heads <- fmap sequence $
+ heads <-
mapM (parseFromString (mconcat <$> many plain)) $
map trim rawHeads
return (heads, aligns, indices)
@@ -1195,7 +1220,7 @@ multilineTableHeader headless = try $ do
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
gridTable :: Bool -- ^ Headerless table
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+ -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
gridTable headless =
tableWith (gridTableHeader headless) gridTableRow
(gridTableSep '-') gridTableFooter
@@ -1204,13 +1229,14 @@ gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitStringByIndices (init indices) $ trimr line
-gridPart :: Char -> Parser [Char] st (Int, Int)
+gridPart :: Monad m => Char -> ParserT [Char] st m (Int, Int)
gridPart ch = do
dashes <- many1 (char ch)
char '+'
- return (length dashes, length dashes + 1)
+ let lengthDashes = length dashes
+ return (lengthDashes, lengthDashes + 1)
-gridDashedLines :: Char -> Parser [Char] st [(Int,Int)]
+gridDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: String -> String
@@ -1223,7 +1249,7 @@ gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
gridTableHeader :: Bool -- ^ Headerless table
- -> MarkdownParser (F [Blocks], [Alignment], [Int])
+ -> MarkdownParser ([Blocks], [Alignment], [Int])
gridTableHeader headless = try $ do
optional blanklines
dashes <- gridDashedLines '-'
@@ -1232,9 +1258,7 @@ gridTableHeader headless = try $ do
else many1
(notFollowedBy (gridTableSep '=') >> char '|' >>
many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
+ unless headless (void $ gridTableSep '=')
let lines' = map snd dashes
let indices = scanl (+) 0 lines'
let aligns = replicate (length lines') AlignDefault
@@ -1243,7 +1267,7 @@ gridTableHeader headless = try $ do
then replicate (length dashes) ""
else map (unlines . map trim) $ transpose
$ map (gridTableSplitLine indices) rawContent
- heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
+ heads <- mapM (parseFromString parseBlocks . trim) rawHeads
return (heads, aligns, indices)
gridTableRawLine :: [Int] -> MarkdownParser [String]
@@ -1254,12 +1278,12 @@ gridTableRawLine indices = do
-- | Parse row of grid table.
gridTableRow :: [Int]
- -> MarkdownParser (F [Blocks])
+ -> MarkdownParser [Blocks]
gridTableRow indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
transpose colLines
- fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols)
+ compactify' <$> mapM (parseFromString parseBlocks) cols
removeOneLeadingSpace :: [String] -> [String]
removeOneLeadingSpace xs =
@@ -1285,16 +1309,12 @@ pipeBreak = try $ do
blankline
return (first:rest)
-pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+pipeTable :: MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
pipeTable = try $ do
- (heads,aligns) <- try ( pipeBreak >>= \als ->
- return (return $ replicate (length als) mempty, als))
- <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als ->
-
- return (row, als) )
- lines' <- sequence <$> many1 pipeTableRow
+ (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak
+ lines' <- many pipeTableRow
let widths = replicate (length aligns) 0.0
- return $ (aligns, widths, heads, lines')
+ return (aligns, widths, heads, lines')
sepPipe :: MarkdownParser ()
sepPipe = try $ do
@@ -1302,7 +1322,7 @@ sepPipe = try $ do
notFollowedBy blankline
-- parse a row, also returning probable alignments for org-table cells
-pipeTableRow :: MarkdownParser (F [Blocks])
+pipeTableRow :: MarkdownParser [Blocks]
pipeTableRow = do
nonindentSpaces
openPipe <- (True <$ char '|') <|> return False
@@ -1314,16 +1334,14 @@ pipeTableRow = do
guard $ not (null rest && not openPipe)
optional (char '|')
blankline
- let cells = sequence (first:rest)
- return $ do
- cells' <- cells
- return $ map
- (\ils ->
+ let cells = first:rest
+ return $
+ map (\ils ->
case trimInlines ils of
ils' | B.isNull ils' -> mempty
- | otherwise -> B.plain $ ils') cells'
+ | otherwise -> B.plain ils') cells
-pipeTableHeaderPart :: Parser [Char] st Alignment
+pipeTableHeaderPart :: Monad m => ParserT [Char] st m Alignment
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@@ -1338,7 +1356,7 @@ pipeTableHeaderPart = try $ do
(Just _,Just _) -> AlignCenter
-- Succeed only if current line contains a pipe.
-scanForPipe :: Parser [Char] st ()
+scanForPipe :: Monad m => ParserT [Char] st m ()
scanForPipe = do
inp <- getInput
case break (\c -> c == '\n' || c == '|') inp of
@@ -1348,22 +1366,22 @@ scanForPipe = do
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'. Variant of the version in
-- Text.Pandoc.Parsing.
-tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int])
- -> ([Int] -> MarkdownParser (F [Blocks]))
+tableWith :: MarkdownParser ([Blocks], [Alignment], [Int])
+ -> ([Int] -> MarkdownParser [Blocks])
-> MarkdownParser sep
-> MarkdownParser end
- -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
+ -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]])
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
- lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
+ lines' <- rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
- let widths = if (indices == [])
- then replicate (length aligns) 0.0
- else widthsFromIndices numColumns indices
- return $ (aligns, widths, heads, lines')
+ let widths = case indices of
+ [] -> replicate (length aligns) 0.0
+ _ -> widthsFromIndices numColumns indices
+ return (aligns, widths, heads, lines')
-table :: MarkdownParser (F Blocks)
+table :: MarkdownParser Blocks
table = try $ do
frontCaption <- option Nothing (Just <$> tableCaption)
(aligns, widths, heads, lns) <-
@@ -1378,19 +1396,15 @@ table = try $ do
(gridTable False <|> gridTable True)) <?> "table"
optional blanklines
caption <- case frontCaption of
- Nothing -> option (return mempty) tableCaption
+ Nothing -> option mempty tableCaption
Just c -> return c
- return $ do
- caption' <- caption
- heads' <- heads
- lns' <- lns
- return $ B.table caption' (zip aligns widths) heads' lns'
+ return $ B.table caption (zip aligns widths) heads lns
--
-- inline
--
-inline :: MarkdownParser (F Inlines)
+inline :: MarkdownParser Inlines
inline = choice [ whitespace
, bareURL
, str
@@ -1413,7 +1427,7 @@ inline = choice [ whitespace
, rawLaTeXInline'
, exampleRef
, smart
- , return . B.singleton <$> charRef
+ , B.singleton <$> charRef
, symbol
, ltSign
] <?> "inline"
@@ -1424,43 +1438,42 @@ escapedChar' = try $ do
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
<|> oneOf "\\`*_{}[]()>#+-.!~\""
-escapedChar :: MarkdownParser (F Inlines)
+escapedChar :: MarkdownParser Inlines
escapedChar = do
result <- escapedChar'
case result of
- ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
+ ' ' -> return $ B.str "\160" -- "\ " is a nonbreaking space
'\n' -> guardEnabled Ext_escaped_line_breaks >>
- return (return B.linebreak) -- "\[newline]" is a linebreak
- _ -> return $ return $ B.str [result]
+ return B.linebreak -- "\[newline]" is a linebreak
+ _ -> return $ B.str [result]
-ltSign :: MarkdownParser (F Inlines)
+ltSign :: MarkdownParser Inlines
ltSign = do
guardDisabled Ext_raw_html
<|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
char '<'
- return $ return $ B.str "<"
+ return $ B.str "<"
-exampleRef :: MarkdownParser (F Inlines)
+exampleRef :: MarkdownParser Inlines
exampleRef = try $ do
guardEnabled Ext_example_lists
char '@'
lab <- many1 (alphaNum <|> oneOf "-_")
- return $ do
- st <- askF
- return $ case M.lookup lab (stateExamples st) of
- Just n -> B.str (show n)
- Nothing -> B.str ('@':lab)
+ st <- ask
+ return $ case M.lookup lab (stateExamples st) of
+ Just n -> B.str (show n)
+ Nothing -> B.str ('@':lab)
-symbol :: MarkdownParser (F Inlines)
+symbol :: MarkdownParser Inlines
symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
notFollowedBy' (() <$ rawTeXBlock)
char '\\')
- return $ return $ B.str [result]
+ return $ B.str [result]
-- parses inline code, between n `s and n `s
-code :: MarkdownParser (F Inlines)
+code :: MarkdownParser Inlines
code = try $ do
starts <- many1 (char '`')
skipSpaces
@@ -1470,16 +1483,17 @@ code = try $ do
notFollowedBy (char '`')))
attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >>
optional whitespace >> attributes)
- return $ return $ B.codeWith attr $ trim $ concat result
+ return $ B.codeWith attr $ trim $ concat result
-math :: MarkdownParser (F Inlines)
-math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
- <|> (return . B.math <$> (mathInline >>= applyMacros'))
+math :: MarkdownParser Inlines
+math = (B.displayMath <$> (mathDisplay >>= applyMacros'))
+ <|> ((B.math <$> (mathInline >>= applyMacros')) <+?>
+ ((getOption readerSmart >>= guard) *> apostrophe <* notFollowedBy space))
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
enclosure :: Char
- -> MarkdownParser (F Inlines)
+ -> MarkdownParser Inlines
enclosure c = do
-- we can't start an enclosure with _ if after a string and
-- the intraword_underscores extension is enabled:
@@ -1487,13 +1501,13 @@ enclosure c = do
<|> guard (c == '*')
<|> (guard =<< notAfterString)
cs <- many1 (char c)
- (return (B.str cs) <>) <$> whitespace
- <|> do
+ (B.str cs <>) <$> whitespace
+ <|>
case length cs of
3 -> three c
2 -> two c mempty
1 -> one c mempty
- _ -> return (return $ B.str cs)
+ _ -> return $ B.str cs
ender :: Char -> Int -> MarkdownParser ()
ender c n = try $ do
@@ -1506,74 +1520,74 @@ ender c n = try $ do
-- If one c, emit emph and then parse two.
-- If two cs, emit strong and then parse one.
-- Otherwise, emit ccc then the results.
-three :: Char -> MarkdownParser (F Inlines)
+three :: Char -> MarkdownParser Inlines
three c = do
contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
- (ender c 3 >> return ((B.strong . B.emph) <$> contents))
- <|> (ender c 2 >> one c (B.strong <$> contents))
- <|> (ender c 1 >> two c (B.emph <$> contents))
- <|> return (return (B.str [c,c,c]) <> contents)
+ (ender c 3 >> return ((B.strong . B.emph) contents))
+ <|> (ender c 2 >> one c (B.strong contents))
+ <|> (ender c 1 >> two c (B.emph contents))
+ <|> return (B.str [c,c,c] <> contents)
-- Parse inlines til you hit two c's, and emit strong.
-- If you never do hit two cs, emit ** plus inlines parsed.
-two :: Char -> F Inlines -> MarkdownParser (F Inlines)
+two :: Char -> Inlines -> MarkdownParser Inlines
two c prefix' = do
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
- (ender c 2 >> return (B.strong <$> (prefix' <> contents)))
- <|> return (return (B.str [c,c]) <> (prefix' <> contents))
+ (ender c 2 >> return (B.strong (prefix' <> contents)))
+ <|> return (B.str [c,c] <> (prefix' <> contents))
-- Parse inlines til you hit a c, and emit emph.
-- If you never hit a c, emit * plus inlines parsed.
-one :: Char -> F Inlines -> MarkdownParser (F Inlines)
+one :: Char -> Inlines -> MarkdownParser Inlines
one c prefix' = do
contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
<|> try (string [c,c] >>
notFollowedBy (ender c 1) >>
two c mempty) )
- (ender c 1 >> return (B.emph <$> (prefix' <> contents)))
- <|> return (return (B.str [c]) <> (prefix' <> contents))
+ (ender c 1 >> return (B.emph (prefix' <> contents)))
+ <|> return (B.str [c] <> (prefix' <> contents))
-strongOrEmph :: MarkdownParser (F Inlines)
+strongOrEmph :: MarkdownParser Inlines
strongOrEmph = enclosure '*' <|> enclosure '_'
--- | Parses a list of inlines between start and end delimiters.
+-- | Parses a list oInlines between start and end delimiters.
inlinesBetween :: (Show b)
=> MarkdownParser a
-> MarkdownParser b
- -> MarkdownParser (F Inlines)
+ -> MarkdownParser Inlines
inlinesBetween start end =
- (trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
+ (trimInlines . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace <* notFollowedBy' end
-strikeout :: MarkdownParser (F Inlines)
-strikeout = fmap B.strikeout <$>
+strikeout :: MarkdownParser Inlines
+strikeout = B.strikeout <$>
(guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
-superscript :: MarkdownParser (F Inlines)
-superscript = fmap B.superscript <$> try (do
+superscript :: MarkdownParser Inlines
+superscript = B.superscript <$> try (do
guardEnabled Ext_superscript
char '^'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^'))
-subscript :: MarkdownParser (F Inlines)
-subscript = fmap B.subscript <$> try (do
+subscript :: MarkdownParser Inlines
+subscript = B.subscript <$> try (do
guardEnabled Ext_subscript
char '~'
mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~'))
-whitespace :: MarkdownParser (F Inlines)
-whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
+whitespace :: MarkdownParser Inlines
+whitespace = spaceChar >> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
-nonEndline :: Parser [Char] st Char
+nonEndline :: Monad m => ParserT [Char] st m Char
nonEndline = satisfy (/='\n')
-str :: MarkdownParser (F Inlines)
+str :: MarkdownParser Inlines
str = do
result <- many1 alphaNum
updateLastStrPos
@@ -1581,14 +1595,14 @@ str = do
isSmart <- getOption readerSmart
if isSmart
then case likelyAbbrev result of
- [] -> return $ return $ B.str result
+ [] -> return $ B.str result
xs -> choice (map (\x ->
try (string x >> oneOf " \n" >>
lookAhead alphaNum >>
- return (return $ B.str
- $ result ++ spacesToNbr x ++ "\160"))) xs)
- <|> (return $ return $ B.str result)
- else return $ return $ B.str result
+ return (B.str $
+ result ++ spacesToNbr x ++ "\160"))) xs)
+ <|> (return $ B.str result)
+ else return $ B.str result
-- | if the string matches the beginning of an abbreviation (before
-- the first period, return strings that would finish the abbreviation.
@@ -1603,7 +1617,7 @@ likelyAbbrev x =
in map snd $ filter (\(y,_) -> y == x) abbrPairs
-- an endline character that can be treated as a space, not a structural break
-endline :: MarkdownParser (F Inlines)
+endline :: MarkdownParser Inlines
endline = try $ do
newline
notFollowedBy blankline
@@ -1616,18 +1630,18 @@ endline = try $ do
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
notFollowedByHtmlCloser
(eof >> return mempty)
- <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
+ <|> (guardEnabled Ext_hard_line_breaks >> return B.linebreak)
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
- <|> (return $ return B.space)
+ <|> return B.space
--
-- links
--
-- a reference label for a link
-reference :: MarkdownParser (F Inlines, String)
+reference :: MarkdownParser (Inlines, String)
reference = do notFollowedBy' (string "[^") -- footnote reference
- withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
+ withRaw $ trimInlines <$> inlinesInBalancedBrackets
parenthesizedChars :: MarkdownParser [Char]
parenthesizedChars = do
@@ -1655,7 +1669,7 @@ source = do
linkTitle :: MarkdownParser String
linkTitle = quotedTitle '"' <|> quotedTitle '\''
-link :: MarkdownParser (F Inlines)
+link :: MarkdownParser Inlines
link = try $ do
st <- getState
guard $ stateAllowLinks st
@@ -1665,44 +1679,43 @@ link = try $ do
regLink B.link lab <|> referenceLink B.link (lab,raw)
regLink :: (String -> String -> Inlines -> Inlines)
- -> F Inlines -> MarkdownParser (F Inlines)
+ -> Inlines -> MarkdownParser Inlines
regLink constructor lab = try $ do
(src, tit) <- source
- return $ constructor src tit <$> lab
+ return $ constructor src tit lab
-- a link like [this][ref] or [this][] or [this]
referenceLink :: (String -> String -> Inlines -> Inlines)
- -> (F Inlines, String) -> MarkdownParser (F Inlines)
+ -> (Inlines, String) -> MarkdownParser Inlines
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
(ref,raw') <- option (mempty, "") $
lookAhead (try (spnl >> normalCite >> return (mempty, "")))
<|>
try (spnl >> reference)
+ when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
parsedRaw <- parseFromString (mconcat <$> many inline) raw'
fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
implicitHeaderRefs <- option False $
True <$ guardEnabled Ext_implicit_header_references
- let makeFallback = do
- parsedRaw' <- parsedRaw
- fallback' <- fallback
- return $ B.str "[" <> fallback' <> B.str "]" <>
+ let makeFallback =
+ B.str "[" <> fallback <> B.str "]" <>
(if sp && not (null raw) then B.space else mempty) <>
- parsedRaw'
- return $ do
- keys <- asksF stateKeys
- case M.lookup key keys of
- Nothing -> do
- headers <- asksF stateHeaders
- ref' <- if labIsRef then lab else ref
- if implicitHeaderRefs
- then case M.lookup ref' headers of
- Just ident -> constructor ('#':ident) "" <$> lab
- Nothing -> makeFallback
- else makeFallback
- Just (src,tit) -> constructor src tit <$> lab
+ parsedRaw
+ keys <- asks stateKeys
+ headers <- asks stateHeaders
+ return $
+ case M.lookup key keys of
+ Nothing ->
+ let ref' = if labIsRef then lab else ref in
+ if implicitHeaderRefs
+ then case M.lookup ref' headers of
+ Just ident -> constructor ('#':ident) "" lab
+ Nothing -> makeFallback
+ else makeFallback
+ Just (src,tit) -> constructor src tit lab
dropBrackets :: String -> String
dropBrackets = reverse . dropRB . reverse . dropLB
@@ -1711,14 +1724,14 @@ dropBrackets = reverse . dropRB . reverse . dropLB
dropLB ('[':xs) = xs
dropLB xs = xs
-bareURL :: MarkdownParser (F Inlines)
+bareURL :: MarkdownParser Inlines
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
(orig, src) <- uri <|> emailAddress
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
- return $ return $ B.link src "" (B.str orig)
+ return $ B.link src "" (B.str orig)
-autoLink :: MarkdownParser (F Inlines)
+autoLink :: MarkdownParser Inlines
autoLink = try $ do
char '<'
(orig, src) <- uri <|> emailAddress
@@ -1727,9 +1740,9 @@ autoLink = try $ do
-- final punctuation. for example: in `<http://hi---there>`,
-- the URI parser will stop before the dashes.
extra <- fromEntities <$> manyTill nonspaceChar (char '>')
- return $ return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
+ return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
-image :: MarkdownParser (F Inlines)
+image :: MarkdownParser Inlines
image = try $ do
char '!'
(lab,raw) <- reference
@@ -1739,38 +1752,33 @@ image = try $ do
_ -> B.image src
regLink constructor lab <|> referenceLink constructor (lab,raw)
-note :: MarkdownParser (F Inlines)
+note :: MarkdownParser Inlines
note = try $ do
guardEnabled Ext_footnotes
+ (stateInFootnote <$> getState) >>= guard . not
ref <- noteMarker
- return $ do
- notes <- asksF stateNotes'
+ notes <- asks stateNotes'
+ return $
case lookup ref notes of
- Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
- Just contents -> do
- st <- askF
- -- process the note in a context that doesn't resolve
- -- notes, to avoid infinite looping with notes inside
- -- notes:
- let contents' = runF contents st{ stateNotes' = [] }
- return $ B.note contents'
-
-inlineNote :: MarkdownParser (F Inlines)
+ Nothing -> B.str $ "[^" ++ ref ++ "]"
+ Just contents -> B.note contents
+
+inlineNote :: MarkdownParser Inlines
inlineNote = try $ do
guardEnabled Ext_inline_notes
char '^'
contents <- inlinesInBalancedBrackets
- return $ B.note . B.para <$> contents
+ return . B.note . B.para $ contents
-rawLaTeXInline' :: MarkdownParser (F Inlines)
+rawLaTeXInline' :: MarkdownParser Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
- RawInline _ s <- rawLaTeXInline
- return $ return $ B.rawInline "tex" s
+ RawInline _ s <- generalize rawLaTeXInline
+ return $ B.rawInline "tex" s
-- "tex" because it might be context or latex
-rawConTeXtEnvironment :: Parser [Char] st String
+rawConTeXtEnvironment :: Monad m => ParserT [Char] st m String
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1779,14 +1787,14 @@ rawConTeXtEnvironment = try $ do
(try $ string "\\stop" >> string completion)
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
-inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String
+inBrackets :: Monad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String
inBrackets parser = do
char '['
contents <- many parser
char ']'
return $ "[" ++ contents ++ "]"
-spanHtml :: MarkdownParser (F Inlines)
+spanHtml :: MarkdownParser Inlines
spanHtml = try $ do
guardEnabled Ext_native_spans
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
@@ -1798,10 +1806,10 @@ spanHtml = try $ do
Just s | null ident && null classes &&
map toLower (filter (`notElem` " \t;") s) ==
"font-variant:small-caps"
- -> return $ B.smallcaps <$> contents
- _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents
+ -> return $ B.smallcaps contents
+ _ -> return $ B.spanWith (ident, classes, keyvals) contents
-divHtml :: MarkdownParser (F Blocks)
+divHtml :: MarkdownParser Blocks
divHtml = try $ do
guardEnabled Ext_native_divs
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
@@ -1819,11 +1827,11 @@ divHtml = try $ do
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
- return $ B.divWith (ident, classes, keyvals) <$> contents
+ return $ B.divWith (ident, classes, keyvals) contents
else -- avoid backtracing
- return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+ return $ B.rawBlock "html" (rawtag <> bls) <> contents
-rawHtmlInline :: MarkdownParser (F Inlines)
+rawHtmlInline :: MarkdownParser Inlines
rawHtmlInline = do
guardEnabled Ext_raw_html
inHtmlBlock <- stateInHtmlBlock <$> getState
@@ -1838,19 +1846,17 @@ rawHtmlInline = do
then (\x -> isInlineTag x &&
not (isCloseBlockTag x))
else not . isTextTag
- return $ return $ B.rawInline "html" result
+ return $ B.rawInline "html" result
-- Citations
-cite :: MarkdownParser (F Inlines)
+cite :: MarkdownParser Inlines
cite = do
guardEnabled Ext_citations
- citations <- textualCite
- <|> do (cs, raw) <- withRaw normalCite
- return $ (flip B.cite (B.text raw)) <$> cs
- return citations
+ textualCite <|> do (cs, raw) <- withRaw normalCite
+ return $ B.cite cs (B.text raw)
-textualCite :: MarkdownParser (F Inlines)
+textualCite :: MarkdownParser Inlines
textualCite = try $ do
(_, key) <- citeKey
let first = Citation{ citationId = key
@@ -1864,29 +1870,26 @@ textualCite = try $ do
case mbrest of
Just (rest, raw) ->
return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
- <$> rest
+ rest
Nothing ->
(do (cs, raw) <- withRaw $ bareloc first
- return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs)
- <|> return (do st <- askF
- return $ case M.lookup key (stateExamples st) of
- Just n -> B.str (show n)
- _ -> B.cite [first] $ B.str $ '@':key)
+ return $ B.cite cs (B.text $ '@':key ++ " " ++ raw))
+ <|> do st <- ask
+ return $ case M.lookup key (stateExamples st) of
+ Just n -> B.str (show n)
+ _ -> B.cite [first] $ B.str $ '@':key
-bareloc :: Citation -> MarkdownParser (F [Citation])
+bareloc :: Citation -> MarkdownParser [Citation]
bareloc c = try $ do
spnl
char '['
suff <- suffix
- rest <- option (return []) $ try $ char ';' >> citeList
+ rest <- option [] $ try $ char ';' >> citeList
spnl
char ']'
- return $ do
- suff' <- suff
- rest' <- rest
- return $ c{ citationSuffix = B.toList suff' } : rest'
+ return $ c{ citationSuffix = B.toList suff } : rest
-normalCite :: MarkdownParser (F [Citation])
+normalCite :: MarkdownParser [Citation]
normalCite = try $ do
char '['
spnl
@@ -1895,60 +1898,57 @@ normalCite = try $ do
char ']'
return citations
-suffix :: MarkdownParser (F Inlines)
+suffix :: MarkdownParser Inlines
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
- rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
+ rest <- trimInlines . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
return $ if hasSpace
- then (B.space <>) <$> rest
+ then B.space <> rest
else rest
-prefix :: MarkdownParser (F Inlines)
-prefix = trimInlinesF . mconcat <$>
+prefix :: MarkdownParser Inlines
+prefix = trimInlines . mconcat <$>
manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
-citeList :: MarkdownParser (F [Citation])
-citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
+citeList :: MarkdownParser [Citation]
+citeList = sepBy1 citation (try $ char ';' >> spnl)
-citation :: MarkdownParser (F Citation)
+citation :: MarkdownParser Citation
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
-
-smart :: MarkdownParser (F Inlines)
+ return Citation{ citationId = key
+ , citationPrefix = B.toList pref
+ , citationSuffix = B.toList suff
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+smart :: MarkdownParser Inlines
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [apostrophe, dash, ellipses])
+ choice [apostrophe, dash, ellipses]
-singleQuoted :: MarkdownParser (F Inlines)
+singleQuoted :: MarkdownParser Inlines
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
- fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ B.singleQuoted . trimInlines . mconcat <$>
many1Till inline singleQuoteEnd
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
-- in the same paragraph.
-doubleQuoted :: MarkdownParser (F Inlines)
+doubleQuoted :: MarkdownParser Inlines
doubleQuoted = try $ do
doubleQuoteStart
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
- (fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> (return $ return (B.str "\8220") <> contents)
+ (withQuoteContext InDoubleQuote doubleQuoteEnd >> return
+ (B.doubleQuoted . trimInlines $ contents))
+ <|> return (B.str "\8220" <> contents)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index e43b8a86c..939d10fb2 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -58,21 +58,21 @@ import Data.Maybe (fromMaybe)
import Text.Printf (printf)
import Debug.Trace (trace)
+import Text.Pandoc.Error
+
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readMediaWiki opts s =
- case runParser parseMediaWiki MWState{ mwOptions = opts
+ readWith parseMediaWiki MWState{ mwOptions = opts
, mwMaxNestingLevel = 4
, mwNextLinkNumber = 1
, mwCategoryLinks = []
, mwHeaderMap = M.empty
, mwIdentifierList = []
}
- "source" (s ++ "\n") of
- Left err' -> error $ "\nError:\n" ++ show err'
- Right result -> result
+ (s ++ "\n")
data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
@@ -593,11 +593,17 @@ imageOption =
<|> try (many1 (oneOf "x0123456789") <* string "px")
<|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
+collapseUnderscores :: String -> String
+collapseUnderscores [] = []
+collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs)
+collapseUnderscores (x:xs) = x : collapseUnderscores xs
+
+addUnderscores :: String -> String
+addUnderscores = collapseUnderscores . intercalate "_" . words
+
internalLink :: MWParser Inlines
internalLink = try $ do
sym "[["
- let addUnderscores x = let (pref,suff) = break (=='#') x
- in pref ++ intercalate "_" (words suff)
pagename <- unwords . words <$> many (noneOf "|]")
label <- option (B.text pagename) $ char '|' *>
( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index f4dfa62c1..fc6b3362a 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -3,7 +3,7 @@ Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; Either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
@@ -33,6 +33,9 @@ module Text.Pandoc.Readers.Native ( readNative ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Error
+import Control.Applicative
+
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
-- an inline list, or an inline. Thus, for example,
@@ -44,33 +47,18 @@ import Text.Pandoc.Shared (safeRead)
-- > Pandoc nullMeta [Plain [Str "hi"]]
--
readNative :: String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
-readNative s =
- case safeRead s of
- Just d -> d
- Nothing -> Pandoc nullMeta $ readBlocks s
+ -> Either PandocError Pandoc
+readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s)
-readBlocks :: String -> [Block]
-readBlocks s =
- case safeRead s of
- Just d -> d
- Nothing -> [readBlock s]
+readBlocks :: String -> Either PandocError [Block]
+readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
-readBlock :: String -> Block
-readBlock s =
- case safeRead s of
- Just d -> d
- Nothing -> Plain $ readInlines s
+readBlock :: String -> Either PandocError Block
+readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s)
-readInlines :: String -> [Inline]
-readInlines s =
- case safeRead s of
- Just d -> d
- Nothing -> [readInline s]
+readInlines :: String -> Either PandocError [Inline]
+readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s)
-readInline :: String -> Inline
-readInline s =
- case safeRead s of
- Just d -> d
- Nothing -> error "Cannot parse document"
+readInline :: String -> Either PandocError Inline
+readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 35d01e877..19ddba36b 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts #-}
module Text.Pandoc.Readers.OPML ( readOPML ) where
import Data.Char (toUpper)
import Text.Pandoc.Options
@@ -11,8 +12,11 @@ import Data.Generics
import Data.Monoid
import Control.Monad.State
import Control.Applicative ((<$>), (<$))
+import Data.Default
+import Text.Pandoc.Compat.Except
+import Text.Pandoc.Error
-type OPML = State OPMLState
+type OPML = ExceptT PandocError (State OPMLState)
data OPMLState = OPMLState{
opmlSectionLevel :: Int
@@ -21,17 +25,19 @@ data OPMLState = OPMLState{
, opmlDocDate :: Inlines
} deriving Show
-readOPML :: ReaderOptions -> String -> Pandoc
+instance Default OPMLState where
+ def = OPMLState{ opmlSectionLevel = 0
+ , opmlDocTitle = mempty
+ , opmlDocAuthors = []
+ , opmlDocDate = mempty
+ }
+
+readOPML :: ReaderOptions -> String -> Either PandocError Pandoc
readOPML _ inp = setTitle (opmlDocTitle st')
- $ setAuthors (opmlDocAuthors st')
- $ setDate (opmlDocDate st')
- $ doc $ mconcat bs
- where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp)
- OPMLState{ opmlSectionLevel = 0
- , opmlDocTitle = mempty
- , opmlDocAuthors = []
- , opmlDocDate = mempty
- }
+ . setAuthors (opmlDocAuthors st')
+ . setDate (opmlDocDate st')
+ . doc . mconcat <$> bs
+ where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp)
-- normalize input, consolidating adjacent Text and CRef elements
normalizeTree :: [Content] -> [Content]
@@ -58,14 +64,16 @@ attrValue attr elt =
Just z -> z
Nothing -> ""
-asHtml :: String -> Inlines
-asHtml s = case readHtml def s of
- Pandoc _ [Plain ils] -> fromList ils
- _ -> mempty
+exceptT :: Either PandocError a -> OPML a
+exceptT = either throwError return
+
+asHtml :: String -> OPML Inlines
+asHtml s = (\(Pandoc _ bs) -> case bs of
+ [Plain ils] -> fromList ils
+ _ -> mempty) <$> exceptT (readHtml def s)
-asMarkdown :: String -> Blocks
-asMarkdown s = fromList bs
- where Pandoc _ bs = readMarkdown def s
+asMarkdown :: String -> OPML Blocks
+asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s)
getBlocks :: Element -> OPML Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
@@ -82,8 +90,8 @@ parseBlock (Elem e) =
"outline" -> gets opmlSectionLevel >>= sect . (+1)
"?xml" -> return mempty
_ -> getBlocks e
- where sect n = do let headerText = asHtml $ attrValue "text" e
- let noteBlocks = asMarkdown $ attrValue "_note" e
+ where sect n = do headerText <- asHtml $ attrValue "text" e
+ noteBlocks <- asMarkdown $ attrValue "_note" e
modify $ \st -> st{ opmlSectionLevel = n }
bs <- getBlocks e
modify $ \st -> st{ opmlSectionLevel = n - 1 }
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index f16aed48d..fc63cc11e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,6 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
{-
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
@@ -36,8 +39,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
import Text.Pandoc.Definition
import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
-import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
- , newline, orderedListMarker
+import Text.Pandoc.Parsing hiding ( newline, orderedListMarker
, parseFromString, blanklines
)
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
@@ -45,34 +47,47 @@ import Text.Pandoc.Shared (compactify', compactify'DL)
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-import Control.Applicative ( Applicative, pure
+import Control.Applicative ( pure
, (<$>), (<$), (<*>), (<*), (*>) )
import Control.Arrow (first)
-import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
-import Control.Monad.Reader (Reader, runReader, ask, asks)
+import Control.Monad (guard, mplus, mzero, when)
+import Control.Monad.Reader (Reader, runReader, asks, local)
import Data.Char (isAlphaNum, toLower)
import Data.Default
-import Data.List (intersperse, isPrefixOf, isSuffixOf)
+import Data.List (intersperse, isPrefixOf, isSuffixOf, foldl')
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust)
-import Data.Monoid (Monoid, mconcat, mempty, mappend)
+import Data.Monoid (mconcat, mempty, mappend)
import Network.HTTP (urlEncode)
+import Text.Pandoc.Error
+
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
-readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
+ -> Either PandocError Pandoc
+readOrg opts s = runOrg opts s parseOrg
+
+data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext
+ , finalState :: OrgParserState }
+
+type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
-type OrgParser = Parser [Char] OrgParserState
+runOrg :: ReaderOptions -> String -> OrgParser a -> Either PandocError a
+runOrg opts inp p = fst <$> res
+ where
+ imd = readWithM (returnState p) def{ orgStateOptions = opts } (inp ++ "\n\n")
+ res = runReader imd def { finalState = s }
+ s :: OrgParserState
+ s = either def snd res
parseOrg :: OrgParser Pandoc
parseOrg = do
blocks' <- parseBlocks
st <- getState
- let meta = runF (orgStateMeta' st) st
+ let meta = orgStateMeta st
let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
- return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
+ return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ blocks')
-- | Drop COMMENT headers and the document tree below those headers.
dropCommentTrees :: [Block] -> [Block]
@@ -102,7 +117,7 @@ isHeaderLevelLowerEq n blk =
-- Parser State for Org
--
-type OrgNoteRecord = (String, F Blocks)
+type OrgNoteRecord = (String, Blocks)
type OrgNoteTable = [OrgNoteRecord]
type OrgBlockAttributes = M.Map String String
@@ -121,10 +136,12 @@ data OrgParserState = OrgParserState
, orgStateLastStrPos :: Maybe SourcePos
, orgStateLinkFormatters :: OrgLinkFormatters
, orgStateMeta :: Meta
- , orgStateMeta' :: F Meta
, orgStateNotes' :: OrgNoteTable
}
+instance Default OrgParserLocal where
+ def = OrgParserLocal NoQuote def
+
instance HasReaderOptions OrgParserState where
extractReaderOptions = orgStateOptions
@@ -138,6 +155,10 @@ instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+instance HasQuoteContext st (Reader OrgParserLocal) where
+ getQuoteContext = asks orgLocalQuoteContext
+ withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
+
instance Default OrgParserState where
def = defaultOrgParserState
@@ -153,13 +174,13 @@ defaultOrgParserState = OrgParserState
, orgStateLastStrPos = Nothing
, orgStateLinkFormatters = M.empty
, orgStateMeta = nullMeta
- , orgStateMeta' = return nullMeta
, orgStateNotes' = []
}
recordAnchorId :: String -> OrgParser ()
recordAnchorId i = updateState $ \s ->
- s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
+ let as = orgStateAnchorIds s in
+ s{ orgStateAnchorIds = i : as }
addBlockAttribute :: String -> String -> OrgParser ()
addBlockAttribute key val = updateState $ \s ->
@@ -238,30 +259,6 @@ parseFromString parser str' = do
-- Adaptions and specializations of parsing utilities
--
-newtype F a = F { unF :: Reader OrgParserState a
- } deriving (Monad, Applicative, Functor)
-
-runF :: F a -> OrgParserState -> a
-runF = runReader . unF
-
-askF :: F OrgParserState
-askF = F ask
-
-asksF :: (OrgParserState -> a) -> F a
-asksF f = F $ asks f
-
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = fmap mconcat . sequence
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
-
-returnF :: a -> OrgParser (F a)
-returnF = return . return
-
-
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
newline :: OrgParser Char
newline =
@@ -280,10 +277,10 @@ blanklines =
-- parsing blocks
--
-parseBlocks :: OrgParser (F Blocks)
+parseBlocks :: OrgParser Blocks
parseBlocks = mconcat <$> manyTill block eof
-block :: OrgParser (F Blocks)
+block :: OrgParser Blocks
block = choice [ mempty <$ blanklines
, optionalAttributes $ choice
[ orgBlock
@@ -294,14 +291,14 @@ block = choice [ mempty <$ blanklines
, drawer
, specialLine
, header
- , return <$> hline
+ , hline
, list
, latexFragment
, noteBlock
, paraOrPlain
] <?> "block"
-optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks)
+optionalAttributes :: OrgParser Blocks -> OrgParser Blocks
optionalAttributes parser = try $
resetBlockAttributes *> parseBlockAttributes *> parser
@@ -321,7 +318,7 @@ parseAndAddAttribute key value = do
let key' = map toLower key
() <$ addBlockAttribute key' value
-lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines))
+lookupInlinesAttr :: String -> OrgParser (Maybe Inlines)
lookupInlinesAttr attr = try $ do
val <- lookupBlockAttribute attr
maybe (return Nothing)
@@ -335,20 +332,20 @@ lookupInlinesAttr attr = try $ do
type BlockProperties = (Int, String) -- (Indentation, Block-Type)
-orgBlock :: OrgParser (F Blocks)
+orgBlock :: OrgParser Blocks
orgBlock = try $ do
blockProp@(_, blkType) <- blockHeaderStart
($ blockProp) $
case blkType of
"comment" -> withRaw' (const mempty)
- "html" -> withRaw' (return . (B.rawBlock blkType))
- "latex" -> withRaw' (return . (B.rawBlock blkType))
- "ascii" -> withRaw' (return . (B.rawBlock blkType))
- "example" -> withRaw' (return . exampleCode)
- "quote" -> withParsed (fmap B.blockQuote)
+ "html" -> withRaw' (B.rawBlock blkType)
+ "latex" -> withRaw' (B.rawBlock blkType)
+ "ascii" -> withRaw' (B.rawBlock blkType)
+ "example" -> withRaw' exampleCode
+ "quote" -> withParsed B.blockQuote
"verse" -> verseBlock
"src" -> codeBlock
- _ -> withParsed (fmap $ divWithClass blkType)
+ _ -> withParsed (divWithClass blkType)
blockHeaderStart :: OrgParser (Int, String)
blockHeaderStart = try $ (,) <$> indent <*> blockType
@@ -356,10 +353,10 @@ blockHeaderStart = try $ (,) <$> indent <*> blockType
indent = length <$> many spaceChar
blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
-withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withRaw' :: (String -> Blocks) -> BlockProperties -> OrgParser Blocks
withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
-withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withParsed :: (Blocks -> Blocks) -> BlockProperties -> OrgParser Blocks
withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
ignHeaders :: OrgParser ()
@@ -368,11 +365,11 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine)
divWithClass :: String -> Blocks -> Blocks
divWithClass cls = B.divWith ("", [cls], [])
-verseBlock :: BlockProperties -> OrgParser (F Blocks)
+verseBlock :: BlockProperties -> OrgParser Blocks
verseBlock blkProp = try $ do
ignHeaders
content <- rawBlockContent blkProp
- fmap B.para . mconcat . intersperse (pure B.linebreak)
+ B.para . mconcat . intersperse B.linebreak
<$> mapM (parseFromString parseInlines) (lines content)
exportsCode :: [(String, String)] -> Bool
@@ -389,7 +386,7 @@ followingResultsBlock =
*> blankline
*> (unlines <$> many1 exampleLine))
-codeBlock :: BlockProperties -> OrgParser (F Blocks)
+codeBlock :: BlockProperties -> OrgParser Blocks
codeBlock blkProp = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
@@ -399,17 +396,15 @@ codeBlock blkProp = do
let includeCode = exportsCode kv
let includeResults = exportsResults kv
let codeBlck = B.codeBlockWith ( id', classes, kv ) content
- labelledBlck <- maybe (pure codeBlck)
- (labelDiv codeBlck)
+ labelledBlck <- maybe codeBlck (labelDiv codeBlck)
<$> lookupInlinesAttr "caption"
- let resultBlck = pure $ maybe mempty (exampleCode) resultsContent
+ let resultBlck = maybe mempty exampleCode resultsContent
return $ (if includeCode then labelledBlck else mempty)
<> (if includeResults then resultBlck else mempty)
where
labelDiv blk value =
- B.divWith nullAttr <$> (mappend <$> labelledBlock value
- <*> pure blk)
- labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+ B.divWith nullAttr (labelledBlock value <> blk)
+ labelledBlock = B.plain . B.spanWith ("", ["label"], [])
rawBlockContent :: BlockProperties -> OrgParser String
rawBlockContent (indent, blockType) = try $
@@ -418,7 +413,7 @@ rawBlockContent (indent, blockType) = try $
indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
-parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
+parsedBlockContent :: BlockProperties -> OrgParser Blocks
parsedBlockContent blkProps = try $ do
raw <- rawBlockContent blkProps
parseFromString parseBlocks (raw ++ "\n")
@@ -509,9 +504,9 @@ commaEscaped (',':cs@('*':_)) = cs
commaEscaped (',':cs@('#':'+':_)) = cs
commaEscaped cs = cs
-example :: OrgParser (F Blocks)
+example :: OrgParser Blocks
example = try $ do
- return . return . exampleCode =<< unlines <$> many1 exampleLine
+ return . exampleCode =<< unlines <$> many1 exampleLine
exampleCode :: String -> Blocks
exampleCode = B.codeBlockWith ("", ["example"], [])
@@ -520,7 +515,7 @@ exampleLine :: OrgParser String
exampleLine = try $ skipSpaces *> string ": " *> anyLine
-- Drawers for properties or a logbook
-drawer :: OrgParser (F Blocks)
+drawer :: OrgParser Blocks
drawer = try $ do
drawerStart
manyTill drawerLine (try drawerEnd)
@@ -546,14 +541,12 @@ drawerEnd = try $
--
-- Figures (Image on a line by itself, preceded by name and/or caption)
-figure :: OrgParser (F Blocks)
+figure :: OrgParser Blocks
figure = try $ do
(cap, nam) <- nameAndCaption
src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
guard (isImageFilename src)
- return $ do
- cap' <- cap
- return $ B.para $ B.image src nam cap'
+ return $ B.para $ B.image src nam cap
where
nameAndCaption =
do
@@ -569,8 +562,8 @@ figure = try $ do
--
-- Comments, Options and Metadata
-specialLine :: OrgParser (F Blocks)
-specialLine = fmap return . try $ metaLine <|> commentLine
+specialLine :: OrgParser Blocks
+specialLine = try $ metaLine <|> commentLine
metaLine :: OrgParser Blocks
metaLine = try $ mempty
@@ -590,14 +583,14 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
declarationLine :: OrgParser ()
declarationLine = try $ do
key <- metaKey
- inlinesF <- metaInlines
+ inlines <- metaInlines
updateState $ \st ->
- let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
- in st { orgStateMeta' = orgStateMeta' st <> meta' }
+ let meta' = B.setMeta key inlines nullMeta
+ in st { orgStateMeta = orgStateMeta st <> meta' }
return ()
-metaInlines :: OrgParser (F MetaValue)
-metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
+metaInlines :: OrgParser MetaValue
+metaInlines = (MetaInlines . B.toList) <$> inlinesTillNewline
metaKey :: OrgParser String
metaKey = map toLower <$> many1 (noneOf ": \n\r")
@@ -638,11 +631,11 @@ parseFormat = try $ do
--
-- | Headers
-header :: OrgParser (F Blocks)
+header :: OrgParser Blocks
header = try $ do
level <- headerStart
title <- inlinesTillNewline
- return $ B.header level <$> title
+ return $ B.header level title
headerStart :: OrgParser Int
headerStart = try $
@@ -666,7 +659,7 @@ hline = try $ do
-- Tables
--
-data OrgTableRow = OrgContentRow (F [Blocks])
+data OrgTableRow = OrgContentRow [Blocks]
| OrgAlignRow [Alignment]
| OrgHlineRow
@@ -677,13 +670,13 @@ data OrgTable = OrgTable
, orgTableRows :: [[Blocks]]
}
-table :: OrgParser (F Blocks)
+table :: OrgParser Blocks
table = try $ do
lookAhead tableStart
do
rows <- tableRows
- cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption"
- return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows
+ (cptn :: Inlines) <- fromMaybe "" <$> lookupInlinesAttr "caption"
+ return $ ($ cptn) . orgToPandocTable . normalizeTable . rowsToTable $ rows
orgToPandocTable :: OrgTable
-> Inlines
@@ -699,11 +692,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
tableContentRow :: OrgParser OrgTableRow
tableContentRow = try $
- OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
+ OrgContentRow <$> (tableStart *> manyTill tableContentCell newline)
-tableContentCell :: OrgParser (F Blocks)
+tableContentCell :: OrgParser Blocks
tableContentCell = try $
- fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
+ B.plain . trimInlines . mconcat <$> many1Till inline endOfCell
endOfCell :: OrgParser Char
endOfCell = try $ char '|' <|> lookAhead newline
@@ -735,8 +728,8 @@ tableHline = try $
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
rowsToTable :: [OrgTableRow]
- -> F OrgTable
-rowsToTable = foldM (flip rowToContent) zeroTable
+ -> OrgTable
+rowsToTable = foldl' (flip rowToContent) zeroTable
where zeroTable = OrgTable 0 mempty mempty mempty
normalizeTable :: OrgTable
@@ -755,45 +748,43 @@ normalizeTable (OrgTable cols aligns heads lns) =
-- line as a header. All other horizontal lines are discarded.
rowToContent :: OrgTableRow
-> OrgTable
- -> F OrgTable
+ -> OrgTable
rowToContent OrgHlineRow t = maybeBodyToHeader t
-rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t
-rowToContent (OrgContentRow rf) t = do
- rs <- rf
- setLongestRow rs =<< appendToBody rs t
+rowToContent (OrgAlignRow as) t = setLongestRow as . setAligns as $ t
+rowToContent (OrgContentRow rf) t = setLongestRow rf . appendToBody rf $ t
setLongestRow :: [a]
-> OrgTable
- -> F OrgTable
+ -> OrgTable
setLongestRow rs t =
- return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
+ t{ orgTableColumns = max (length rs) (orgTableColumns t) }
maybeBodyToHeader :: OrgTable
- -> F OrgTable
+ -> OrgTable
maybeBodyToHeader t = case t of
OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
- return t{ orgTableHeader = b , orgTableRows = [] }
- _ -> return t
+ t{ orgTableHeader = b , orgTableRows = [] }
+ _ -> t
appendToBody :: [Blocks]
-> OrgTable
- -> F OrgTable
-appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
+ -> OrgTable
+appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] }
setAligns :: [Alignment]
-> OrgTable
- -> F OrgTable
-setAligns aligns t = return $ t{ orgTableAlignments = aligns }
+ -> OrgTable
+setAligns aligns t = t{ orgTableAlignments = aligns }
--
-- LaTeX fragments
--
-latexFragment :: OrgParser (F Blocks)
+latexFragment :: OrgParser Blocks
latexFragment = try $ do
envName <- latexEnvStart
content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
- return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
+ return $ B.rawBlock "latex" (content `inLatexEnv` envName)
where
c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
, c
@@ -823,7 +814,7 @@ latexEnvName = try $ do
--
-- Footnote defintions
--
-noteBlock :: OrgParser (F Blocks)
+noteBlock :: OrgParser Blocks
noteBlock = try $ do
ref <- noteMarker <* skipSpaces
content <- mconcat <$> blocksTillHeaderOrNote
@@ -835,37 +826,37 @@ noteBlock = try $ do
<|> () <$ lookAhead headerStart)
-- Paragraphs or Plain text
-paraOrPlain :: OrgParser (F Blocks)
+paraOrPlain :: OrgParser Blocks
paraOrPlain = try $ do
ils <- parseInlines
nl <- option False (newline >> return True)
try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >>
- return (B.para <$> ils))
- <|> (return (B.plain <$> ils))
+ (return $ B.para ils))
+ <|> (return $ B.plain ils)
-inlinesTillNewline :: OrgParser (F Inlines)
-inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
+inlinesTillNewline :: OrgParser Inlines
+inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline
--
-- list blocks
--
-list :: OrgParser (F Blocks)
+list :: OrgParser Blocks
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
-definitionList :: OrgParser (F Blocks)
+definitionList :: OrgParser Blocks
definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.definitionList . fmap compactify'DL . sequence
+ B.definitionList . compactify'DL
<$> many1 (definitionListItem $ bulletListStart' (Just n))
-bulletList :: OrgParser (F Blocks)
+bulletList :: OrgParser Blocks
bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.bulletList . fmap compactify' . sequence
+ B.bulletList . compactify'
<$> many1 (listItem (bulletListStart' $ Just n))
-orderedList :: OrgParser (F Blocks)
-orderedList = fmap B.orderedList . fmap compactify' . sequence
+orderedList :: OrgParser Blocks
+orderedList = B.orderedList . compactify'
<$> many1 (listItem orderedListStart)
genericListStart :: OrgParser String
@@ -902,7 +893,7 @@ orderedListStart = genericListStart orderedListMarker
where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
definitionListItem :: OrgParser Int
- -> OrgParser (F (Inlines, [Blocks]))
+ -> OrgParser (Inlines, [Blocks])
definitionListItem parseMarkerGetLength = try $ do
markerLength <- parseMarkerGetLength
term <- manyTill (noneOf "\n\r") (try $ string "::")
@@ -911,12 +902,12 @@ definitionListItem parseMarkerGetLength = try $ do
cont <- concat <$> many (listContinuation markerLength)
term' <- parseFromString parseInlines term
contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
- return $ (,) <$> term' <*> fmap (:[]) contents'
+ return (term', [contents'])
-- parse raw text for one list item, excluding start marker and continuations
listItem :: OrgParser Int
- -> OrgParser (F Blocks)
+ -> OrgParser Blocks
listItem start = try $ do
markerLength <- try start
firstLine <- anyLineNewline
@@ -942,7 +933,7 @@ anyLineNewline = (++ "\n") <$> anyLine
-- inline
--
-inline :: OrgParser (F Inlines)
+inline :: OrgParser Inlines
inline =
choice [ whitespace
, linebreak
@@ -964,35 +955,36 @@ inline =
, subscript
, superscript
, inlineLaTeX
+ , smart
, symbol
] <* (guard =<< newlinesCountWithinLimits)
<?> "inline"
-parseInlines :: OrgParser (F Inlines)
-parseInlines = trimInlinesF . mconcat <$> many1 inline
+parseInlines :: OrgParser Inlines
+parseInlines = trimInlines . mconcat <$> many1 inline
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
-whitespace :: OrgParser (F Inlines)
-whitespace = pure B.space <$ skipMany1 spaceChar
+whitespace :: OrgParser Inlines
+whitespace = B.space <$ skipMany1 spaceChar
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
<?> "whitespace"
-linebreak :: OrgParser (F Inlines)
-linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
+linebreak :: OrgParser Inlines
+linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline
-str :: OrgParser (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+str :: OrgParser Inlines
+str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
<* updateLastStrPos
-- | An endline character that can be treated as a space, not a structural
-- break. This should reflect the values of the Emacs variable
-- @org-element-pagaraph-separate@.
-endline :: OrgParser (F Inlines)
+endline :: OrgParser Inlines
endline = try $ do
newline
notFollowedBy blankline
@@ -1010,77 +1002,72 @@ endline = try $ do
decEmphasisNewlinesCount
guard =<< newlinesCountWithinLimits
updateLastPreCharPos
- return . return $ B.space
+ return $ B.space
-cite :: OrgParser (F Inlines)
+cite :: OrgParser Inlines
cite = try $ do
guardEnabled Ext_citations
(cs, raw) <- withRaw normalCite
- return $ (flip B.cite (B.text raw)) <$> cs
+ return $ flip B.cite (B.text raw) cs
-normalCite :: OrgParser (F [Citation])
+normalCite :: OrgParser [Citation]
normalCite = try $ char '['
*> skipSpaces
*> citeList
<* skipSpaces
<* char ']'
-citeList :: OrgParser (F [Citation])
-citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
+citeList :: OrgParser [Citation]
+citeList = sepBy1 citation (try $ char ';' *> skipSpaces)
-citation :: OrgParser (F Citation)
+citation :: OrgParser Citation
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
+ return $ Citation{ citationId = key
+ , citationPrefix = B.toList pref
+ , citationSuffix = B.toList suff
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
where
- prefix = trimInlinesF . mconcat <$>
+ prefix = trimInlines . mconcat <$>
manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
skipSpaces
- rest <- trimInlinesF . mconcat <$>
+ rest <- trimInlines . mconcat <$>
many (notFollowedBy (oneOf ";]") *> inline)
- return $ if hasSpace
- then (B.space <>) <$> rest
- else rest
+ return $
+ if hasSpace
+ then B.space <> rest
+ else rest
-footnote :: OrgParser (F Inlines)
+footnote :: OrgParser Inlines
footnote = try $ inlineNote <|> referencedNote
-inlineNote :: OrgParser (F Inlines)
+inlineNote :: OrgParser Inlines
inlineNote = try $ do
string "[fn:"
ref <- many alphaNum
char ':'
- note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
+ note <- B.para . trimInlines . mconcat <$> many1Till inline (char ']')
when (not $ null ref) $
addToNotesTable ("fn:" ++ ref, note)
- return $ B.note <$> note
+ return $ B.note note
-referencedNote :: OrgParser (F Inlines)
+referencedNote :: OrgParser Inlines
referencedNote = try $ do
ref <- noteMarker
- return $ do
- notes <- asksF orgStateNotes'
+ notes <- asks (orgStateNotes' . finalState)
+ return $
case lookup ref notes of
- Nothing -> return $ B.str $ "[" ++ ref ++ "]"
- Just contents -> do
- st <- askF
- let contents' = runF contents st{ orgStateNotes' = [] }
- return $ B.note contents'
+ Just contents -> B.note contents
+ Nothing -> B.str $ "[" ++ ref ++ "]"
noteMarker :: OrgParser String
noteMarker = try $ do
@@ -1090,37 +1077,37 @@ noteMarker = try $ do
<*> many1Till (noneOf "\n\r\t ") (char ']')
]
-linkOrImage :: OrgParser (F Inlines)
+linkOrImage :: OrgParser Inlines
linkOrImage = explicitOrImageLink
<|> selflinkOrImage
<|> angleLink
<|> plainLink
<?> "link or image"
-explicitOrImageLink :: OrgParser (F Inlines)
+explicitOrImageLink :: OrgParser Inlines
explicitOrImageLink = try $ do
char '['
- srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
+ src <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
title <- enclosedRaw (char '[') (char ']')
title' <- parseFromString (mconcat <$> many inline) title
char ']'
- return $ do
- src <- srcF
- if isImageFilename src && isImageFilename title
- then pure $ B.link src "" $ B.image title mempty mempty
- else linkToInlinesF src =<< title'
+ alt <- internalLink src title'
+ return $
+ (if isImageFilename title
+ then B.link src "" $ B.image title mempty mempty
+ else fromMaybe alt (linkToInlines src title'))
-selflinkOrImage :: OrgParser (F Inlines)
+selflinkOrImage :: OrgParser Inlines
selflinkOrImage = try $ do
src <- char '[' *> linkTarget <* char ']'
- return $ linkToInlinesF src (B.str src)
+ return $ fromMaybe "" (linkToInlines src (B.str src))
-plainLink :: OrgParser (F Inlines)
+plainLink :: OrgParser Inlines
plainLink = try $ do
(orig, src) <- uri
- returnF $ B.link src "" (B.str orig)
+ return $ B.link src "" (B.str orig)
-angleLink :: OrgParser (F Inlines)
+angleLink :: OrgParser Inlines
angleLink = try $ do
char '<'
link <- plainLink
@@ -1136,26 +1123,31 @@ linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
possiblyEmptyLinkTarget :: OrgParser String
possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-applyCustomLinkFormat :: String -> OrgParser (F String)
+applyCustomLinkFormat :: String -> OrgParser String
applyCustomLinkFormat link = do
let (linkType, rest) = break (== ':') link
- return $ do
- formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
- return $ maybe link ($ drop 1 rest) formatter
+ fmts <- asks finalState
+ return $
+ case M.lookup linkType (orgStateLinkFormatters fmts) of
+ Just v -> (v (drop 1 rest))
+ Nothing -> link
-- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind
-- of parsing.
-linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF s =
+linkToInlines :: String -> Inlines -> Maybe Inlines
+linkToInlines = \s ->
case s of
- "" -> pure . B.link "" ""
- ('#':_) -> pure . B.link s ""
- _ | isImageFilename s -> const . pure $ B.image s "" ""
- _ | isFileLink s -> pure . B.link (dropLinkType s) ""
- _ | isUri s -> pure . B.link s ""
- _ | isAbsoluteFilePath s -> pure . B.link ("file://" ++ s) ""
- _ | isRelativeFilePath s -> pure . B.link s ""
- _ -> internalLink s
+ _ | null s -> Just . B.link "" ""
+ _ | isAnchor s -> Just . B.link s ""
+ _ | isImageFilename s -> const . Just $ B.image s "" ""
+ _ | isFileLink s -> Just . B.link (dropLinkType s) ""
+ _ | isUri s -> Just . B.link s ""
+ _ | isAbsoluteFilePath s -> Just . B.link ("file://" ++ s) ""
+ _ | isRelativeFilePath s -> Just . B.link s ""
+ _ -> const Nothing
+
+isAnchor :: String -> Bool
+isAnchor s = "#" `isPrefixOf` s
isFileLink :: String -> Bool
isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s)
@@ -1184,12 +1176,13 @@ isImageFilename filename =
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
protocols = [ "file", "http", "https" ]
-internalLink :: String -> Inlines -> F Inlines
+internalLink :: String -> Inlines -> OrgParser Inlines
internalLink link title = do
- anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
- if anchorB
- then return $ B.link ('#':link) "" title
- else return $ B.emph title
+ anchorB <- asks finalState
+ return $
+ if link `elem` (orgStateAnchorIds anchorB)
+ then B.link ('#':link) "" title
+ else B.emph title
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
@@ -1197,11 +1190,11 @@ internalLink link title = do
-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
-- an anchor.
-anchor :: OrgParser (F Inlines)
+anchor :: OrgParser Inlines
anchor = try $ do
anchorId <- parseAnchor
recordAnchorId anchorId
- returnF $ B.spanWith (solidify anchorId, [], []) mempty
+ return $ B.spanWith (solidify anchorId, [], []) mempty
where
parseAnchor = string "<<"
*> many1 (noneOf "\t\n\r<>\"' ")
@@ -1219,7 +1212,7 @@ solidify = map replaceSpecialChar
| otherwise = '-'
-- | Parses an inline code block and marks it as an babel block.
-inlineCodeBlock :: OrgParser (F Inlines)
+inlineCodeBlock :: OrgParser Inlines
inlineCodeBlock = try $ do
string "src_"
lang <- many1 orgArgWordChar
@@ -1227,7 +1220,7 @@ inlineCodeBlock = try $ do
inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
let attrClasses = [translateLang lang, rundocBlockClass]
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
- returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ return $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
enclosedByPair :: Char -- ^ opening char
-> Char -- ^ closing char
@@ -1235,50 +1228,51 @@ enclosedByPair :: Char -- ^ opening char
-> OrgParser [a]
enclosedByPair s e p = char s *> many1Till p (char e)
-emph :: OrgParser (F Inlines)
-emph = fmap B.emph <$> emphasisBetween '/'
+emph :: OrgParser Inlines
+emph = B.emph <$> emphasisBetween '/'
-strong :: OrgParser (F Inlines)
-strong = fmap B.strong <$> emphasisBetween '*'
+strong :: OrgParser Inlines
+strong = B.strong <$> emphasisBetween '*'
-strikeout :: OrgParser (F Inlines)
-strikeout = fmap B.strikeout <$> emphasisBetween '+'
+strikeout :: OrgParser Inlines
+strikeout = B.strikeout <$> emphasisBetween '+'
-- There is no underline, so we use strong instead.
-underline :: OrgParser (F Inlines)
-underline = fmap B.strong <$> emphasisBetween '_'
+underline :: OrgParser Inlines
+underline = B.strong <$> emphasisBetween '_'
-verbatim :: OrgParser (F Inlines)
-verbatim = return . B.code <$> verbatimBetween '='
+verbatim :: OrgParser Inlines
+verbatim = B.code <$> verbatimBetween '='
-code :: OrgParser (F Inlines)
-code = return . B.code <$> verbatimBetween '~'
+code :: OrgParser Inlines
+code = B.code <$> verbatimBetween '~'
-subscript :: OrgParser (F Inlines)
-subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
+subscript :: OrgParser Inlines
+subscript = B.subscript <$> try (char '_' *> subOrSuperExpr)
-superscript :: OrgParser (F Inlines)
-superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
+superscript :: OrgParser Inlines
+superscript = B.superscript <$> try (char '^' *> subOrSuperExpr)
-math :: OrgParser (F Inlines)
-math = return . B.math <$> choice [ math1CharBetween '$'
+math :: OrgParser Inlines
+math = B.math <$> choice [ math1CharBetween '$'
, mathStringBetween '$'
, rawMathBetween "\\(" "\\)"
]
-displayMath :: OrgParser (F Inlines)
-displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
- , rawMathBetween "$$" "$$"
- ]
-symbol :: OrgParser (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+displayMath :: OrgParser Inlines
+displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
+ , rawMathBetween "$$" "$$"
+ ]
+
+symbol :: OrgParser Inlines
+symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
where updatePositions c = do
when (c `elem` emphasisPreChars) updateLastPreCharPos
when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
return c
emphasisBetween :: Char
- -> OrgParser (F Inlines)
+ -> OrgParser Inlines
emphasisBetween c = try $ do
startEmphasisNewlinesCounting emphasisAllowedNewlines
res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
@@ -1355,9 +1349,9 @@ mathEnd c = try $ do
enclosedInlines :: OrgParser a
-> OrgParser b
- -> OrgParser (F Inlines)
+ -> OrgParser Inlines
enclosedInlines start end = try $
- trimInlinesF . mconcat <$> enclosed start end inline
+ trimInlines . mconcat <$> enclosed start end inline
enclosedRaw :: OrgParser a
-> OrgParser b
@@ -1436,7 +1430,7 @@ notAfterForbiddenBorderChar = do
return $ lastFBCPos /= Just pos
-- | Read a sub- or superscript expression
-subOrSuperExpr :: OrgParser (F Inlines)
+subOrSuperExpr :: OrgParser Inlines
subOrSuperExpr = try $
choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
@@ -1451,10 +1445,10 @@ simpleSubOrSuperString = try $
<*> many1 alphaNum
]
-inlineLaTeX :: OrgParser (F Inlines)
+inlineLaTeX :: OrgParser Inlines
inlineLaTeX = try $ do
cmd <- inlineLaTeXCommand
- maybe mzero returnF $
+ maybe mzero return $
parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
where
parseAsMath :: String -> Maybe Inlines
@@ -1486,3 +1480,31 @@ inlineLaTeXCommand = try $ do
count len anyChar
return cs
_ -> mzero
+
+smart :: OrgParser Inlines
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice [orgApostrophe, dash, ellipses]
+ where orgApostrophe =
+ (char '\'' <|> char '\8217') <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+ *> return (B.str "\x2019")
+
+singleQuoted :: OrgParser Inlines
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ B.singleQuoted . trimInlines . mconcat <$>
+ many1Till inline singleQuoteEnd
+
+-- doubleQuoted will handle regular double-quoted sections, as well
+-- as dialogues with an open double-quote without a close double-quote
+-- in the same paragraph.
+doubleQuoted :: OrgParser Inlines
+doubleQuoted = try $ do
+ doubleQuoteStart
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
+ (B.doubleQuoted . trimInlines $ contents))
+ <|> (return $ (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index b9a77c5d6..a8112bc81 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -51,14 +51,16 @@ import Data.Monoid (mconcat, mempty)
import Data.Sequence (viewr, ViewR(..))
import Data.Char (toLower, isHexDigit, isSpace)
+import Text.Pandoc.Error
+
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n")
-readRSTWithWarnings :: ReaderOptions -> String -> (Pandoc, [String])
-readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String])
+readRSTWithWarnings opts s = (readWith (returnWarnings parseRST)) def{ stateOptions = opts } (s ++ "\n\n")
type RSTParser = Parser [Char] ParserState
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 9f5738478..07b414431 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -48,17 +48,18 @@ import Data.Maybe (fromMaybe)
import Text.HTML.TagSoup
import Data.Char (isAlphaNum)
import qualified Data.Foldable as F
+import Text.Pandoc.Error
-- | Read twiki from an input string and return a Pandoc document.
readTWiki :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readTWiki opts s =
(readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n")
readTWikiWithWarnings :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> (Pandoc, [String])
+ -> Either PandocError (Pandoc, [String])
readTWikiWithWarnings opts s =
(readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n")
where parseTWikiWithWarnings = do
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 63ab80eb9..4565b26a1 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -68,11 +68,12 @@ import Text.Printf
import Control.Applicative ((<$>), (*>), (<*), (<$))
import Data.Monoid
import Debug.Trace (trace)
+import Text.Pandoc.Error
-- | Parse a Textile text and return a Pandoc document.
readTextile :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readTextile opts s =
(readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 834d18c5c..304d6d4c5 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -48,6 +48,7 @@ import Data.Monoid (Monoid, mconcat, mempty, mappend)
import Control.Monad (void, guard, when)
import Data.Default
import Control.Monad.Reader (Reader, runReader, asks)
+import Text.Pandoc.Error
import Data.Time.LocalTime (getZonedTime)
import Text.Pandoc.Compat.Directory(getModificationTime)
@@ -83,12 +84,12 @@ getT2TMeta inps out = do
return $ T2TMeta curDate curMtime (intercalate ", " inps) out
-- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc
+readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc
readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
-- | Read Txt2Tags (ignoring all macros) from an input string returning
-- a Pandoc document
-readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc
+readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc
readTxt2TagsNoMacros = readTxt2Tags def
parseT2T :: T2T Pandoc
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index bc960fd38..e0460c66e 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -85,6 +85,8 @@ module Text.Pandoc.Shared (
-- * Error handling
err,
warn,
+ mapLeft,
+ hush,
-- * Safe read
safeRead,
-- * Temp directory
@@ -113,7 +115,7 @@ import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
-import Control.Monad (msum, unless)
+import Control.Monad (msum, unless, MonadPlus(..))
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Compat.Locale (defaultTimeLocale)
import Data.Time
@@ -855,6 +857,14 @@ warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+mapLeft :: (a -> b) -> Either a c -> Either b c
+mapLeft f (Left x) = Left (f x)
+mapLeft _ (Right x) = Right x
+
+hush :: Either a b -> Maybe b
+hush (Left _) = Nothing
+hush (Right x) = Just x
+
-- | Remove intermediate "." and ".." directories from a path.
--
-- > collapseFilePath "./foo" == "foo"
@@ -883,11 +893,11 @@ collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories
-- Safe read
--
-safeRead :: (Monad m, Read a) => String -> m a
+safeRead :: (MonadPlus m, Read a) => String -> m a
safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
- _ -> fail $ "Could not read `" ++ s ++ "'"
+ _ -> mzero
--
-- Temp directory
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index e5b8c5167..1c33b004a 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -126,7 +126,7 @@ blockToAsciiDoc :: WriterOptions -- ^ Options
blockToAsciiDoc _ Null = return empty
blockToAsciiDoc opts (Plain inlines) = do
contents <- inlineListToAsciiDoc opts inlines
- return $ contents <> cr
+ return $ contents <> blankline
blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do
blockToAsciiDoc opts (Para [Image alt (src,tit)])
blockToAsciiDoc opts (Para inlines) = do
@@ -272,7 +272,7 @@ bulletListItemToAsciiDoc opts blocks = do
contents <- foldM addBlock empty blocks
modify $ \s -> s{ bulletListLevel = lev }
let marker = text (replicate lev '*')
- return $ marker <> space <> contents <> cr
+ return $ marker <> text " " <> contents <> cr
-- | Convert ordered list item (a list of blocks) to asciidoc.
orderedListItemToAsciiDoc :: WriterOptions -- ^ options
@@ -292,7 +292,7 @@ orderedListItemToAsciiDoc opts marker blocks = do
modify $ \s -> s{ orderedListLevel = lev + 1 }
contents <- foldM addBlock empty blocks
modify $ \s -> s{ orderedListLevel = lev }
- return $ text marker <> space <> contents <> cr
+ return $ text marker <> text " " <> contents <> cr
-- | Convert definition list item (label, list of blocks) to asciidoc.
definitionListItemToAsciiDoc :: WriterOptions
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
new file mode 100644
index 000000000..706b27175
--- /dev/null
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -0,0 +1,178 @@
+{-
+Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.CommonMark
+ Copyright : Copyright (C) 2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to CommonMark.
+
+CommonMark: <http://commonmark.org>
+-}
+module Text.Pandoc.Writers.CommonMark (writeCommonMark) where
+
+import Text.Pandoc.Writers.HTML (writeHtmlString)
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared (isTightList)
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import CMark
+import qualified Data.Text as T
+import Control.Monad.Identity (runIdentity, Identity)
+import Control.Monad.State (runState, State, modify, get)
+import Text.Pandoc.Walk (walkM)
+
+-- | Convert Pandoc to CommonMark.
+writeCommonMark :: WriterOptions -> Pandoc -> String
+writeCommonMark opts (Pandoc meta blocks) = rendered
+ where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes')
+ (blocks', notes) = runState (walkM processNotes blocks) []
+ notes' = if null notes
+ then []
+ else [OrderedList (1, Decimal, Period) $ reverse notes]
+ metadata = runIdentity $ metaToJSON opts
+ (blocksToCommonMark opts)
+ (inlinesToCommonMark opts)
+ meta
+ context = defField "body" main $ metadata
+ rendered = if writerStandalone opts
+ then renderTemplate' (writerTemplate opts) context
+ else main
+
+processNotes :: Inline -> State [[Block]] Inline
+processNotes (Note bs) = do
+ modify (bs :)
+ notes <- get
+ return $ Str $ "[" ++ show (length notes) ++ "]"
+processNotes x = return x
+
+node :: NodeType -> [Node] -> Node
+node = Node Nothing
+
+blocksToCommonMark :: WriterOptions -> [Block] -> Identity String
+blocksToCommonMark opts bs = return $
+ T.unpack $ nodeToCommonmark cmarkOpts colwidth
+ $ node DOCUMENT (blocksToNodes bs)
+ where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts
+ then writerColumns opts
+ else 0
+
+inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String
+inlinesToCommonMark opts ils = return $
+ T.unpack $ nodeToCommonmark cmarkOpts colwidth
+ $ node PARAGRAPH (inlinesToNodes ils)
+ where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts]
+ colwidth = if writerWrapText opts
+ then writerColumns opts
+ else 0
+
+blocksToNodes :: [Block] -> [Node]
+blocksToNodes = foldr blockToNodes []
+
+blockToNodes :: Block -> [Node] -> [Node]
+blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :)
+blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :)
+blockToNodes (CodeBlock (_,classes,_) xs) =
+ (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :)
+blockToNodes (RawBlock fmt xs)
+ | fmt == Format "html" = (node (HTML (T.pack xs)) [] :)
+ | otherwise = id
+blockToNodes (BlockQuote bs) =
+ (node BLOCK_QUOTE (blocksToNodes bs) :)
+blockToNodes (BulletList items) =
+ (node (LIST ListAttributes{
+ listType = BULLET_LIST,
+ listDelim = PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = 1 }) (map (node ITEM . blocksToNodes) items) :)
+blockToNodes (OrderedList (start, _sty, delim) items) =
+ (node (LIST ListAttributes{
+ listType = ORDERED_LIST,
+ listDelim = case delim of
+ OneParen -> PAREN_DELIM
+ TwoParens -> PAREN_DELIM
+ _ -> PERIOD_DELIM,
+ listTight = isTightList items,
+ listStart = start }) (map (node ITEM . blocksToNodes) items) :)
+blockToNodes HorizontalRule = (node HRULE [] :)
+blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :)
+blockToNodes (Div _ bs) = (blocksToNodes bs ++)
+blockToNodes (DefinitionList items) = blockToNodes (BulletList items')
+ where items' = map dlToBullet items
+ dlToBullet (term, ((Para xs : ys) : zs)) =
+ Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, ((Plain xs : ys) : zs)) =
+ Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs
+ dlToBullet (term, xs) =
+ Para term : concat xs
+blockToNodes t@(Table _ _ _ _ _) =
+ (node (HTML (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :)
+blockToNodes Null = id
+
+inlinesToNodes :: [Inline] -> [Node]
+inlinesToNodes = foldr inlineToNodes []
+
+inlineToNodes :: Inline -> [Node] -> [Node]
+inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :)
+inlineToNodes Space = (node (TEXT (T.pack " ")) [] :)
+inlineToNodes LineBreak = (node LINEBREAK [] :)
+inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :)
+inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :)
+inlineToNodes (Strikeout xs) =
+ ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</s>")) []]) ++ )
+inlineToNodes (Superscript xs) =
+ ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</sub>")) []]) ++ )
+inlineToNodes (Subscript xs) =
+ ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</sup>")) []]) ++ )
+inlineToNodes (SmallCaps xs) =
+ ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) []
+ : inlinesToNodes xs ++
+ [node (INLINE_HTML (T.pack "</span>")) []]) ++ )
+inlineToNodes (Link ils (url,tit)) =
+ (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (Image ils (url,tit)) =
+ (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :)
+inlineToNodes (RawInline fmt xs)
+ | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :)
+ | otherwise = id
+inlineToNodes (Quoted qt ils) =
+ ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++)
+ where (start, end) = case qt of
+ SingleQuote -> (T.pack "‘", T.pack "’")
+ DoubleQuote -> (T.pack "“", T.pack "”")
+inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :)
+inlineToNodes (Math mt str) =
+ case mt of
+ InlineMath ->
+ (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :)
+ DisplayMath ->
+ (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :)
+inlineToNodes (Span _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++)
+inlineToNodes (Note _) = id -- should not occur
+-- we remove Note elements in preprocessing
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index b10317506..19f8f2f11 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -114,7 +114,8 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) =
n | n == 0 -> "chapter"
| n >= 1 && n <= 5 -> "sect" ++ show n
| otherwise -> "simplesect"
- in inTags True tag [("id", writerIdentifierPrefix opts ++ id')] $
+ in inTags True tag [("id", writerIdentifierPrefix opts ++ id') |
+ not (null id')] $
inTagsSimple "title" (inlinesToDocbook opts title) $$
vcat (map (elementToDocbook opts (lvl + 1)) elements')
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 441392918..4809d2a14 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE ScopedTypeVariables, PatternGuards #-}
+{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-}
{-
Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu>
@@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to docx.
-}
module Text.Pandoc.Writers.Docx ( writeDocx ) where
-import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix )
+import Data.List ( intercalate, isPrefixOf, isSuffixOf )
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
@@ -54,6 +54,8 @@ import Text.Pandoc.Walk
import Text.Highlighting.Kate.Types ()
import Text.XML.Light as XML
import Text.TeXMath
+import Text.Pandoc.Readers.Docx.StyleMap
+import Text.Pandoc.Readers.Docx.Util (elemName)
import Control.Monad.State
import Text.Highlighting.Kate
import Data.Unique (hashUnique, newUnique)
@@ -63,8 +65,8 @@ import qualified Control.Exception as E
import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
extensionFromMimeType)
import Control.Applicative ((<$>), (<|>), (<*>))
-import Data.Maybe (fromMaybe, mapMaybe)
-import Data.Char (isDigit)
+import Data.Maybe (fromMaybe, mapMaybe, maybeToList)
+import Data.Char (ord)
data ListMarker = NoMarker
| BulletMarker
@@ -106,8 +108,9 @@ data WriterState = WriterState{
, stChangesAuthor :: String
, stChangesDate :: String
, stPrintWidth :: Integer
- , stHeadingStyles :: [(Int,String)]
+ , stStyleMaps :: StyleMaps
, stFirstPara :: Bool
+ , stTocTitle :: [Inline]
}
defaultWriterState :: WriterState
@@ -127,8 +130,9 @@ defaultWriterState = WriterState{
, stChangesAuthor = "unknown"
, stChangesDate = "1969-12-31T19:00:00Z"
, stPrintWidth = 1
- , stHeadingStyles = []
+ , stStyleMaps = defaultStyleMaps
, stFirstPara = False
+ , stTocTitle = normalizeInlines [Str "Table of Contents"]
}
type WS a = StateT WriterState IO a
@@ -175,13 +179,36 @@ renumId f renumMap e
renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element]
renumIds f renumMap = map (renumId f renumMap)
+-- | Certain characters are invalid in XML even if escaped.
+-- See #1992
+stripInvalidChars :: Pandoc -> Pandoc
+stripInvalidChars = bottomUp (filter isValidChar)
+
+-- | See XML reference
+isValidChar :: Char -> Bool
+isValidChar (ord -> c)
+ | c == 0x9 = True
+ | c == 0xA = True
+ | c == 0xD = True
+ | 0x20 <= c && c <= 0xD7FF = True
+ | 0xE000 <= c && c <= 0xFFFD = True
+ | 0x10000 <= c && c <= 0x10FFFF = True
+ | otherwise = False
+
+metaValueToInlines :: MetaValue -> [Inline]
+metaValueToInlines (MetaString s) = normalizeInlines [Str s]
+metaValueToInlines (MetaInlines ils) = ils
+metaValueToInlines (MetaBlocks bs) = query return bs
+metaValueToInlines (MetaBool b) = [Str $ show b]
+metaValueToInlines _ = []
+
-- | Produce an Docx file from a Pandoc document.
writeDocx :: WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO BL.ByteString
writeDocx opts doc@(Pandoc meta _) = do
let datadir = writerUserDataDir opts
- let doc' = walk fixDisplayMath doc
+ let doc' = stripInvalidChars . walk fixDisplayMath $ doc
username <- lookup "USERNAME" <$> getEnvironment
utctime <- getCurrentTime
refArchive <- liftM (toArchive . toLazy) $
@@ -215,32 +242,18 @@ writeDocx opts doc@(Pandoc meta _) = do
styledoc <- parseXml refArchive distArchive stylepath
-- parse styledoc for heading styles
- let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) .
- filter ((==Just "xmlns") . qPrefix . attrKey) .
- elAttribs $ styledoc
- let headingStyles =
- let
- mywURI = lookup "w" styleNamespaces
- myName name = QName name mywURI (Just "w")
- getAttrStyleId = findAttr (myName "styleId")
- getNameVal = findChild (myName "name") >=> findAttr (myName "val")
- getNum s | not $ null s, all isDigit s = Just (read s :: Int)
- | otherwise = Nothing
- getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum
- getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum
- toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId
- toMap getF = mapMaybe (toTuple getF) $
- findChildren (myName "style") styledoc
- select a b | not $ null a = a
- | otherwise = b
- in
- select (toMap getEngHeader) (toMap getIntHeader)
+ let styleMaps = getStyleMaps styledoc
+
+ let tocTitle = fromMaybe (stTocTitle defaultWriterState) $
+ metaValueToInlines <$> lookupMeta "toc-title" meta
((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc')
defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username
, stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime
, stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth)
- , stHeadingStyles = headingStyles}
+ , stStyleMaps = styleMaps
+ , stTocTitle = tocTitle
+ }
let epochtime = floor $ utcTimeToPOSIXSeconds utctime
let imgs = M.elems $ stImages st
@@ -393,9 +406,18 @@ writeDocx opts doc@(Pandoc meta _) = do
linkrels
-- styles
- let newstyles = styleToOpenXml $ writerHighlightStyle opts
- let styledoc' = styledoc{ elContent = elContent styledoc ++
- [Elem x | x <- newstyles, writerHighlight opts] }
+ let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts
+ let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) }
+ where
+ modifyContent
+ | writerHighlight opts = (++ map Elem newstyles)
+ | otherwise = filter notTokStyle
+ notTokStyle (Elem el) = notStyle el || notTokId el
+ notTokStyle _ = True
+ notStyle = (/= elemName' "style") . elName
+ notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId")
+ tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok)
+ elemName' = elemName (sNameSpaces styleMaps) "w"
let styleEntry = toEntry stylepath epochtime $ renderXml styledoc'
-- construct word/numbering.xml
@@ -440,6 +462,17 @@ writeDocx opts doc@(Pandoc meta _) = do
]
let relsEntry = toEntry relsPath epochtime $ renderXml rels
+ -- we use dist archive for settings.xml, because Word sometimes
+ -- adds references to footnotes or endnotes we don't have...
+ -- we do, however, copy some settings over from reference
+ let settingsPath = "word/settings.xml"
+ settingsList = [ "w:autoHyphenation"
+ , "w:consecutiveHyphenLimit"
+ , "w:hyphenationZone"
+ , "w:doNotHyphenateCap"
+ ]
+ settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
+
let entryFromArchive arch path =
maybe (fail $ path ++ " corrupt or missing in reference docx")
return
@@ -447,9 +480,6 @@ writeDocx opts doc@(Pandoc meta _) = do
docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml"
themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml"
fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
- -- we use dist archive for settings.xml, because Word sometimes
- -- adds references to footnotes or endnotes we don't have...
- settingsEntry <- entryFromArchive distArchive "word/settings.xml"
webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
headerFooterEntries <- mapM (entryFromArchive refArchive) $
mapMaybe (fmap ("word/" ++) . extractTarget)
@@ -472,10 +502,13 @@ writeDocx opts doc@(Pandoc meta _) = do
miscRelEntries ++ otherMediaEntries
return $ fromArchive archive
-styleToOpenXml :: Style -> [Element]
-styleToOpenXml style = parStyle : map toStyle alltoktypes
+styleToOpenXml :: StyleMaps -> Style -> [Element]
+styleToOpenXml sm style =
+ maybeToList parStyle ++ mapMaybe toStyle alltoktypes
where alltoktypes = enumFromTo KeywordTok NormalTok
- toStyle toktype = mknode "w:style" [("w:type","character"),
+ toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing
+ | otherwise = Just $
+ mknode "w:style" [("w:type","character"),
("w:customStyle","1"),("w:styleId",show toktype)]
[ mknode "w:name" [("w:val",show toktype)] ()
, mknode "w:basedOn" [("w:val","VerbatimChar")] ()
@@ -496,17 +529,35 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes
tokBg toktype = maybe "auto" (drop 1 . fromColor)
$ (tokenBackground =<< lookup toktype tokStyles)
`mplus` backgroundColor style
- parStyle = mknode "w:style" [("w:type","paragraph"),
+ parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing
+ | otherwise = Just $
+ mknode "w:style" [("w:type","paragraph"),
("w:customStyle","1"),("w:styleId","SourceCode")]
[ mknode "w:name" [("w:val","Source Code")] ()
, mknode "w:basedOn" [("w:val","Normal")] ()
, mknode "w:link" [("w:val","VerbatimChar")] ()
, mknode "w:pPr" []
$ mknode "w:wordWrap" [("w:val","off")] ()
+ : mknode "w:noProof" [] ()
: ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()])
$ backgroundColor style )
]
+copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry
+copyChildren refArchive distArchive path timestamp elNames = do
+ ref <- parseXml refArchive distArchive path
+ dist <- parseXml distArchive distArchive path
+ return $ toEntry path timestamp $ renderXml dist{
+ elContent = elContent dist ++ copyContent ref
+ }
+ where
+ strName QName{qName=name, qPrefix=prefix}
+ | Just p <- prefix = p++":"++name
+ | otherwise = name
+ shouldCopy = (`elem` elNames) . strName
+ cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
+ copyContent = map cleanElem . filterChildrenName shouldCopy
+
-- this is the lowest number used for a list numId
baseListId :: Int
baseListId = 1000
@@ -584,6 +635,34 @@ mkLvl marker lvl =
getNumId :: WS Int
getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
+makeTOC :: WriterOptions -> WS [Element]
+makeTOC opts | writerTableOfContents opts = do
+ let depth = "1-"++(show (writerTOCDepth opts))
+ let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
+ tocTitle <- gets stTocTitle
+ title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
+ return $
+ [mknode "w:sdt" [] ([
+ mknode "w:sdtPr" [] (
+ mknode "w:docPartObj" [] (
+ [mknode "w:docPartGallery" [("w:val","Table of Contents")] (),
+ mknode "w:docPartUnique" [] ()]
+ ) -- w:docPartObj
+ ), -- w:sdtPr
+ mknode "w:sdtContent" [] (title++[
+ mknode "w:p" [] (
+ mknode "w:r" [] ([
+ mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (),
+ mknode "w:instrText" [("xml:space","preserve")] tocCmd,
+ mknode "w:fldChar" [("w:fldCharType","separate")] (),
+ mknode "w:fldChar" [("w:fldCharType","end")] ()
+ ]) -- w:r
+ ) -- w:p
+ ])
+ ])] -- w:sdt
+makeTOC _ = return []
+
+
-- | Convert Pandoc document to two lists of
-- OpenXML elements (the main document and footnotes).
writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element])
@@ -602,32 +681,45 @@ writeOpenXML opts (Pandoc meta blocks) = do
Just (MetaBlocks [Para xs]) -> xs
Just (MetaInlines xs) -> xs
_ -> []
- title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
- subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
- authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $
+ title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
+ subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')]
+ authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $
map Para auths
- date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
+ date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
abstract <- if null abstract'
then return []
- else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract'
+ else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract'
let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
convertSpace xs = xs
let blocks' = bottomUp convertSpace blocks
doc' <- (setFirstPara >> blocksToOpenXML opts blocks')
notes' <- reverse `fmap` gets stFootnotes
- let meta' = title ++ subtitle ++ authors ++ date ++ abstract
+ toc <- makeTOC opts
+ let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
return (meta' ++ doc', notes')
-- | Convert a list of Pandoc blocks to OpenXML.
blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
-pStyle :: String -> Element
-pStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
+pCustomStyle :: String -> Element
+pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
+
+pStyleM :: String -> WS XML.Element
+pStyleM styleName = do
+ styleMaps <- gets stStyleMaps
+ let sty' = getStyleId styleName $ sParaStyleMap styleMaps
+ return $ mknode "w:pStyle" [("w:val",sty')] ()
+
+rCustomStyle :: String -> Element
+rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
-rStyle :: String -> Element
-rStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
+rStyleM :: String -> WS XML.Element
+rStyleM styleName = do
+ styleMaps <- gets stStyleMaps
+ let sty' = getStyleId styleName $ sCharStyleMap styleMaps
+ return $ mknode "w:rStyle" [("w:val",sty')] ()
getUniqueId :: MonadIO m => m String
-- the + 20 is to ensure that there are no clashes with the rIds
@@ -641,13 +733,12 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do
let (hs, bs') = span isHeaderBlock bs
header <- blocksToOpenXML opts hs
-- We put the Bibliography style on paragraphs after the header
- rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs'
+ rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs'
return (header ++ rest)
blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs
blockToOpenXML opts (Header lev (ident,_,_) lst) = do
setFirstPara
- headingStyles <- gets stHeadingStyles
- paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $
+ paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $
getParaProps False
contents <- inlinesToOpenXML opts lst
usedIdents <- gets stSectionIds
@@ -660,26 +751,32 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do
,("w:name",bookmarkName)] ()
let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
-blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact")
+blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact")
$ blockToOpenXML opts (Para lst)
-- title beginning with fig: indicates that the image is a figure
blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do
setFirstPara
+ pushParaProp $ pCustomStyle $
+ if null alt
+ then "Figure"
+ else "FigureWithCaption"
paraProps <- getParaProps False
+ popParaProp
contents <- inlinesToOpenXML opts [Image alt (src,tit)]
- captionNode <- withParaProp (pStyle "ImageCaption")
+ captionNode <- withParaProp (pCustomStyle "ImageCaption")
$ blockToOpenXML opts (Para alt)
return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
-- fixDisplayMath sometimes produces a Para [] as artifact
blockToOpenXML _ (Para []) = return []
blockToOpenXML opts (Para lst) = do
- isFirstPara <- gets stFirstPara
+ isFirstPara <- gets stFirstPara
paraProps <- getParaProps $ case lst of
[Math DisplayMath _] -> True
_ -> False
+ bodyTextStyle <- pStyleM "Body Text"
let paraProps' = case paraProps of
- [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "FirstParagraph")]]
- [] -> [mknode "w:pPr" [] [(pStyle "BodyText")]]
+ [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]]
+ [] -> [mknode "w:pPr" [] [bodyTextStyle]]
ps -> ps
modify $ \s -> s { stFirstPara = False }
contents <- inlinesToOpenXML opts lst
@@ -688,11 +785,11 @@ blockToOpenXML _ (RawBlock format str)
| format == Format "openxml" = return [ x | Elem x <- parseXML str ]
| otherwise = return []
blockToOpenXML opts (BlockQuote blocks) = do
- p <- withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks
+ p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks
setFirstPara
return p
blockToOpenXML opts (CodeBlock attrs str) = do
- p <- withParaProp (pStyle "SourceCode") $ (blockToOpenXML opts $ Para [Code attrs str])
+ p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str])
setFirstPara
return p
blockToOpenXML _ HorizontalRule = do
@@ -707,7 +804,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
let captionStr = stringify caption
caption' <- if null caption
then return []
- else withParaProp (pStyle "TableCaption")
+ else withParaProp (pCustomStyle "TableCaption")
$ blockToOpenXML opts (Para caption)
let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
@@ -718,32 +815,36 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do
[ mknode "w:tcBorders" []
$ mknode "w:bottom" [("w:val","single")] ()
, mknode "w:vAlign" [("w:val","bottom")] () ]
- let emptyCell = [mknode "w:p" [] [mknode "w:pPr" []
- [mknode "w:pStyle" [("w:val","Compact")] ()]]]
+ let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]]
let mkcell border contents = mknode "w:tc" []
$ [ borderProps | border ] ++
if null contents
then emptyCell
else contents
- let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells
+ let mkrow border cells = mknode "w:tr" [] $
+ [mknode "w:trPr" [] [
+ mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border]
+ ++ map (mkcell border) cells
let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
let fullrow = 5000 -- 100% specified in pct
let rowwidth = fullrow * sum widths
let mkgridcol w = mknode "w:gridCol"
[("w:w", show (floor (textwidth * w) :: Integer))] ()
+ let hasHeader = not (all null headers)
return $
caption' ++
[mknode "w:tbl" []
( mknode "w:tblPr" []
( mknode "w:tblStyle" [("w:val","TableNormal")] () :
mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
+ mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () :
[ mknode "w:tblCaption" [("w:val", captionStr)] ()
| not (null caption) ] )
: mknode "w:tblGrid" []
(if all (==0) widths
then []
else map mkgridcol widths)
- : [ mkrow True headers' | not (all null headers) ] ++
+ : [ mkrow True headers' | hasHeader ] ++
map (mkrow False) rows'
)]
blockToOpenXML opts (BulletList lst) = do
@@ -767,9 +868,9 @@ blockToOpenXML opts (DefinitionList items) = do
definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element]
definitionListItemToOpenXML opts (term,defs) = do
- term' <- withParaProp (pStyle "DefinitionTerm")
+ term' <- withParaProp (pCustomStyle "DefinitionTerm")
$ blockToOpenXML opts (Para term)
- defs' <- withParaProp (pStyle "Definition")
+ defs' <- withParaProp (pCustomStyle "Definition")
$ concat `fmap` mapM (blocksToOpenXML opts) defs
return $ term' ++ defs'
@@ -833,6 +934,9 @@ withTextProp d p = do
popTextProp
return res
+withTextPropM :: WS Element -> WS a -> WS a
+withTextPropM = (. flip withTextProp) . (>>=)
+
getParaProps :: Bool -> WS [Element]
getParaProps displayMathPara = do
props <- gets stParaProperties
@@ -861,6 +965,9 @@ withParaProp d p = do
popParaProp
return res
+withParaPropM :: WS Element -> WS a -> WS a
+withParaPropM = (. flip withParaProp) . (>>=)
+
formattedString :: String -> WS [Element]
formattedString str = do
props <- getTextProps
@@ -943,25 +1050,26 @@ inlineToOpenXML opts (Math mathType str) = do
Right r -> return [r]
Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str)
inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
-inlineToOpenXML opts (Code attrs str) =
- withTextProp (rStyle "VerbatimChar")
- $ if writerHighlight opts
- then case highlight formatOpenXML attrs str of
- Nothing -> unhighlighted
- Just h -> return h
- else unhighlighted
- where unhighlighted = intercalate [br] `fmap`
- (mapM formattedString $ lines str)
- formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
- toHlTok (toktype,tok) = mknode "w:r" []
- [ mknode "w:rPr" []
- [ rStyle $ show toktype ]
- , mknode "w:t" [("xml:space","preserve")] tok ]
+inlineToOpenXML opts (Code attrs str) = do
+ let unhighlighted = intercalate [br] `fmap`
+ (mapM formattedString $ lines str)
+ formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
+ toHlTok (toktype,tok) = mknode "w:r" []
+ [ mknode "w:rPr" []
+ [ rCustomStyle (show toktype) ]
+ , mknode "w:t" [("xml:space","preserve")] tok ]
+ withTextProp (rCustomStyle "VerbatimChar")
+ $ if writerHighlight opts
+ then case highlight formatOpenXML attrs str of
+ Nothing -> unhighlighted
+ Just h -> return h
+ else unhighlighted
inlineToOpenXML opts (Note bs) = do
notes <- gets stFootnotes
notenum <- getUniqueId
+ footnoteStyle <- rStyleM "Footnote Reference"
let notemarker = mknode "w:r" []
- [ mknode "w:rPr" [] (rStyle "FootnoteRef")
+ [ mknode "w:rPr" [] footnoteStyle
, mknode "w:footnoteRef" [] () ]
let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs
@@ -971,22 +1079,22 @@ inlineToOpenXML opts (Note bs) = do
oldParaProperties <- gets stParaProperties
oldTextProperties <- gets stTextProperties
modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] }
- contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts
+ contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts
$ insertNoteRef bs
modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties,
stTextProperties = oldTextProperties }
let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents
modify $ \s -> s{ stFootnotes = newnote : notes }
return [ mknode "w:r" []
- [ mknode "w:rPr" [] (rStyle "FootnoteRef")
+ [ mknode "w:rPr" [] footnoteStyle
, mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
-- internal link:
inlineToOpenXML opts (Link txt ('#':xs,_)) = do
- contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
+ contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
-- external link:
inlineToOpenXML opts (Link txt (src,_)) = do
- contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt
+ contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
extlinks <- gets stExternalLinks
id' <- case M.lookup src extlinks of
Just i -> return i
@@ -1088,7 +1196,7 @@ defaultFootnotes = [ mknode "w:footnote"
[ mknode "w:p" [] $
[ mknode "w:r" [] $
[ mknode "w:continuationSeparator" [] ()]]]]
-
+
parseXml :: Archive -> Archive -> String -> IO Element
parseXml refArchive distArchive relpath =
case ((findEntryByPath relpath refArchive `mplus`
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 63c3b5501..29ea44e02 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -495,6 +495,9 @@ writeEPUB opts doc@(Pandoc meta _) = do
[] -> "UNTITLED"
(x:_) -> titleText x
x -> stringify x
+
+ let tocTitle = fromMaybe plainTitle $
+ metaValueToString <$> lookupMeta "toc-title" meta
let uuid = case epubIdentifier metadata of
(x:_) -> identifierText x -- use first identifier as UUID
[] -> error "epubIdentifier is null" -- shouldn't happen
@@ -539,7 +542,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
map chapterRefNode chapterEntries)
, unode "guide" $
[ unode "reference" !
- [("type","toc"),("title",plainTitle),
+ [("type","toc"),("title", tocTitle),
("href","nav.xhtml")] $ ()
] ++
[ unode "reference" !
@@ -620,7 +623,7 @@ writeEPUB opts doc@(Pandoc meta _) = do
let navBlocks = [RawBlock (Format "html") $ ppElement $
unode navtag ! ([("epub:type","toc") | epub3] ++
[("id","toc")]) $
- [ unode "h1" ! [("id","toc-title")] $ plainTitle
+ [ unode "h1" ! [("id","toc-title")] $ tocTitle
, unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]]
let landmarks = if epub3
then [RawBlock (Format "html") $ ppElement $
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index ef00ea036..53dc931cc 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -446,19 +446,25 @@ blockToHtml opts (Para lst) = do
contents <- inlineListToHtml opts lst
return $ H.p contents
blockToHtml opts (Div attr@(_,classes,_) bs) = do
- contents <- blockListToHtml opts bs
+ let speakerNotes = "notes" `elem` classes
+ -- we don't want incremental output inside speaker notes, see #1394
+ let opts' = if speakerNotes then opts{ writerIncremental = False } else opts
+ contents <- blockListToHtml opts' bs
let contents' = nl opts >> contents >> nl opts
return $
- if "notes" `elem` classes
- then let opts' = opts{ writerIncremental = False } in
- -- we don't want incremental output inside speaker notes
- case writerSlideVariant opts of
+ if speakerNotes
+ then case writerSlideVariant opts of
RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents'
NoSlides -> addAttrs opts' attr $ H.div $ contents'
_ -> mempty
else addAttrs opts attr $ H.div $ contents'
-blockToHtml _ (RawBlock f str)
+blockToHtml opts (RawBlock f str)
| f == Format "html" = return $ preEscapedString str
+ | f == Format "latex" =
+ case writerHTMLMathMethod opts of
+ MathJax _ -> do modify (\st -> st{ stMath = True })
+ return $ toHtml str
+ _ -> return mempty
| otherwise = return mempty
blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr
blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
@@ -769,6 +775,8 @@ inlineToHtml opts inline =
case writerHTMLMathMethod opts of
LaTeXMathML _ -> do modify (\st -> st {stMath = True})
return $ toHtml str
+ MathJax _ -> do modify (\st -> st {stMath = True})
+ return $ toHtml str
_ -> return mempty
| f == Format "html" -> return $ preEscapedString str
| otherwise -> return mempty
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 0e5ec5c18..58456e3ab 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -42,6 +42,7 @@ import Data.List ( (\\), isSuffixOf, isInfixOf, stripPrefix,
isPrefixOf, intercalate, intersperse )
import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
import Data.Maybe ( fromMaybe )
+import Data.Aeson.Types ( (.:), parseMaybe, withObject )
import Control.Applicative ((<|>))
import Control.Monad.State
import Text.Pandoc.Pretty
@@ -102,8 +103,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do
modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' }
let template = writerTemplate options
-- set stBook depending on documentclass
+ let colwidth = if writerWrapText options
+ then Just $ writerColumns options
+ else Nothing
+ metadata <- metaToJSON options
+ (fmap (render colwidth) . blockListToLaTeX)
+ (fmap (render colwidth) . inlineListToLaTeX)
+ meta
let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
- case lookup "documentclass" (writerVariables options) of
+ case lookup "documentclass" (writerVariables options) `mplus`
+ parseMaybe (withObject "object" (.: "documentclass")) metadata of
Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True}
| otherwise -> return ()
Nothing | any (\x -> "\\documentclass" `isPrefixOf` x &&
@@ -114,13 +123,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
-- \enquote{...} for smart quotes:
when ("{csquotes}" `isInfixOf` template) $
modify $ \s -> s{stCsquotes = True}
- let colwidth = if writerWrapText options
- then Just $ writerColumns options
- else Nothing
- metadata <- metaToJSON options
- (fmap (render colwidth) . blockListToLaTeX)
- (fmap (render colwidth) . inlineListToLaTeX)
- meta
let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then
(blocks', [])
else case last blocks' of
@@ -701,7 +703,7 @@ inlineListToLaTeX lst =
("\\\\[" ++ show (length lbs) ++
"\\baselineskip]") : fixBreaks rest
fixBreaks (y:ys) = y : fixBreaks ys
-
+
isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True
isQuoted _ = False
@@ -750,10 +752,11 @@ inlineToLaTeX (Cite cits lst) = do
inlineToLaTeX (Code (_,classes,_) str) = do
opts <- gets stOptions
+ inHeading <- gets stInHeading
case () of
- _ | writerListings opts -> listingsCode
+ _ | writerListings opts && not inHeading -> listingsCode
| writerHighlight opts && not (null classes) -> highlightCode
- | otherwise -> rawCode
+ | otherwise -> rawCode
where listingsCode = do
inNote <- gets stInNote
when inNote $ modify $ \s -> s{ stVerbInNote = True }
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index d71f0daf8..dee4d56a4 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -57,12 +57,15 @@ import qualified Data.Text as T
type Notes = [[Block]]
type Refs = [([Inline], Target)]
-data WriterState = WriterState { stNotes :: Notes
- , stRefs :: Refs
- , stIds :: [String]
- , stPlain :: Bool }
+data WriterState = WriterState { stNotes :: Notes
+ , stRefs :: Refs
+ , stRefShortcutable :: Bool
+ , stInList :: Bool
+ , stIds :: [String]
+ , stPlain :: Bool }
instance Default WriterState
- where def = WriterState{ stNotes = [], stRefs = [], stIds = [], stPlain = False }
+ where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True,
+ stInList = False, stIds = [], stPlain = False }
-- | Convert Pandoc to Markdown.
writeMarkdown :: WriterOptions -> Pandoc -> String
@@ -453,7 +456,7 @@ blockToMarkdown opts t@(Table caption aligns widths headers rows) = do
$ Pandoc nullMeta [t]
return $ nst $ tbl $$ blankline $$ caption'' $$ blankline
blockToMarkdown opts (BulletList items) = do
- contents <- mapM (bulletListItemToMarkdown opts) items
+ contents <- inList $ mapM (bulletListItemToMarkdown opts) items
return $ cat contents <> blankline
blockToMarkdown opts (OrderedList (start,sty,delim) items) = do
let start' = if isEnabled Ext_startnum opts then start else 1
@@ -464,13 +467,22 @@ blockToMarkdown opts (OrderedList (start,sty,delim) items) = do
let markers' = map (\m -> if length m < 3
then m ++ replicate (3 - length m) ' '
else m) markers
- contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
+ contents <- inList $
+ mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
zip markers' items
return $ cat contents <> blankline
blockToMarkdown opts (DefinitionList items) = do
- contents <- mapM (definitionListItemToMarkdown opts) items
+ contents <- inList $ mapM (definitionListItemToMarkdown opts) items
return $ cat contents <> blankline
+inList :: State WriterState a -> State WriterState a
+inList p = do
+ oldInList <- gets stInList
+ modify $ \st -> st{ stInList = True }
+ res <- p
+ modify $ \st -> st{ stInList = oldInList }
+ return res
+
addMarkdownAttribute :: String -> String
addMarkdownAttribute s =
case span isTagText $ reverse $ parseTags s of
@@ -497,7 +509,12 @@ pipeTable headless aligns rawHeaders rawRows = do
AlignCenter -> ':':replicate w '-' ++ ":"
AlignRight -> replicate (w + 1) '-' ++ ":"
AlignDefault -> replicate (w + 2) '-'
- let header = if headless then empty else torow rawHeaders
+ -- note: pipe tables can't completely lack a
+ -- header; for a headerless table, we need a header of empty cells.
+ -- see jgm/pandoc#1996.
+ let header = if headless
+ then torow (replicate (length aligns) empty)
+ else torow rawHeaders
let border = nowrap $ text "|" <> hcat (intersperse (text "|") $
map toborder $ zip aligns widths) <> text "|"
let body = vcat $ map torow rawRows
@@ -677,12 +694,53 @@ getReference label (src, tit) = do
-- | Convert list of Pandoc inline elements to markdown.
inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
-inlineListToMarkdown opts lst =
- mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat
- where avoidBadWraps [] = []
- avoidBadWraps (Space:Str (c:cs):xs)
- | c `elem` ("-*+>" :: String) = Str (' ':c:cs) : avoidBadWraps xs
- avoidBadWraps (x:xs) = x : avoidBadWraps xs
+inlineListToMarkdown opts lst = do
+ inlist <- gets stInList
+ go (if inlist then avoidBadWrapsInList lst else lst)
+ where go [] = return empty
+ go (i:is) = case i of
+ (Link _ _) -> case is of
+ -- If a link is followed by another link or '[' we don't shortcut
+ (Link _ _):_ -> unshortcutable
+ Space:(Link _ _):_ -> unshortcutable
+ Space:(Str('[':_)):_ -> unshortcutable
+ Space:(RawInline _ ('[':_)):_ -> unshortcutable
+ Space:(Cite _ _):_ -> unshortcutable
+ (Cite _ _):_ -> unshortcutable
+ Str ('[':_):_ -> unshortcutable
+ (RawInline _ ('[':_)):_ -> unshortcutable
+ (RawInline _ (' ':'[':_)):_ -> unshortcutable
+ _ -> shortcutable
+ _ -> shortcutable
+ where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
+ unshortcutable = do
+ iMark <- withState (\s -> s { stRefShortcutable = False })
+ (inlineToMarkdown opts i)
+ modify (\s -> s {stRefShortcutable = True })
+ fmap (iMark <>) (go is)
+
+avoidBadWrapsInList :: [Inline] -> [Inline]
+avoidBadWrapsInList [] = []
+avoidBadWrapsInList (Space:Str ('>':cs):xs) =
+ Str (' ':'>':cs) : avoidBadWrapsInList xs
+avoidBadWrapsInList (Space:Str [c]:[])
+ | c `elem` ['-','*','+'] = Str [' ', c] : []
+avoidBadWrapsInList (Space:Str [c]:Space:xs)
+ | c `elem` ['-','*','+'] = Str [' ', c] : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList (Space:Str cs:Space:xs)
+ | isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList (Space:Str cs:[])
+ | isOrderedListMarker cs = Str (' ':cs) : []
+avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
+
+isOrderedListMarker :: String -> Bool
+isOrderedListMarker xs = (last xs `elem` ['.',')']) &&
+ isRight (runParserT (anyOrderedListMarker >> eof)
+ defaultParserState "" xs)
+
+isRight :: Either a b -> Bool
+isRight (Right _) = True
+isRight (Left _) = False
escapeSpaces :: Inline -> Inline
escapeSpaces (Str s) = Str $ substitute " " "\\ " s
@@ -692,8 +750,10 @@ escapeSpaces x = x
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
inlineToMarkdown opts (Span attrs ils) = do
+ plain <- gets stPlain
contents <- inlineListToMarkdown opts ils
- return $ if isEnabled Ext_raw_html opts
+ return $ if not plain &&
+ (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
then tagWithAttrs "span" attrs <> contents <> text "</span>"
else contents
inlineToMarkdown opts (Emph lst) = do
@@ -726,13 +786,14 @@ inlineToMarkdown opts (Subscript lst) = do
else "<sub>" <> contents <> "</sub>"
inlineToMarkdown opts (SmallCaps lst) = do
plain <- gets stPlain
- if plain
- then inlineListToMarkdown opts $ capitalize lst
- else do
+ if not plain &&
+ (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
+ then do
contents <- inlineListToMarkdown opts lst
return $ tagWithAttrs "span"
- ("",[],[("style","font-variant:small-caps;")])
+ ("",[],[("style","font-variant:small-caps;")])
<> contents <> text "</span>"
+ else inlineListToMarkdown opts $ capitalize lst
inlineToMarkdown opts (Quoted SingleQuote lst) = do
contents <- inlineListToMarkdown opts lst
return $ "‘" <> contents <> "’"
@@ -838,6 +899,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
[Str s] | escapeURI s == srcSuffix -> True
_ -> False
let useRefLinks = writerReferenceLinks opts && not useAuto
+ shortcutable <- gets stRefShortcutable
+ let useShortcutRefLinks = shortcutable &&
+ isEnabled Ext_shortcut_reference_links opts
ref <- if useRefLinks then getReference txt (src, tit) else return []
reftext <- inlineListToMarkdown opts ref
return $ if useAuto
@@ -847,7 +911,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
else if useRefLinks
then let first = "[" <> linktext <> "]"
second = if txt == ref
- then "[]"
+ then if useShortcutRefLinks
+ then ""
+ else "[]"
else "[" <> reftext <> "]"
in first <> second
else if plain
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 2a4129512..81bbdaf3f 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -127,7 +127,7 @@ writeODT opts doc@(Pandoc meta _) = do
return $ fromArchive archive''
transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline
-transformPicMath opts entriesRef (Image lab (src,_)) = do
+transformPicMath opts entriesRef (Image lab (src,t)) = do
res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src
case res of
Left (_ :: E.SomeException) -> do
@@ -145,7 +145,9 @@ transformPicMath opts entriesRef (Image lab (src,_)) = do
epochtime <- floor `fmap` getPOSIXTime
let entry = toEntry newsrc epochtime $ toLazy img
modifyIORef entriesRef (entry:)
- return $ Image lab (newsrc, tit')
+ let fig | "fig:" `isPrefixOf` t = "fig:"
+ | otherwise = ""
+ return $ Image lab (newsrc, fig++tit')
transformPicMath _ entriesRef (Math t math) = do
entries <- readIORef entriesRef
let dt = if t == InlineMath then DisplayInline else DisplayBlock
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 865b7fb35..aee656413 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -288,6 +288,8 @@ blockToOpenDocument o bs
| Plain b <- bs = if null b
then return empty
else inParagraphTags =<< inlinesToOpenDocument o b
+ | Para [Image c (s,'f':'i':'g':':':t)] <- bs
+ = figure c s t
| Para b <- bs = if null b
then return empty
else inParagraphTags =<< inlinesToOpenDocument o b
@@ -334,7 +336,7 @@ blockToOpenDocument o bs
mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles
captionDoc <- if null c
then return empty
- else withParagraphStyle o "Caption" [Para c]
+ else withParagraphStyle o "TableCaption" [Para c]
th <- if all null h
then return empty
else colHeadsToOpenDocument o name (map fst paraHStyles) h
@@ -342,6 +344,12 @@ blockToOpenDocument o bs
return $ inTags True "table:table" [ ("table:name" , name)
, ("table:style-name", name)
] (vcat columns $$ th $$ vcat tr) $$ captionDoc
+ figure caption source title | null caption =
+ withParagraphStyle o "Figure" [Para [Image caption (source,title)]]
+ | otherwise = do
+ imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image caption (source,title)]]
+ captionDoc <- withParagraphStyle o "FigureCaption" [Para caption]
+ return $ imageDoc $$ captionDoc
colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc
colHeadsToOpenDocument o tn ns hs =
@@ -553,4 +561,3 @@ textStyleAttr s
,("style:font-name-asian" ,"Courier New")
,("style:font-name-complex" ,"Courier New")]
| otherwise = []
-
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 717a47000..2dd899680 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -48,11 +48,13 @@ import Data.Char (isSpace, toLower)
type Refs = [([Inline], Target)]
data WriterState =
- WriterState { stNotes :: [[Block]]
- , stLinks :: Refs
- , stImages :: [([Inline], (String, String, Maybe String))]
- , stHasMath :: Bool
- , stOptions :: WriterOptions
+ WriterState { stNotes :: [[Block]]
+ , stLinks :: Refs
+ , stImages :: [([Inline], (String, String, Maybe String))]
+ , stHasMath :: Bool
+ , stHasRawTeX :: Bool
+ , stOptions :: WriterOptions
+ , stTopLevel :: Bool
}
-- | Convert Pandoc to RST.
@@ -60,7 +62,8 @@ writeRST :: WriterOptions -> Pandoc -> String
writeRST opts document =
let st = WriterState { stNotes = [], stLinks = [],
stImages = [], stHasMath = False,
- stOptions = opts }
+ stHasRawTeX = False, stOptions = opts,
+ stTopLevel = True}
in evalState (pandocToRST document) st
-- | Return RST representation of document.
@@ -78,23 +81,32 @@ pandocToRST (Pandoc meta blocks) = do
(fmap (render colwidth) . blockListToRST)
(fmap (trimr . render colwidth) . inlineListToRST)
$ deleteMeta "title" $ deleteMeta "subtitle" meta
- body <- blockListToRST blocks
+ body <- blockListToRST' True $ normalizeHeadings 1 blocks
notes <- liftM (reverse . stNotes) get >>= notesToRST
-- note that the notes may contain refs, so we do them first
refs <- liftM (reverse . stLinks) get >>= refsToRST
pics <- liftM (reverse . stImages) get >>= pictRefsToRST
hasMath <- liftM stHasMath get
+ rawTeX <- liftM stHasRawTeX get
let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics]
let context = defField "body" main
$ defField "toc" (writerTableOfContents opts)
- $ defField "toc-depth" (writerTOCDepth opts)
+ $ defField "toc-depth" (show $ writerTOCDepth opts)
$ defField "math" hasMath
$ defField "title" (render Nothing title :: String)
$ defField "math" hasMath
+ $ defField "rawtex" rawTeX
$ metadata
if writerStandalone opts
then return $ renderTemplate' (writerTemplate opts) context
else return main
+ where
+ normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs'
+ where (cont,bs') = break (headerLtEq l) bs
+ headerLtEq level (Header l' _ _) = l' <= level
+ headerLtEq _ _ = False
+ normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs
+ normalizeHeadings _ [] = []
-- | Return RST representation of reference key table.
refsToRST :: Refs -> State WriterState Doc
@@ -188,11 +200,21 @@ blockToRST (RawBlock f@(Format f') str)
(nest 3 $ text str) $$ blankline
blockToRST HorizontalRule =
return $ blankline $$ "--------------" $$ blankline
-blockToRST (Header level _ inlines) = do
+blockToRST (Header level (name,classes,_) inlines) = do
contents <- inlineListToRST inlines
- let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
- let border = text $ replicate (offset contents) headerChar
- return $ nowrap $ contents $$ border $$ blankline
+ isTopLevel <- gets stTopLevel
+ if isTopLevel
+ then do
+ let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
+ let border = text $ replicate (offset contents) headerChar
+ return $ nowrap $ contents $$ border $$ blankline
+ else do
+ let rub = "rubric:: " <> contents
+ let name' | null name = empty
+ | otherwise = ":name: " <> text name
+ let cls | null classes = empty
+ | otherwise = ":class: " <> text (unwords classes)
+ return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline
blockToRST (CodeBlock (_,classes,kvs) str) = do
opts <- stOptions <$> get
let tabstop = writerTabStop opts
@@ -294,9 +316,19 @@ definitionListItemToRST (label, defs) = do
return $ label' $$ nest tabstop (nestle contents <> cr)
-- | Convert list of Pandoc block elements to RST.
+blockListToRST' :: Bool
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToRST' topLevel blocks = do
+ tl <- gets stTopLevel
+ modify (\s->s{stTopLevel=topLevel})
+ res <- vcat `fmap` mapM blockToRST blocks
+ modify (\s->s{stTopLevel=tl})
+ return res
+
blockListToRST :: [Block] -- ^ List of block elements
-> State WriterState Doc
-blockListToRST blocks = mapM blockToRST blocks >>= return . vcat
+blockListToRST = blockListToRST' False
-- | Convert list of Pandoc inline elements to RST.
inlineListToRST :: [Inline] -> State WriterState Doc
@@ -392,6 +424,9 @@ inlineToRST (Math t str) = do
else blankline $$ (".. math:: " <> text str) $$ blankline
inlineToRST (RawInline f x)
| f == "rst" = return $ text x
+ | f == "latex" || f == "tex" = do
+ modify $ \st -> st{ stHasRawTeX = True }
+ return $ ":raw-latex:`" <> text x <> "`"
| otherwise = return empty
inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para)
inlineToRST Space = return space