diff options
Diffstat (limited to 'src/Text')
39 files changed, 1956 insertions, 1141 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index d2bb85699..dd361f8d7 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -37,10 +37,12 @@ inline links: > module Main where > import Text.Pandoc +> import Text.Pandoc.Error (handleError) > > markdownToRST :: String -> String -> markdownToRST = -> (writeRST def {writerReferenceLinks = True}) . readMarkdown def +> markdownToRST = handleError . +> writeRST def {writerReferenceLinks = True} . +> readMarkdown def > > main = getContents >>= putStrLn . markdownToRST @@ -66,6 +68,7 @@ module Text.Pandoc , mkStringReader , readDocx , readMarkdown + , readCommonMark , readMediaWiki , readRST , readOrg @@ -109,6 +112,7 @@ module Text.Pandoc , writeOrg , writeAsciiDoc , writeHaddock + , writeCommonMark , writeCustom -- * Rendering templates and default templates , module Text.Pandoc.Templates @@ -124,6 +128,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Readers.CommonMark import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.Org @@ -161,11 +166,13 @@ import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Org import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Writers.Haddock +import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options -import Text.Pandoc.Shared (safeRead, warn) +import Text.Pandoc.Shared (safeRead, warn, mapLeft) import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Error import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) @@ -201,19 +208,22 @@ parseFormatSpec = parse formatSpec "" '-' -> Set.delete ext _ -> Set.insert ext -data Reader = StringReader (ReaderOptions -> String -> IO Pandoc) - | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag)) -mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader +data Reader = StringReader (ReaderOptions -> String -> IO (Either PandocError Pandoc)) + | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Either PandocError (Pandoc,MediaBag))) + +mkStringReader :: (ReaderOptions -> String -> (Either PandocError Pandoc)) -> Reader mkStringReader r = StringReader (\o s -> return $ r o s) -mkStringReaderWithWarnings :: (ReaderOptions -> String -> (Pandoc, [String])) -> Reader +mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader mkStringReaderWithWarnings r = StringReader $ \o s -> do - let (doc, warnings) = r o s - mapM_ warn warnings - return doc + case r o s of + Left err -> return $ Left err + Right (doc, warnings) -> do + mapM_ warn warnings + return (Right doc) -mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader +mkBSReader :: (ReaderOptions -> BL.ByteString -> (Either PandocError (Pandoc, MediaBag))) -> Reader mkBSReader r = ByteStringReader (\o s -> return $ r o s) -- | Association list of formats and readers. @@ -225,6 +235,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("markdown_phpextra" , mkStringReaderWithWarnings readMarkdownWithWarnings) ,("markdown_github" , mkStringReaderWithWarnings readMarkdownWithWarnings) ,("markdown_mmd", mkStringReaderWithWarnings readMarkdownWithWarnings) + ,("commonmark" , mkStringReader readCommonMark) ,("rst" , mkStringReaderWithWarnings readRSTWithWarnings ) ,("mediawiki" , mkStringReader readMediaWiki) ,("docbook" , mkStringReader readDocBook) @@ -296,6 +307,7 @@ writers = [ ,("org" , PureStringWriter writeOrg) ,("asciidoc" , PureStringWriter writeAsciiDoc) ,("haddock" , PureStringWriter writeHaddock) + ,("commonmark" , PureStringWriter writeCommonMark) ] getDefaultExtensions :: String -> Set Extension @@ -357,8 +369,8 @@ class ToJSONFilter a => ToJsonFilter a where toJsonFilter :: a -> IO () toJsonFilter = toJSONFilter -readJSON :: ReaderOptions -> String -> Pandoc -readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy +readJSON :: ReaderOptions -> String -> Either PandocError Pandoc +readJSON _ = mapLeft ParseFailure . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs new file mode 100644 index 000000000..73d1e8f08 --- /dev/null +++ b/src/Text/Pandoc/Error.hs @@ -0,0 +1,64 @@ +{- +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Error + Copyright : Copyright (C) 2006-2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +This module provides a standard way to deal with possible errors encounted +during parsing. + +-} +module Text.Pandoc.Error (PandocError(..), handleError) where + +import Text.Parsec.Error +import Text.Parsec.Pos hiding (Line) +import Text.Pandoc.Compat.Except + +type Input = String + +data PandocError = -- | Generic parse failure + ParseFailure String + -- | Error thrown by a Parsec parser + | ParsecError Input ParseError + deriving (Show) + + +instance Error PandocError where + strMsg = ParseFailure + + +-- | An unsafe method to handle `PandocError`s. +handleError :: Either PandocError a -> a +handleError (Right r) = r +handleError (Left err) = + case err of + ParseFailure string -> error string + ParsecError input err' -> + let errPos = errorPos err' + errLine = sourceLine errPos + errColumn = sourceColumn errPos + theline = (lines input ++ [""]) !! (errLine - 1) + in error $ "\nError at " ++ show err' ++ "\n" ++ + theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ + "^" + diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 68b34dcf3..8f0a991ba 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-warn-type-defaults #-} {- Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> @@ -38,8 +39,11 @@ import Control.Monad import Data.Bits import Data.Binary import Data.Binary.Get -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, hush) import qualified Data.Map as M +import Text.Pandoc.Compat.Except +import Control.Monad.Trans +import Data.Maybe (fromMaybe) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl @@ -64,7 +68,7 @@ imageType img = case B.take 4 img of "%!PS" | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" -> return Eps - _ -> fail "Unknown image type" + _ -> (hush . Left) "Unknown image type" imageSize :: ByteString -> Maybe ImageSize imageSize img = do @@ -114,7 +118,7 @@ pngSize img = do ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) - _ -> fail "PNG parse error" + _ -> (hush . Left) "PNG parse error" let (dpix, dpiy) = findpHYs rest'' return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } @@ -143,7 +147,7 @@ gifSize img = do dpiX = 72, dpiY = 72 } - _ -> fail "GIF parse error" + _ -> (hush . Left) "GIF parse error" jpegSize :: ByteString -> Maybe ImageSize jpegSize img = do @@ -174,36 +178,37 @@ findJfifSize bs = do Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of [h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2) - _ -> fail "JPEG parse error" + _ -> (hush . Left) "JPEG parse error" Just (_,bs'') -> do case map fromIntegral $ unpack $ B.take 2 bs'' of [c1,c2] -> do let len = shift c1 8 + c2 -- skip variables findJfifSize $ B.drop len bs'' - _ -> fail "JPEG parse error" - Nothing -> fail "Did not find length record" + _ -> (hush . Left) "JPEG parse error" + Nothing -> (hush . Left) "Did not find length record" exifSize :: ByteString -> Maybe ImageSize -exifSize bs = runGet (Just <$> exifHeader bl) bl +exifSize bs = hush . runGet header $ bl where bl = BL.fromChunks [bs] + header = runExceptT $ exifHeader bl -- NOTE: It would be nicer to do -- runGet ((Just <$> exifHeader) <|> return Nothing) -- which would prevent pandoc from raising an error when an exif header can't -- be parsed. But we only get an Alternative instance for Get in binary 0.6, -- and binary 0.5 ships with ghc 7.6. -exifHeader :: BL.ByteString -> Get ImageSize +exifHeader :: BL.ByteString -> ExceptT String Get ImageSize exifHeader hdr = do - _app1DataSize <- getWord16be - exifHdr <- getWord32be - unless (exifHdr == 0x45786966) $ fail "Did not find exif header" - zeros <- getWord16be - unless (zeros == 0) $ fail "Expected zeros after exif header" + _app1DataSize <- lift getWord16be + exifHdr <- lift getWord32be + unless (exifHdr == 0x45786966) $ throwError "Did not find exif header" + zeros <- lift getWord16be + unless (zeros == 0) $ throwError "Expected zeros after exif header" -- beginning of tiff header -- we read whole thing to use -- in getting data from offsets: let tiffHeader = BL.drop 8 hdr - byteAlign <- getWord16be + byteAlign <- lift getWord16be let bigEndian = byteAlign == 0x4d4d let (getWord16, getWord32, getWord64) = if bigEndian @@ -213,17 +218,17 @@ exifHeader hdr = do num <- getWord32 den <- getWord32 return $ fromIntegral num / fromIntegral den - tagmark <- getWord16 - unless (tagmark == 0x002a) $ fail "Failed alignment sanity check" - ifdOffset <- getWord32 - skip (fromIntegral ifdOffset - 8) -- skip to IDF - numentries <- getWord16 - let ifdEntry = do - tag <- getWord16 >>= \t -> - maybe (return UnknownTagType) return - (M.lookup t tagTypeTable) - dataFormat <- getWord16 - numComponents <- getWord32 + tagmark <- lift getWord16 + unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check" + ifdOffset <- lift getWord32 + lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF + numentries <- lift getWord16 + let ifdEntry :: ExceptT String Get (TagType, DataFormat) + ifdEntry = do + tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable + <$> lift getWord16 + dataFormat <- lift getWord16 + numComponents <- lift getWord32 (fmt, bytesPerComponent) <- case dataFormat of 1 -> return (UnsignedByte . runGet getWord8, 1) @@ -238,9 +243,10 @@ exifHeader hdr = do 10 -> return (SignedRational . runGet getRational, 8) 11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4) 12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8) - _ -> fail $ "Unknown data format " ++ show dataFormat + _ -> throwError $ "Unknown data format " ++ show dataFormat let totalBytes = fromIntegral $ numComponents * bytesPerComponent - payload <- if totalBytes <= 4 -- data is right here + payload <- lift $ + if totalBytes <= 4 -- data is right here then fmt <$> (getLazyByteString (fromIntegral totalBytes) <* skip (4 - totalBytes)) @@ -252,9 +258,9 @@ exifHeader hdr = do entries <- sequence $ replicate (fromIntegral numentries) ifdEntry subentries <- case lookup ExifOffset entries of Just (UnsignedLong offset) -> do - pos <- bytesRead - skip (fromIntegral offset - (fromIntegral pos - 8)) - numsubentries <- getWord16 + pos <- lift bytesRead + lift $ skip (fromIntegral offset - (fromIntegral pos - 8)) + numsubentries <- lift getWord16 sequence $ replicate (fromIntegral numsubentries) ifdEntry _ -> return [] diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index a55d5417e..1246cdc8f 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-} {- Copyright (C) 2014 John MacFarlane <jgm@berkeley.edu> @@ -46,13 +46,15 @@ import Text.Pandoc.MIME (MimeType, getMimeTypeDef) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Maybe (fromMaybe) import System.IO (stderr) +import Data.Data (Data) +import Data.Typeable (Typeable) -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString)) - deriving (Monoid) + deriving (Monoid, Data, Typeable) instance Show MediaBag where show bag = "MediaBag " ++ show (mediaDirectory bag) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 24e31fbb6..1776d14e5 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE DeriveDataTypeable #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -51,6 +52,8 @@ import Data.Default import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.MediaBag (MediaBag) import Data.Monoid +import Data.Data (Data) +import Data.Typeable (Typeable) -- | Individually selectable syntax extensions. data Extension = @@ -74,7 +77,7 @@ data Extension = | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) | Ext_fenced_code_blocks -- ^ Parse fenced code blocks | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks - | Ext_backtick_code_blocks -- ^ Github style ``` code blocks + | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks | Ext_inline_code_attributes -- ^ Allow attributes on inline code | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags @@ -109,7 +112,8 @@ data Extension = | Ext_implicit_header_references -- ^ Implicit reference links for headers | Ext_line_blocks -- ^ RST style line blocks | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML - deriving (Show, Read, Enum, Eq, Ord, Bounded) + | Ext_shortcut_reference_links -- ^ Shortcut reference links + deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable) pandocExtensions :: Set Extension pandocExtensions = Set.fromList @@ -151,6 +155,7 @@ pandocExtensions = Set.fromList , Ext_header_attributes , Ext_implicit_header_references , Ext_line_blocks + , Ext_shortcut_reference_links ] phpMarkdownExtraExtensions :: Set Extension @@ -164,6 +169,7 @@ phpMarkdownExtraExtensions = Set.fromList , Ext_intraword_underscores , Ext_header_attributes , Ext_abbreviations + , Ext_shortcut_reference_links ] githubMarkdownExtensions :: Set Extension @@ -180,6 +186,7 @@ githubMarkdownExtensions = Set.fromList , Ext_strikeout , Ext_hard_line_breaks , Ext_lists_without_preceding_blankline + , Ext_shortcut_reference_links ] multimarkdownExtensions :: Set Extension @@ -202,7 +209,9 @@ multimarkdownExtensions = Set.fromList strictExtensions :: Set Extension strictExtensions = Set.fromList - [ Ext_raw_html ] + [ Ext_raw_html + , Ext_shortcut_reference_links + ] data ReaderOptions = ReaderOptions{ readerExtensions :: Set Extension -- ^ Syntax extensions @@ -220,7 +229,7 @@ data ReaderOptions = ReaderOptions{ , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info , readerTrackChanges :: TrackChanges -} deriving (Show, Read) +} deriving (Show, Read, Data, Typeable) instance Default ReaderOptions where def = ReaderOptions{ @@ -242,7 +251,7 @@ instance Default ReaderOptions -- Writer options -- -data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read) +data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable) data HTMLMathMethod = PlainMath | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js @@ -252,18 +261,18 @@ data HTMLMathMethod = PlainMath | MathML (Maybe String) -- url of MathMLinHTML.js | MathJax String -- url of MathJax.js | KaTeX String String -- url of stylesheet and katex.js - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) data CiteMethod = Citeproc -- use citeproc to render them | Natbib -- output natbib cite commands | Biblatex -- output biblatex cite commands - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Methods for obfuscating email addresses in HTML. data ObfuscationMethod = NoObfuscation | ReferenceObfuscation | JavascriptObfuscation - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Varieties of HTML slide shows. data HTMLSlideVariant = S5Slides @@ -272,13 +281,13 @@ data HTMLSlideVariant = S5Slides | DZSlides | RevealJsSlides | NoSlides - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Options for accepting or rejecting MS Word track-changes. data TrackChanges = AcceptChanges | RejectChanges | AllChanges - deriving (Show, Read, Eq) + deriving (Show, Read, Eq, Data, Typeable) -- | Options for writers data WriterOptions = WriterOptions @@ -324,7 +333,8 @@ data WriterOptions = WriterOptions , writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified , writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader , writerVerbose :: Bool -- ^ Verbose debugging output - } deriving Show + , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine + } deriving (Show, Data, Typeable) instance Default WriterOptions where def = WriterOptions { writerStandalone = False @@ -368,6 +378,7 @@ instance Default WriterOptions where , writerReferenceDocx = Nothing , writerMediaBag = mempty , writerVerbose = False + , writerLaTeXArgs = [] } -- | Returns True if the given extension is enabled. diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index ea6699ac4..2d602a0df 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -71,7 +71,8 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do doc' <- handleImages opts tmpdir doc let source = writer opts doc' - tex2pdf' (writerVerbose opts) tmpdir program source + args = writerLaTeXArgs opts + tex2pdf' (writerVerbose opts) args tmpdir program source handleImages :: WriterOptions -> FilePath -- ^ temp dir to store images @@ -107,8 +108,7 @@ convertImages tmpdir (Image ils (src, tit)) = do img <- convertImage tmpdir src newPath <- case img of - Left e -> src <$ - warn ("Unable to convert image `" ++ src ++ "':\n" ++ e) + Left e -> src <$ warn e Right fp -> return fp return (Image ils (newPath, tit)) convertImages _ x = return x @@ -122,7 +122,8 @@ convertImage tmpdir fname = Just "application/pdf" -> doNothing _ -> JP.readImage fname >>= \res -> case res of - Left msg -> return $ Left msg + Left _ -> return $ Left $ "Unable to convert `" ++ + fname ++ "' for use with pdflatex." Right img -> E.catch (Right fileOut <$ JP.savePngImage fileOut img) $ \(e :: E.SomeException) -> return (Left (show e)) @@ -132,15 +133,16 @@ convertImage tmpdir fname = doNothing = return (Right fname) tex2pdf' :: Bool -- ^ Verbose output + -> [String] -- ^ Arguments to the latex-engine -> FilePath -- ^ temp directory for output -> String -- ^ tex program -> String -- ^ tex source -> IO (Either ByteString ByteString) -tex2pdf' verbose tmpDir program source = do +tex2pdf' verbose args tmpDir program source = do let numruns = if "\\tableofcontents" `isInfixOf` source then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks - (exit, log', mbPdf) <- runTeXProgram verbose program 1 numruns tmpDir source + (exit, log', mbPdf) <- runTeXProgram verbose program args 1 numruns tmpDir source case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -173,9 +175,9 @@ extractMsg log' = do -- Run a TeX program on an input bytestring and return (exit code, -- contents of stdout, contents of produced PDF if any). Rerun -- a fixed number of times to resolve references. -runTeXProgram :: Bool -> String -> Int -> Int -> FilePath -> String +runTeXProgram :: Bool -> String -> [String] -> Int -> Int -> FilePath -> String -> IO (ExitCode, ByteString, Maybe ByteString) -runTeXProgram verbose program runNumber numRuns tmpDir source = do +runTeXProgram verbose program args runNumber numRuns tmpDir source = do let file = tmpDir </> "input.tex" exists <- doesFileExist file unless exists $ UTF8.writeFile file source @@ -188,7 +190,7 @@ runTeXProgram verbose program runNumber numRuns tmpDir source = do let file' = file #endif let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", tmpDir', file'] + "-output-directory", tmpDir', file'] ++ args env' <- getEnvironment let sep = searchPathSeparator:[] let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++) @@ -212,7 +214,7 @@ runTeXProgram verbose program runNumber numRuns tmpDir source = do B.hPutStr stderr err putStr "\n" if runNumber <= numRuns - then runTeXProgram verbose program (runNumber + 1) numRuns tmpDir source + then runTeXProgram verbose program args (runNumber + 1) numRuns tmpDir source else do let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir pdfExists <- doesFileExist pdfFile diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index facf4d3b9..33120e55d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -65,7 +65,8 @@ module Text.Pandoc.Parsing ( anyLine, widthsFromIndices, gridTableWith, readWith, - readWithWarnings, + returnWarnings, + returnState, readWithM, testStringWith, guardEnabled, @@ -104,11 +105,8 @@ module Text.Pandoc.Parsing ( anyLine, applyMacros', Parser, ParserT, - F(..), - runF, - askF, - asksF, token, + generalize, -- * Re-exports from Text.Pandoc.Parsec Stream, runParser, @@ -163,7 +161,8 @@ module Text.Pandoc.Parsing ( anyLine, setSourceColumn, setSourceLine, newPos, - addWarning + addWarning, + (<+?>) ) where @@ -188,30 +187,16 @@ import Data.Default import qualified Data.Set as Set import Control.Monad.Reader import Control.Monad.Identity -import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$), Applicative) +import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$)) import Data.Monoid import Data.Maybe (catMaybes) +import Text.Pandoc.Error + type Parser t s = Parsec t s type ParserT = ParsecT -newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor) - -runF :: F a -> ParserState -> a -runF = runReader . unF - -askF :: F ParserState -askF = F ask - -asksF :: (ParserState -> a) -> F a -asksF f = F $ asks f - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = liftM mconcat . sequence - -- | Parse any line of text anyLine :: Stream [Char] m Char => ParserT [Char] st m [Char] anyLine = do @@ -863,36 +848,30 @@ readWithM :: (Monad m, Functor m) => ParserT [Char] st m a -- ^ parser -> st -- ^ initial state -> String -- ^ input - -> m a + -> m (Either PandocError a) readWithM parser state input = - handleError <$> (runParserT parser state "source" input) - where - handleError (Left err') = - let errPos = errorPos err' - errLine = sourceLine errPos - errColumn = sourceColumn errPos - theline = (lines input ++ [""]) !! (errLine - 1) - in error $ "\nError at " ++ show err' ++ "\n" ++ - theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ - "^" - handleError (Right result) = result + mapLeft (ParsecError input) <$> runParserT parser state "source" input + -- | Parse a string with a given parser and state readWith :: Parser [Char] st a -> st -> String - -> a + -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp -readWithWarnings :: Parser [Char] ParserState a - -> ParserState - -> String - -> (a, [String]) -readWithWarnings p = readWith $ do +returnWarnings :: (Stream s m c) + => ParserT s ParserState m a + -> ParserT s ParserState m (a, [String]) +returnWarnings p = do doc <- p warnings <- stateWarnings <$> getState return (doc, warnings) +-- | Return the final internal state with the result of a parser +returnState :: (Stream s m c) => ParsecT s st m a -> ParsecT s st m (a, st) +returnState p = (,) <$> p <*> getState + -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a, Stream [Char] Identity Char) => ParserT [Char] ParserState Identity a @@ -914,7 +893,6 @@ data ParserState = ParserState stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) stateMeta :: Meta, -- ^ Document metadata - stateMeta' :: F Meta, -- ^ Document metadata stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) stateIdentifiers :: [String], -- ^ List of header identifiers used @@ -929,7 +907,8 @@ data ParserState = ParserState stateCaption :: Maybe Inlines, -- ^ Caption in current environment stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed stateMarkdownAttribute :: Bool, -- ^ True if in markdown=1 context - stateWarnings :: [String] -- ^ Warnings generated by the parser + stateWarnings :: [String], -- ^ Warnings generated by the parser + stateInFootnote :: Bool -- ^ True if in a footnote block. } instance Default ParserState where @@ -1011,7 +990,6 @@ defaultParserState = stateNotes = [], stateNotes' = [], stateMeta = nullMeta, - stateMeta' = return nullMeta, stateHeaderTable = [], stateHeaders = M.empty, stateIdentifiers = [], @@ -1024,7 +1002,8 @@ defaultParserState = stateCaption = Nothing, stateInHtmlBlock = Nothing, stateMarkdownAttribute = False, - stateWarnings = []} + stateWarnings = [], + stateInFootnote = False } -- | Succeed only if the extension is enabled. guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m () @@ -1063,7 +1042,7 @@ data QuoteContext type NoteTable = [(String, String)] -type NoteTable' = [(String, F Blocks)] -- used in markdown reader +type NoteTable' = [(String, Blocks)] -- used in markdown reader newtype Key = Key String deriving (Show, Read, Eq, Ord) @@ -1259,8 +1238,15 @@ applyMacros' target = do else return target -- | Append a warning to the log. -addWarning :: Maybe SourcePos -> String -> Parser [Char] ParserState () +addWarning :: (Stream s m c) => Maybe SourcePos -> String -> ParserT s ParserState m () addWarning mbpos msg = updateState $ \st -> st{ stateWarnings = (msg ++ maybe "" (\pos -> " " ++ show pos) mbpos) : stateWarnings st } + +generalize :: (Monad m) => Parser s st a -> ParserT s st m a +generalize m = mkPT (\ s -> (return $ (return . runIdentity) <$> runIdentity (runParsecT m s))) + +infixr 5 <+?> +(<+?>) :: (Monoid a, Monad m) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a +a <+?> b = a >>= flip fmap (try b <|> return mempty) . (<>) diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 2f2656086..9a97dfc21 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -250,6 +250,11 @@ renderDoc :: (IsString a, Monoid a) => Doc -> DocState a renderDoc = renderList . toList . unDoc +data IsBlock = IsBlock Int [String] + +-- This would be nicer with a pattern synonym +-- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..) + renderList :: (IsString a, Monoid a) => [D] -> DocState a renderList [] = return () @@ -323,11 +328,11 @@ renderList (BreakingSpace : xs) = do outp 1 " " renderList xs' -renderList (b1@Block{} : b2@Block{} : xs) = - renderList (mergeBlocks False b1 b2 : xs) +renderList (Block i1 s1 : Block i2 s2 : xs) = + renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs) -renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) = - renderList (mergeBlocks True b1 b2 : xs) +renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) = + renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs) renderList (Block width lns : xs) = do st <- get @@ -339,15 +344,14 @@ renderList (Block width lns : xs) = do modify $ \s -> s{ prefix = oldPref } renderList xs -mergeBlocks :: Bool -> D -> D -> D -mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) = +mergeBlocks :: Bool -> IsBlock -> IsBlock -> D +mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) = Block (w1 + w2 + if addSpace then 1 else 0) $ zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties) where empties = replicate (abs $ length lns1 - length lns2) "" pad n s = s ++ replicate (n - realLength s) ' ' sp "" = "" sp xs = if addSpace then (' ' : xs) else xs -mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!" blockToDoc :: Int -> [String] -> Doc blockToDoc _ lns = text $ intercalate "\n" lns diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs new file mode 100644 index 000000000..51a35c8ad --- /dev/null +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -0,0 +1,119 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.CommonMark + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of CommonMark-formatted plain text to 'Pandoc' document. + +CommonMark is a strongly specified variant of Markdown: http://commonmark.org. +-} +module Text.Pandoc.Readers.CommonMark (readCommonMark) +where + +import CMark +import Data.Text (unpack, pack) +import Data.List (groupBy) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Error + +-- | Parse a CommonMark formatted string into a 'Pandoc' structure. +readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc +readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack + where opts' = if readerSmart opts + then [optNormalize, optSmart] + else [optNormalize] + +nodeToPandoc :: Node -> Pandoc +nodeToPandoc (Node _ DOCUMENT nodes) = + Pandoc nullMeta $ foldr addBlock [] nodes +nodeToPandoc n = -- shouldn't happen + Pandoc nullMeta $ foldr addBlock [] [n] + +addBlocks :: [Node] -> [Block] +addBlocks = foldr addBlock [] + +addBlock :: Node -> [Block] -> [Block] +addBlock (Node _ PARAGRAPH nodes) = + (Para (addInlines nodes) :) +addBlock (Node _ HRULE _) = + (HorizontalRule :) +addBlock (Node _ BLOCK_QUOTE nodes) = + (BlockQuote (addBlocks nodes) :) +addBlock (Node _ (HTML t) _) = + (RawBlock (Format "html") (unpack t) :) +addBlock (Node _ (CODE_BLOCK info t) _) = + (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) +addBlock (Node _ (HEADER lev) nodes) = + (Header lev ("",[],[]) (addInlines nodes) :) +addBlock (Node _ (LIST listAttrs) nodes) = + (constructor (map (setTightness . addBlocks . children) nodes) :) + where constructor = case listType listAttrs of + BULLET_LIST -> BulletList + ORDERED_LIST -> OrderedList + (start, DefaultStyle, delim) + start = listStart listAttrs + setTightness = if listTight listAttrs + then map paraToPlain + else id + paraToPlain (Para xs) = Plain (xs) + paraToPlain x = x + delim = case listDelim listAttrs of + PERIOD_DELIM -> Period + PAREN_DELIM -> OneParen +addBlock (Node _ ITEM _) = id -- handled in LIST +addBlock _ = id + +children :: Node -> [Node] +children (Node _ _ ns) = ns + +addInlines :: [Node] -> [Inline] +addInlines = foldr addInline [] + +addInline :: Node -> [Inline] -> [Inline] +addInline (Node _ (TEXT t) _) = (map toinl clumps ++) + where raw = unpack t + clumps = groupBy samekind raw + samekind ' ' ' ' = True + samekind ' ' _ = False + samekind _ ' ' = False + samekind _ _ = True + toinl (' ':_) = Space + toinl xs = Str xs +addInline (Node _ LINEBREAK _) = (LineBreak :) +addInline (Node _ SOFTBREAK _) = (Space :) +addInline (Node _ (INLINE_HTML t) _) = + (RawInline (Format "html") (unpack t) :) +addInline (Node _ (CODE t) _) = + (Code ("",[],[]) (unpack t) :) +addInline (Node _ EMPH nodes) = + (Emph (addInlines nodes) :) +addInline (Node _ STRONG nodes) = + (Strong (addInlines nodes) :) +addInline (Node _ (LINK url title) nodes) = + (Link (addInlines nodes) (unpack url, unpack title) :) +addInline (Node _ (IMAGE url title) nodes) = + (Image (addInlines nodes) (unpack url, unpack title) :) +addInline _ = id diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 663960a87..98a142840 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -15,6 +15,9 @@ import Control.Applicative ((<$>)) import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Compat.Except +import Data.Default {- @@ -497,7 +500,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] ?asciidoc-br? - line break from asciidoc docbook output -} -type DB = State DBState +type DB = ExceptT PandocError (State DBState) data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType @@ -507,16 +510,18 @@ data DBState = DBState{ dbSectionLevel :: Int , dbFigureTitle :: Inlines } deriving Show -readDocBook :: ReaderOptions -> String -> Pandoc -readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp') - DBState{ dbSectionLevel = 0 - , dbQuoteType = DoubleQuote - , dbMeta = mempty - , dbAcceptsMeta = False - , dbBook = False - , dbFigureTitle = mempty - } +instance Default DBState where + def = DBState{ dbSectionLevel = 0 + , dbQuoteType = DoubleQuote + , dbMeta = mempty + , dbAcceptsMeta = False + , dbBook = False + , dbFigureTitle = mempty } + + +readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc +readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs + where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp' inp' = handleInstructions inp -- We treat <?asciidoc-br?> specially (issue #1236), converting it @@ -863,7 +868,9 @@ parseBlock (Elem e) = parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry sect n = do isbook <- gets dbBook let n' = if isbook || n == 0 then n + 1 else n - headerText <- case filterChild (named "title") e of + headerText <- case filterChild (named "title") e `mplus` + (filterChild (named "info") e >>= + filterChild (named "title")) of Just t -> getInlines t Nothing -> return mempty modify $ \st -> st{ dbSectionLevel = n } diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index d4680cb7e..67a97ae85 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -96,14 +96,17 @@ import Control.Applicative ((<$>)) import Data.Sequence (ViewL(..), viewl) import qualified Data.Sequence as Seq (null) +import Text.Pandoc.Error +import Text.Pandoc.Compat.Except + readDocx :: ReaderOptions -> B.ByteString - -> (Pandoc, MediaBag) + -> Either PandocError (Pandoc, MediaBag) readDocx opts bytes = case archiveToDocx (toArchive bytes) of - Right docx -> (Pandoc meta blks, mediaBag) where - (meta, blks, mediaBag) = (docxToOutput opts docx) - Left _ -> error $ "couldn't parse docx file" + Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag)) + <$> (docxToOutput opts docx) + Left _ -> Left (ParseFailure "couldn't parse docx file") data DState = DState { docxAnchorMap :: M.Map String String , docxMediaBag :: MediaBag @@ -122,10 +125,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions instance Default DEnv where def = DEnv def False -type DocxContext = ReaderT DEnv (State DState) +type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState)) -evalDocxContext :: DocxContext a -> DEnv -> DState -> a -evalDocxContext ctx env st = evalState (runReaderT ctx env) st +evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a +evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx -- This is empty, but we put it in for future-proofing. spansToKeep :: [String] @@ -277,7 +280,13 @@ runToInlines :: Run -> DocxContext Inlines runToInlines (Run rs runElems) | Just (s, _) <- rStyle rs , s `elem` codeStyles = - return $ code $ concatMap runElemToString runElems + let rPr = resolveDependentRunStyle rs + codeString = code $ concatMap runElemToString runElems + in + return $ case rVertAlign rPr of + Just SupScrpt -> superscript codeString + Just SubScrpt -> subscript codeString + _ -> codeString | otherwise = do let ils = concatReduce (map runElemToInlines runElems) return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils @@ -545,7 +554,7 @@ bodyToOutput (Body bps) = do blks', mediaBag) -docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) +docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag) docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 671d2acf3..cce80fb48 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -65,6 +65,7 @@ import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) import Text.TeXMath (Exp) +import Text.Pandoc.Readers.Docx.Util import Data.Char (readLitChar, ord, chr, isDigit) data ReaderEnv = ReaderEnv { envNotes :: Notes @@ -108,8 +109,6 @@ mapD f xs = in concatMapM handler xs -type NameSpaces = [(String, String)] - data Docx = Docx Document deriving Show @@ -158,6 +157,7 @@ data ParagraphStyle = ParagraphStyle { pStyle :: [String] , indentation :: Maybe ParIndentation , dropCap :: Bool , pHeading :: Maybe (String, Int) + , pNumInfo :: Maybe (String, String) , pBlockQuote :: Maybe Bool } deriving Show @@ -167,6 +167,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing , dropCap = False , pHeading = Nothing + , pNumInfo = Nothing , pBlockQuote = Nothing } @@ -224,6 +225,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int) , isBlockQuote :: Maybe Bool + , numInfo :: Maybe (String, String) , psStyle :: Maybe ParStyle} deriving Show @@ -246,10 +248,6 @@ type ChangeId = String type Author = String type ChangeDate = String -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = do let notes = archiveToNotes archive @@ -266,7 +264,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do entry <- maybeToD $ findEntryByPath "word/document.xml" zf docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem body <- elemToBody namespaces bodyElem return $ Document namespaces body @@ -285,7 +283,7 @@ archiveToStyles zf = case stylesElem of Nothing -> (M.empty, M.empty) Just styElem -> - let namespaces = mapMaybe attrToNSPair (elAttribs styElem) + let namespaces = elemToNameSpaces styElem in ( M.fromList $ buildBasedOnList namespaces styElem (Nothing :: Maybe CharStyle), @@ -353,10 +351,10 @@ archiveToNotes zf = enElem = findEntryByPath "word/endnotes.xml" zf >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) + Just e -> elemToNameSpaces e Nothing -> [] ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces fn = fnElem >>= (elemToNotes ns "footnote") @@ -456,7 +454,7 @@ archiveToNumbering' zf = do Nothing -> Just $ Numbering [] [] [] Just entry -> do numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + let namespaces = elemToNameSpaces numberingElem numElems = findChildren (QName "num" (lookup "w" namespaces) (Just "w")) numberingElem @@ -485,15 +483,6 @@ elemToNotes _ _ _ = Nothing --------------------------------------------- --------------------------------------------- -elemName :: NameSpaces -> String -> String -> QName -elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) - -isElem :: NameSpaces -> String -> String -> Element -> Bool -isElem ns prefix name element = - qName (elName element) == name && - qURI (elName element) == (lookup prefix ns) - - elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildren (elemName ns "w" "gridCol") element @@ -546,20 +535,6 @@ elemToParIndentation ns element | isElem ns "w" "ind" element = stringToInteger} elemToParIndentation _ _ = Nothing - -elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) -elemToNumInfo ns element | isElem ns "w" "p" element = do - let pPr = findChild (elemName ns "w" "pPr") element - numPr = pPr >>= findChild (elemName ns "w" "numPr") - lvl <- numPr >>= - findChild (elemName ns "w" "ilvl") >>= - findAttr (elemName ns "w" "val") - numId <- numPr >>= - findChild (elemName ns "w" "numId") >>= - findAttr (elemName ns "w" "val") - return (numId, lvl) -elemToNumInfo _ _ = Nothing - testBitMask :: String -> Int -> Bool testBitMask bitMaskS n = case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of @@ -578,20 +553,28 @@ elemToBodyPart ns element return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element - , Just (numId, lvl) <- elemToNumInfo ns element = do + , Just (numId, lvl) <- getNumInfo ns element = do sty <- asks envParStyles let parstyle = elemToParagraphStyle ns element sty parparts <- mapD (elemToParPart ns) (elChildren element) num <- asks envNumbering case lookupLevel numId lvl num of - Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> throwError WrongElem + Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts + Nothing -> throwError WrongElem elemToBodyPart ns element | isElem ns "w" "p" element = do - sty <- asks envParStyles - let parstyle = elemToParagraphStyle ns element sty - parparts <- mapD (elemToParPart ns) (elChildren element) - return $ Paragraph parstyle parparts + sty <- asks envParStyles + let parstyle = elemToParagraphStyle ns element sty + parparts <- mapD (elemToParPart ns) (elChildren element) + case pNumInfo parstyle of + Just (numId, lvl) -> do + num <- asks envNumbering + case lookupLevel numId lvl num of + Just levelInfo -> + return $ ListItem parstyle numId lvl levelInfo parparts + Nothing -> + throwError WrongElem + Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChild (elemName ns "w" "tblPr") element @@ -771,6 +754,7 @@ elemToParagraphStyle ns element sty Just _ -> True Nothing -> False , pHeading = getParStyleField headingLev sty style + , pNumInfo = getParStyleField numInfo sty style , pBlockQuote = getParStyleField isBlockQuote sty style } elemToParagraphStyle _ _ _ = defaultParagraphStyle @@ -857,12 +841,26 @@ getBlockQuote ns element , styleName `elem` blockQuoteStyleNames = Just True getBlockQuote _ _ = Nothing +getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo ns element = do + let numPr = findChild (elemName ns "w" "pPr") element >>= + findChild (elemName ns "w" "numPr") + lvl = fromMaybe "0" (numPr >>= + findChild (elemName ns "w" "ilvl") >>= + findAttr (elemName ns "w" "val")) + numId <- numPr >>= + findChild (elemName ns "w" "numId") >>= + findAttr (elemName ns "w" "val") + return (numId, lvl) + + elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData elemToParStyleData ns element parentStyle = ParStyleData { headingLev = getHeaderLevel ns element , isBlockQuote = getBlockQuote ns element + , numInfo = getNumInfo ns element , psStyle = parentStyle } diff --git a/src/Text/Pandoc/Readers/Docx/StyleMap.hs b/src/Text/Pandoc/Readers/Docx/StyleMap.hs new file mode 100644 index 000000000..2901ea2a3 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/StyleMap.hs @@ -0,0 +1,106 @@ +module Text.Pandoc.Readers.Docx.StyleMap ( StyleMaps(..) + , defaultStyleMaps + , getStyleMaps + , getStyleId + , hasStyleName + ) where + +import Text.XML.Light +import Text.Pandoc.Readers.Docx.Util +import Control.Monad.State +import Data.Char (toLower) +import qualified Data.Map as M + +newtype ParaStyleMap = ParaStyleMap ( M.Map String String ) +newtype CharStyleMap = CharStyleMap ( M.Map String String ) + +class StyleMap a where + alterMap :: (M.Map String String -> M.Map String String) -> a -> a + getMap :: a -> M.Map String String + +instance StyleMap ParaStyleMap where + alterMap f (ParaStyleMap m) = ParaStyleMap $ f m + getMap (ParaStyleMap m) = m + +instance StyleMap CharStyleMap where + alterMap f (CharStyleMap m) = CharStyleMap $ f m + getMap (CharStyleMap m) = m + +insert :: (StyleMap a) => Maybe String -> Maybe String -> a -> a +insert (Just k) (Just v) m = alterMap (M.insert k v) m +insert _ _ m = m + +getStyleId :: (StyleMap a) => String -> a -> String +getStyleId s = M.findWithDefault (filter (/=' ') s) (map toLower s) . getMap + +hasStyleName :: (StyleMap a) => String -> a -> Bool +hasStyleName styleName = M.member (map toLower styleName) . getMap + +data StyleMaps = StyleMaps { sNameSpaces :: NameSpaces + , sParaStyleMap :: ParaStyleMap + , sCharStyleMap :: CharStyleMap + } + +data StyleType = ParaStyle | CharStyle + +defaultStyleMaps :: StyleMaps +defaultStyleMaps = StyleMaps { sNameSpaces = [] + , sParaStyleMap = ParaStyleMap M.empty + , sCharStyleMap = CharStyleMap M.empty + } + +type StateM a = State StyleMaps a + +getStyleMaps :: Element -> StyleMaps +getStyleMaps docElem = execState genStyleMap state' + where + state' = defaultStyleMaps {sNameSpaces = elemToNameSpaces docElem} + genStyleItem e = do + styleType <- getStyleType e + styleId <- getAttrStyleId e + nameValLowercase <- fmap (map toLower) `fmap` getNameVal e + case styleType of + Just ParaStyle -> modParaStyleMap $ insert nameValLowercase styleId + Just CharStyle -> modCharStyleMap $ insert nameValLowercase styleId + _ -> return () + genStyleMap = do + style <- elemName' "style" + let styles = findChildren style docElem + forM_ styles genStyleItem + +modParaStyleMap :: (ParaStyleMap -> ParaStyleMap) -> StateM () +modParaStyleMap f = modify $ \s -> + s {sParaStyleMap = f $ sParaStyleMap s} + +modCharStyleMap :: (CharStyleMap -> CharStyleMap) -> StateM () +modCharStyleMap f = modify $ \s -> + s {sCharStyleMap = f $ sCharStyleMap s} + +getStyleType :: Element -> StateM (Maybe StyleType) +getStyleType e = do + styleTypeStr <- getAttrType e + case styleTypeStr of + Just "paragraph" -> return $ Just ParaStyle + Just "character" -> return $ Just CharStyle + _ -> return Nothing + +getAttrType :: Element -> StateM (Maybe String) +getAttrType el = do + name <- elemName' "type" + return $ findAttr name el + +getAttrStyleId :: Element -> StateM (Maybe String) +getAttrStyleId el = do + name <- elemName' "styleId" + return $ findAttr name el + +getNameVal :: Element -> StateM (Maybe String) +getNameVal el = do + name <- elemName' "name" + val <- elemName' "val" + return $ findChild name el >>= findAttr val + +elemName' :: String -> StateM QName +elemName' name = do + namespaces <- gets sNameSpaces + return $ elemName namespaces "w" name diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs new file mode 100644 index 000000000..891f107b0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -0,0 +1,26 @@ +module Text.Pandoc.Readers.Docx.Util ( + NameSpaces + , elemName + , isElem + , elemToNameSpaces + ) where + +import Text.XML.Light +import Data.Maybe (mapMaybe) + +type NameSpaces = [(String, String)] + +elemToNameSpaces :: Element -> NameSpaces +elemToNameSpaces = mapMaybe attrToNSPair . elAttribs + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = QName name (lookup prefix ns) (Just prefix) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == lookup prefix ns diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index b061d8683..338540533 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -35,18 +35,20 @@ import Control.DeepSeq.Generics (deepseq, NFData) import Debug.Trace (trace) +import Text.Pandoc.Error + type Items = M.Map String (FilePath, MimeType) -readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag) +readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag) readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes) -runEPUB :: Except String a -> a -runEPUB = either error id . runExcept +runEPUB :: Except PandocError a -> Either PandocError a +runEPUB = runExcept -- Note that internal reference are aggresively normalised so that all ids -- are of the form "filename#id" -- -archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) +archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) archiveToEPUB os archive = do -- root is path to folder with manifest file in (root, content) <- getManifest archive @@ -64,19 +66,20 @@ archiveToEPUB os archive = do return $ (ast, mediaBag) where os' = os {readerParseRaw = True} - parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc + parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do when (readerTrace os) (traceM path) doc <- mimeToReader mime r path let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc - mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc + mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do fname <- findEntryByPathE (root </> path) archive - return $ fixInternalReferences path . + html <- either throwError return . readHtml os' . UTF8.toStringLazy $ fromEntry fname + return $ fixInternalReferences path html mimeToReader s _ path | s `elem` imageMimes = return $ imageToPandoc path | otherwise = return $ mempty @@ -114,7 +117,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"] type CoverImage = FilePath -parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items) +parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items) parseManifest content = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest @@ -130,7 +133,7 @@ parseManifest content = do mime <- findAttrE (emptyName "media-type") e return (uid, (href, mime)) -parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)] +parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do spine <- findElementE (dfName "spine") e let itemRefs = findChildren (dfName "itemref") spine @@ -141,7 +144,7 @@ parseSpine is e = do guard linear findAttr (emptyName "idref") ref -parseMeta :: MonadError String m => Element -> m Meta +parseMeta :: MonadError PandocError m => Element -> m Meta parseMeta content = do meta <- findElementE (dfName "metadata") content let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True @@ -159,7 +162,7 @@ renameMeta :: String -> String renameMeta "creator" = "author" renameMeta s = s -getManifest :: MonadError String m => Archive -> m (String, Element) +getManifest :: MonadError PandocError m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry @@ -266,18 +269,18 @@ emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: MonadError String m => QName -> Element -> m String +findAttrE :: MonadError PandocError m => QName -> Element -> m String findAttrE q e = mkE "findAttr" $ findAttr q e -findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry +findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry findEntryByPathE (normalise -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: MonadError String m => String -> m Element +parseXMLDocE :: MonadError PandocError m => String -> m Element parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc -findElementE :: MonadError String m => QName -> Element -> m Element +findElementE :: MonadError PandocError m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x -mkE :: MonadError String m => String -> Maybe a -> m a -mkE s = maybe (throwError s) return +mkE :: MonadError PandocError m => String -> Maybe a -> m a +mkE s = maybe (throwError . ParseFailure $ s) return diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 02ff07e73..52358e51e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, +ViewPatterns#-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -43,7 +44,7 @@ import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, renderTags' - , escapeURI, safeRead ) + , escapeURI, safeRead, mapLeft ) import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) , Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans)) @@ -62,15 +63,18 @@ import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) +import Text.Pandoc.Error + +import Text.Parsec.Error + -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readHtml opts inp = - case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of - Left err' -> error $ "\nError at " ++ show err' - Right result -> result + mapLeft (ParseFailure . getError) . flip runReader def $ + runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -78,6 +82,9 @@ readHtml opts inp = meta <- stateMeta . parserState <$> getState bs' <- replaceNotes (B.toList blocks) return $ Pandoc meta bs' + getError (errorMessages -> ms) = case ms of + [] -> "" + (m:_) -> messageString m replaceNotes :: [Block] -> TagParser [Block] replaceNotes = walkM replaceNotes' @@ -880,7 +887,7 @@ htmlTag :: Monad m => (Tag String -> Bool) -> ParserT [Char] st m (Tag String, String) htmlTag f = try $ do - lookAhead $ char '<' >> (oneOf "/!?" <|> letter) + lookAhead $ char '<' >> ((oneOf "/!?" >> nonspaceChar) <|> letter) (next : _) <- getInput >>= return . canonicalizeTags . parseTags guard $ f next -- advance the parser diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index c03382c17..aa2534afc 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -26,15 +26,17 @@ import Documentation.Haddock.Parser import Documentation.Haddock.Types import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse - -> Pandoc + -> Either PandocError Pandoc readHaddock opts = #if MIN_VERSION_haddock_library(1,2,0) - B.doc . docHToBlocks . trace' . _doc . parseParas + Right . B.doc . docHToBlocks . trace' . _doc . parseParas #else - B.doc . docHToBlocks . trace' . parseParas + Right . B.doc . docHToBlocks . trace' . parseParas #endif where trace' x = if readerTrace opts then trace (show x) x diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 942b9f3b3..08aa0b20e 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -42,26 +42,25 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, mathDisplay, mathInline) import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Char ( chr, ord ) +import Data.Char ( chr, ord, isLetter, isAlphaNum ) import Control.Monad.Trans (lift) import Control.Monad import Text.Pandoc.Builder -import Data.Char (isLetter, isAlphaNum) import Control.Applicative import Data.Monoid -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, maybeToList) import System.Environment (getEnv) -import System.FilePath (replaceExtension, (</>)) -import Data.List (intercalate, intersperse) +import System.FilePath (replaceExtension, (</>), takeExtension, addExtension) +import Data.List (intercalate) import qualified Data.Map as M import qualified Control.Exception as E -import System.FilePath (takeExtension, addExtension) import Text.Pandoc.Highlighting (fromListingsLanguage) +import Text.Pandoc.Error -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } parseLaTeX :: LP Pandoc @@ -73,17 +72,16 @@ parseLaTeX = do let (Pandoc _ bs') = doc bs return $ Pandoc meta bs' -type LP = Parser [Char] ParserState +type LP = Parser String ParserState anyControlSeq :: LP String anyControlSeq = do char '\\' next <- option '\n' anyChar - name <- case next of - '\n' -> return "" - c | isLetter c -> (c:) <$> (many letter <* optional sp) - | otherwise -> return [c] - return name + case next of + '\n' -> return "" + c | isLetter c -> (c:) <$> (many letter <* optional sp) + | otherwise -> return [c] controlSeq :: String -> LP String controlSeq name = try $ do @@ -103,7 +101,7 @@ dimenarg = try $ do sp :: LP () sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') - <|> (try $ newline <* lookAhead anyChar <* notFollowedBy blankline) + <|> try (newline <* lookAhead anyChar <* notFollowedBy blankline) isLowerHex :: Char -> Bool isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' @@ -161,30 +159,28 @@ mathInline :: LP String -> LP Inlines mathInline p = math <$> (try p >>= applyMacros') mathChars :: LP String -mathChars = concat <$> - many ( many1 (satisfy (\c -> c /= '$' && c /='\\')) - <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar) - ) +mathChars = (concat <$>) $ + many $ + many1 (satisfy (\c -> c /= '$' && c /='\\')) + <|> (\c -> ['\\',c]) <$> try (char '\\' *> anyChar) quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines quoted' f starter ender = do startchs <- starter try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs -double_quote :: LP Inlines -double_quote = - ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") +doubleQuote :: LP Inlines +doubleQuote = + quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") <|> quoted' doubleQuoted (string "“") (void $ char '”') -- the following is used by babel for localized quotes: <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") <|> quoted' doubleQuoted (string "\"") (void $ char '"') - ) -single_quote :: LP Inlines -single_quote = - ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) +singleQuote :: LP Inlines +singleQuote = + quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) - ) inline :: LP Inlines inline = (mempty <$ comment) @@ -194,17 +190,17 @@ inline = (mempty <$ comment) <|> inlineEnvironment <|> inlineGroup <|> (char '-' *> option (str "-") - ((char '-') *> option (str "–") (str "—" <$ char '-'))) - <|> double_quote - <|> single_quote + (char '-' *> option (str "–") (str "—" <$ char '-'))) + <|> doubleQuote + <|> singleQuote <|> (str "”" <$ try (string "''")) <|> (str "”" <$ char '”') <|> (str "’" <$ char '\'') <|> (str "’" <$ char '’') <|> (str "\160" <$ char '~') - <|> (mathDisplay $ string "$$" *> mathChars <* string "$$") - <|> (mathInline $ char '$' *> mathChars <* char '$') - <|> (superscript <$> (char '^' *> tok)) + <|> mathDisplay (string "$$" *> mathChars <* string "$$") + <|> mathInline (char '$' *> mathChars <* char '$') + <|> try (superscript <$> (char '^' *> tok)) <|> (subscript <$> (char '_' *> tok)) <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) <|> (str . (:[]) <$> tildeEscape) @@ -238,20 +234,32 @@ block = (mempty <$ comment) blocks :: LP Blocks blocks = mconcat <$> many block +getRawCommand :: String -> LP String +getRawCommand name' = do + rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) + return $ '\\' : name' ++ snd rawargs + +lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v +lookupListDefault d = (fromMaybe d .) . lookupList + where + lookupList l m = msum $ map (`M.lookup` m) l + blockCommand :: LP Blocks blockCommand = try $ do name <- anyControlSeq guard $ name /= "begin" && name /= "end" star <- option "" (string "*" <* optional sp) let name' = name ++ star - case M.lookup name' blockCommands of - Just p -> p - Nothing -> case M.lookup name blockCommands of - Just p -> p - Nothing -> mzero + let raw = do + rawcommand <- getRawCommand name' + transformed <- applyMacros' rawcommand + guard $ transformed /= rawcommand + notFollowedBy $ parseFromString inlines transformed + parseFromString blocks transformed + lookupListDefault raw [name',name] blockCommands inBrackets :: Inlines -> Inlines -inBrackets x = (str "[") <> x <> (str "]") +inBrackets x = str "[" <> x <> str "]" -- eat an optional argument and one or more arguments in braces ignoreInlines :: String -> (String, LP Inlines) @@ -259,19 +267,21 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawInline "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> withRaw optargs) ignoreBlocks :: String -> (String, LP Blocks) ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawBlock "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> withRaw optargs) blockCommands :: M.Map String (LP Blocks) blockCommands = M.fromList $ [ ("par", mempty <$ skipopts) - , ("title", mempty <$ (skipopts *> tok >>= addMeta "title")) + , ("title", mempty <$ (skipopts *> + (grouped inline >>= addMeta "title") + <|> (grouped block >>= addMeta "title"))) , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) , ("author", mempty <$ (skipopts *> authors)) -- -- in letter class, temp. store address & sig as title, author @@ -302,7 +312,7 @@ blockCommands = M.fromList $ -- , ("hrule", pure horizontalRule) , ("rule", skipopts *> tok *> tok *> pure horizontalRule) - , ("item", skipopts *> loose_item) + , ("item", skipopts *> looseItem) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) , ("caption", skipopts *> setCaption) @@ -341,7 +351,7 @@ setCaption :: LP Blocks setCaption = do ils <- tok mblabel <- option Nothing $ - try $ spaces >> controlSeq "label" >> (Just <$> tok) + try $ spaces' >> controlSeq "label" >> (Just <$> tok) let ils' = case mblabel of Just lab -> ils <> spanWith ("",[],[("data-label", stringify lab)]) mempty @@ -369,7 +379,7 @@ section (ident, classes, kvs) lvl = do let lvl' = if hasChapters then lvl + 1 else lvl skipopts contents <- grouped inline - lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced) + lab <- option ident $ try (spaces' >> controlSeq "label" >> spaces' >> braced) attr' <- registerHeader (lab, classes, kvs) contents return $ headerWith attr' lvl' contents @@ -382,25 +392,21 @@ inlineCommand = try $ do star <- option "" (string "*") let name' = name ++ star let raw = do - rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) - let rawcommand = '\\' : name ++ star ++ snd rawargs + rawcommand <- getRawCommand name' transformed <- applyMacros' rawcommand if transformed /= rawcommand then parseFromString inlines transformed else if parseRaw then return $ rawInline "latex" rawcommand else return mempty - case M.lookup name' inlineCommands of - Just p -> p <|> raw - Nothing -> case M.lookup name inlineCommands of - Just p -> p <|> raw - Nothing -> raw + lookupListDefault mzero [name',name] inlineCommands + <|> raw unlessParseRaw :: LP () unlessParseRaw = getOption readerParseRaw >>= guard . not isBlockCommand :: String -> Bool -isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands +isBlockCommand s = s `M.member` blockCommands inlineEnvironments :: M.Map String (LP Inlines) @@ -446,7 +452,7 @@ inlineCommands = M.fromList $ , ("cref", unlessParseRaw >> (inBrackets <$> tok)) -- from cleveref.sty , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) - , ("ensuremath", mathInline $ braced) + , ("ensuremath", mathInline braced) , ("texorpdfstring", (\_ x -> x) <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") @@ -495,7 +501,7 @@ inlineCommands = M.fromList $ , ("v", option (str "v") $ try $ tok >>= accent hacek) , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") - , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) + , ("\\", linebreak <$ (optional (bracketed inline) *> spaces')) , (",", pure mempty) , ("@", pure mempty) , (" ", lit "\160") @@ -508,7 +514,7 @@ inlineCommands = M.fromList $ , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("verb", doverb) - , ("lstinline", doverb) + , ("lstinline", skipopts *> doverb) , ("Verb", doverb) , ("texttt", (code . stringify . toList) <$> tok) , ("url", (unescapeURL <$> braced) >>= \url -> @@ -618,7 +624,7 @@ lit = pure . str accent :: (Char -> String) -> Inlines -> LP Inlines accent f ils = case toList ils of - (Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys) + (Str (x:xs) : ys) -> return $ fromList (Str (f x ++ xs) : ys) [] -> mzero _ -> return ils @@ -807,7 +813,7 @@ breve 'u' = "ŭ" breve c = [c] tok :: LP Inlines -tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) +tok = try $ grouped inline <|> inlineCommand <|> str <$> count 1 inlineChar opt :: LP Inlines opt = bracketed inline <* optional sp @@ -825,17 +831,14 @@ environment :: LP Blocks environment = do controlSeq "begin" name <- braced - case M.lookup name environments of - Just p -> p <|> rawEnv name - Nothing -> rawEnv name + M.findWithDefault mzero name environments + <|> rawEnv name inlineEnvironment :: LP Inlines inlineEnvironment = try $ do controlSeq "begin" name <- braced - case M.lookup name inlineEnvironments of - Just p -> p - Nothing -> mzero + M.findWithDefault mzero name inlineEnvironments rawEnv :: String -> LP Blocks rawEnv name = do @@ -848,15 +851,11 @@ rawEnv name = do ---- -type IncludeParser = ParserT [Char] [String] IO String +type IncludeParser = ParserT String [String] IO String -- | Replace "include" commands with file contents. -handleIncludes :: String -> IO String -handleIncludes s = do - res <- runParserT includeParser' [] "input" s - case res of - Right s' -> return s' - Left e -> error $ show e +handleIncludes :: String -> IO (Either PandocError String) +handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s includeParser' :: IncludeParser includeParser' = @@ -912,7 +911,7 @@ include' = do <|> try (string "input") <|> string "usepackage" -- skip options - skipMany $ try $ char '[' *> (manyTill anyChar (char ']')) + skipMany $ try $ char '[' *> manyTill anyChar (char ']') fs <- (map trim . splitBy (==',')) <$> braced' return $ if name == "usepackage" then map (maybeAddExtension ".sty") fs @@ -985,14 +984,14 @@ keyvals = try $ char '[' *> manyTill keyval (char ']') alltt :: String -> LP Blocks alltt t = walk strToCode <$> parseFromString blocks (substitute " " "\\ " $ substitute "%" "\\%" $ - concat $ intersperse "\\\\\n" $ lines t) + intercalate "\\\\\n" $ lines t) where strToCode (Str s) = Code nullAttr s strToCode x = x -rawLaTeXBlock :: Parser [Char] ParserState String +rawLaTeXBlock :: LP String rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) -rawLaTeXInline :: Parser [Char] ParserState Inline +rawLaTeXInline :: LP Inline rawLaTeXInline = do raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand) RawInline "latex" <$> applyMacros' raw @@ -1001,24 +1000,24 @@ addImageCaption :: Blocks -> LP Blocks addImageCaption = walkM go where go (Image alt (src,tit)) = do mbcapt <- stateCaption <$> getState - case mbcapt of - Just ils -> return (Image (toList ils) (src, "fig:")) - Nothing -> return (Image alt (src,tit)) + return $ case mbcapt of + Just ils -> Image (toList ils) (src, "fig:") + Nothing -> Image alt (src,tit) go x = return x addTableCaption :: Blocks -> LP Blocks addTableCaption = walkM go where go (Table c als ws hs rs) = do mbcapt <- stateCaption <$> getState - case mbcapt of - Just ils -> return (Table (toList ils) als ws hs rs) - Nothing -> return (Table c als ws hs rs) + return $ case mbcapt of + Just ils -> Table (toList ils) als ws hs rs + Nothing -> Table c als ws hs rs go x = return x environments :: M.Map String (LP Blocks) environments = M.fromList [ ("document", env "document" blocks <* skipMany anyChar) - , ("letter", env "letter" letter_contents) + , ("letter", env "letter" letterContents) , ("figure", env "figure" $ resetCaption *> skipopts *> blocks >>= addImageCaption) , ("center", env "center" blocks) @@ -1031,12 +1030,12 @@ environments = M.fromList , ("verse", blockQuote <$> env "verse" blocks) , ("itemize", bulletList <$> listenv "itemize" (many item)) , ("description", definitionList <$> listenv "description" (many descItem)) - , ("enumerate", ordered_list) + , ("enumerate", orderedList') , ("alltt", alltt =<< verbEnv "alltt") , ("code", guardEnabled Ext_literate_haskell *> (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code")) - , ("verbatim", codeBlock <$> (verbEnv "verbatim")) + , ("verbatim", codeBlock <$> verbEnv "verbatim") , ("Verbatim", do options <- option [] keyvals let kvs = [ (if k == "firstnumber" then "startFrom" @@ -1044,17 +1043,17 @@ environments = M.fromList let classes = [ "numberLines" | lookup "numbers" options == Just "left" ] let attr = ("",classes,kvs) - codeBlockWith attr <$> (verbEnv "Verbatim")) + codeBlockWith attr <$> verbEnv "Verbatim") , ("lstlisting", do options <- option [] keyvals let kvs = [ (if k == "firstnumber" then "startFrom" else k, v) | (k,v) <- options ] let classes = [ "numberLines" | lookup "numbers" options == Just "left" ] - ++ maybe [] (:[]) (lookup "language" options + ++ maybeToList (lookup "language" options >>= fromListingsLanguage) let attr = (fromMaybe "" (lookup "label" options),classes,kvs) - codeBlockWith attr <$> (verbEnv "lstlisting")) + codeBlockWith attr <$> verbEnv "lstlisting") , ("minted", do options <- option [] keyvals lang <- grouped (many1 $ satisfy (/='}')) let kvs = [ (if k == "firstnumber" @@ -1064,7 +1063,7 @@ environments = M.fromList [ "numberLines" | lookup "linenos" options == Just "true" ] let attr = ("",classes,kvs) - codeBlockWith attr <$> (verbEnv "minted")) + codeBlockWith attr <$> verbEnv "minted") , ("obeylines", parseFromString (para . trimInlines . mconcat <$> many inline) =<< intercalate "\\\\\n" . lines <$> verbEnv "obeylines") @@ -1083,8 +1082,8 @@ environments = M.fromList , ("alignat*", mathEnv para (Just "aligned") "alignat*") ] -letter_contents :: LP Blocks -letter_contents = do +letterContents :: LP Blocks +letterContents = do bs <- blocks st <- getState -- add signature (author) and address (title) @@ -1111,8 +1110,8 @@ closing = do item :: LP Blocks item = blocks *> controlSeq "item" *> skipopts *> blocks -loose_item :: LP Blocks -loose_item = do +looseItem :: LP Blocks +looseItem = do ctx <- stateParserContext `fmap` getState if ctx == ListItemState then mzero @@ -1155,8 +1154,8 @@ verbEnv name = do res <- manyTill anyChar endEnv return $ stripTrailingNewlines res -ordered_list :: LP Blocks -ordered_list = do +orderedList' :: LP Blocks +orderedList' = do optional sp (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ try $ char '[' *> anyOrderedListMarker <* char ']' @@ -1168,7 +1167,7 @@ ordered_list = do optional sp num <- grouped (many1 digit) spaces - return $ (read num + 1 :: Int) + return (read num + 1 :: Int) bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs @@ -1182,14 +1181,14 @@ paragraph = do preamble :: LP Blocks preamble = mempty <$> manyTill preambleBlock beginDoc where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" - preambleBlock = (void comment) - <|> (void sp) - <|> (void blanklines) - <|> (void macro) - <|> (void blockCommand) - <|> (void anyControlSeq) - <|> (void braced) - <|> (void anyChar) + preambleBlock = void comment + <|> void sp + <|> void blanklines + <|> void macro + <|> void blockCommand + <|> void anyControlSeq + <|> void braced + <|> void anyChar ------- @@ -1265,7 +1264,7 @@ complexNatbibCitation mode = try $ do suff <- ils skipSpaces optional $ char ';' - return $ addPrefix pref $ addSuffix suff $ cits' + return $ addPrefix pref $ addSuffix suff cits' (c:cits, raw) <- withRaw $ grouped parseOne return $ cite (c{ citationMode = mode }:cits) (rawInline "latex" $ "\\citetext" ++ raw) @@ -1275,7 +1274,7 @@ complexNatbibCitation mode = try $ do parseAligns :: LP [Alignment] parseAligns = try $ do char '{' - let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}") + let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ (char '@' >> braced) maybeBar let cAlign = AlignCenter <$ char 'c' let lAlign = AlignLeft <$ char 'l' @@ -1289,13 +1288,13 @@ parseAligns = try $ do return aligns' hline :: LP () -hline = () <$ (try $ spaces >> controlSeq "hline") +hline = () <$ try (spaces' *> controlSeq "hline" <* spaces') lbreak :: LP () -lbreak = () <$ (try $ spaces *> controlSeq "\\") +lbreak = () <$ try (spaces' *> controlSeq "\\" <* spaces') amp :: LP () -amp = () <$ (try $ spaces *> char '&') +amp = () <$ try (spaces' *> char '&') parseTableRow :: Int -- ^ number of columns -> LP [Blocks] @@ -1308,20 +1307,22 @@ parseTableRow cols = try $ do guard $ cells' /= [mempty] -- note: a & b in a three-column table leaves an empty 3rd cell: let cells'' = cells' ++ replicate (cols - numcells) mempty - spaces + spaces' return cells'' +spaces' :: LP () +spaces' = spaces *> skipMany (comment *> spaces) + simpTable :: Bool -> LP Blocks simpTable hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces >> tok) - spaces + when hasWidthParameter $ () <$ (spaces' >> tok) + skipopts aligns <- parseAligns let cols = length aligns optional hline header' <- option [] $ try (parseTableRow cols <* lbreak <* hline) rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline) - spaces - skipMany (comment *> spaces) + spaces' let header'' = if null header' then replicate cols mempty else header' diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 187b479c3..5e0cef4f8 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,5 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -31,11 +32,11 @@ Conversion of markdown-formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Markdown ( readMarkdown, readMarkdownWithWarnings ) where -import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate ) +import Data.List ( transpose, sortBy, intersperse, intercalate, elemIndex) import qualified Data.Map as M import Data.Scientific (coefficient, base10Exponent) import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum, toLower ) +import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition import qualified Data.Text as T @@ -55,34 +56,41 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Data.Monoid (mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>)) import Control.Monad +import Control.Monad.Reader import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error -type MarkdownParser = Parser [Char] ParserState +type MarkdownParser a = ParserT [Char] ParserState (Reader ParserState) a -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMarkdown opts s = - (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + runMarkdown opts s parseMarkdown -- | Read markdown from an input string and return a pair of a Pandoc document -- and a list of warnings. readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> (Pandoc, [String]) -readMarkdownWithWarnings opts s = - (readWithWarnings parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines + -> Either PandocError (Pandoc, [String]) +readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown) + +runMarkdown :: forall a . ReaderOptions -> String -> MarkdownParser a -> Either PandocError a +runMarkdown opts inp p = fst <$> res + where + imd = readWithM (returnState p) def{ stateOptions = opts } (inp ++ "\n\n") + res :: Either PandocError (a, ParserState) + res = runReader imd s + s :: ParserState + s = either def snd res -- -- Constants and data structure definitions @@ -119,10 +127,10 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -isNull :: F Inlines -> Bool -isNull ils = B.isNull $ runF ils def +isNull :: Inlines -> Bool +isNull = B.isNull -spnl :: Parser [Char] st () +spnl :: Monad m => ParserT [Char] st m () spnl = try $ do skipSpaces optional newline @@ -162,9 +170,9 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: MarkdownParser (F Inlines) +inlinesInBalancedBrackets :: MarkdownParser Inlines inlinesInBalancedBrackets = charsInBalancedBrackets >>= - parseFromString (trimInlinesF . mconcat <$> many inline) + parseFromString (trimInlines . mconcat <$> many inline) charsInBalancedBrackets :: MarkdownParser [Char] charsInBalancedBrackets = do @@ -181,16 +189,16 @@ charsInBalancedBrackets = do -- document structure -- -titleLine :: MarkdownParser (F Inlines) +titleLine :: MarkdownParser Inlines titleLine = try $ do char '%' skipSpaces res <- many $ (notFollowedBy newline >> inline) <|> try (endline >> whitespace) newline - return $ trimInlinesF $ mconcat res + return $ trimInlines $ mconcat res -authorsLine :: MarkdownParser (F [Inlines]) +authorsLine :: MarkdownParser [Inlines] authorsLine = try $ do char '%' skipSpaces @@ -199,13 +207,13 @@ authorsLine = try $ do (char ';' <|> try (newline >> notFollowedBy blankline >> spaceChar)) newline - return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors + return $ filter (not . isNull) $ map (trimInlines . mconcat) authors -dateLine :: MarkdownParser (F Inlines) +dateLine :: MarkdownParser Inlines dateLine = try $ do char '%' skipSpaces - trimInlinesF . mconcat <$> manyTill inline newline + trimInlines . mconcat <$> manyTill inline newline titleBlock :: MarkdownParser () titleBlock = pandocTitleBlock <|> mmdTitleBlock @@ -215,20 +223,16 @@ pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') title <- option mempty titleLine - author <- option (return []) authorsLine + author <- option [] authorsLine date <- option mempty dateLine optional blanklines - let meta' = do title' <- title - author' <- author - date' <- date - return $ - (if B.isNull title' then id else B.setMeta "title" title') - . (if null author' then id else B.setMeta "author" author') - . (if B.isNull date' then id else B.setMeta "date" date') - $ nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } - -yamlMetaBlock :: MarkdownParser (F Blocks) + let meta' = (if B.isNull title then id else B.setMeta "title" title) + . (if null author then id else B.setMeta "author" author) + . (if B.isNull date then id else B.setMeta "date" date) + $ nullMeta + updateState $ \st -> st{ stateMeta = stateMeta st <> meta' } + +yamlMetaBlock :: MarkdownParser Blocks yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition @@ -241,17 +245,18 @@ yamlMetaBlock = try $ do optional blanklines opts <- stateOptions <$> getState meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> return $ return $ + Right (Yaml.Object hashmap) -> return $ H.foldrWithKey (\k v m -> if ignorable k then m - else B.setMeta (T.unpack k) - (yamlToMeta opts v) m) + else case yamlToMeta opts v of + Left _ -> m + Right v' -> B.setMeta (T.unpack k) v' m) nullMeta hashmap - Right Yaml.Null -> return $ return nullMeta + Right Yaml.Null -> return nullMeta Right _ -> do addWarning (Just pos) "YAML header is not an object" - return $ return nullMeta + return nullMeta Left err' -> do case err' of InvalidYaml (Just YamlParseException{ @@ -270,41 +275,50 @@ yamlMetaBlock = try $ do _ -> addWarning (Just pos) $ "Could not parse YAML header: " ++ show err' - return $ return nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + return nullMeta + updateState $ \st -> st{ stateMeta = stateMeta st <> meta' } return mempty -- ignore fields ending with _ ignorable :: Text -> Bool -ignorable t = (T.pack "_") `T.isSuffixOf` t - -toMetaValue :: ReaderOptions -> Text -> MetaValue -toMetaValue opts x = - case readMarkdown opts (T.unpack x) of - Pandoc _ [Plain xs] -> MetaInlines xs - Pandoc _ [Para xs] +ignorable t = T.pack "_" `T.isSuffixOf` t + +toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue +toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) + where + toMeta p = + case p of + Pandoc _ [Plain xs] -> MetaInlines xs + Pandoc _ [Para xs] | endsWithNewline x -> MetaBlocks [Para xs] | otherwise -> MetaInlines xs - Pandoc _ bs -> MetaBlocks bs - where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t - -yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue + Pandoc _ bs -> MetaBlocks bs + endsWithNewline t = T.pack "\n" `T.isSuffixOf` t + opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} + meta_exts = Set.fromList [ Ext_pandoc_title_block + , Ext_mmd_title_block + , Ext_yaml_metadata_block + ] + +yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t yamlToMeta _ (Yaml.Number n) -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = MetaString $ show + | base10Exponent n >= 0 = return $ MetaString $ show $ coefficient n * (10 ^ base10Exponent n) - | otherwise = MetaString $ show n -yamlToMeta _ (Yaml.Bool b) = MetaBool b -yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts) - $ V.toList xs -yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m -> + | otherwise = return $ MetaString $ show n +yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b +yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts) + (V.toList xs) +yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m -> if ignorable k then m - else M.insert (T.unpack k) - (yamlToMeta opts v) m) - M.empty o -yamlToMeta _ _ = MetaString "" + else (do + v' <- yamlToMeta opts v + m' <- m + return (M.insert (T.unpack k) v' m'))) + (return M.empty) o +yamlToMeta _ _ = return $ MetaString "" stopLine :: MarkdownParser () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () @@ -314,17 +328,21 @@ mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block kvPairs <- many1 kvPair blanklines - updateState $ \st -> st{ stateMeta' = stateMeta' st <> - return (Meta $ M.fromList kvPairs) } + updateState $ \st -> st{ stateMeta = stateMeta st <> + (Meta $ M.fromList kvPairs) } kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') + skipMany1 spaceNoNewline val <- manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) + guard $ not . null . trim $ val let key' = concat $ words $ map toLower key let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val return (key',val') + where + spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -335,11 +353,11 @@ parseMarkdown = do optional titleBlock blocks <- parseBlocks st <- getState - let meta = runF (stateMeta' st) st - let Pandoc _ bs = B.doc $ runF blocks st + let meta = stateMeta st + let Pandoc _ bs = B.doc blocks return $ Pandoc meta bs -referenceKey :: MarkdownParser (F Blocks) +referenceKey :: MarkdownParser Blocks referenceKey = try $ do pos <- getPosition skipNonindentSpaces @@ -366,7 +384,7 @@ referenceKey = try $ do Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" Nothing -> return () updateState $ \s -> s { stateKeys = M.insert key target oldkeys } - return $ return mempty + return mempty referenceTitle :: MarkdownParser String referenceTitle = try $ do @@ -386,7 +404,7 @@ quotedTitle c = try $ do -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for -- an abbreviation. -abbrevKey :: MarkdownParser (F Blocks) +abbrevKey :: MarkdownParser Blocks abbrevKey = do guardEnabled Ext_abbreviations try $ do @@ -395,7 +413,7 @@ abbrevKey = do char ':' skipMany (satisfy (/= '\n')) blanklines - return $ return mempty + return mempty noteMarker :: MarkdownParser String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') @@ -413,7 +431,7 @@ rawLines = do rest <- many rawLine return $ unlines (first:rest) -noteBlock :: MarkdownParser (F Blocks) +noteBlock :: MarkdownParser Blocks noteBlock = try $ do pos <- getPosition skipNonindentSpaces @@ -425,7 +443,7 @@ noteBlock = try $ do rest <- many $ try $ blanklines >> indentSpaces >> rawLines let raw = unlines (first:rest) ++ "\n" optional blanklines - parsed <- parseFromString parseBlocks raw + parsed <- parseFromString (inFootnote parseBlocks) raw let newnote = (ref, parsed) oldnotes <- stateNotes' <$> getState case lookup ref oldnotes of @@ -434,32 +452,40 @@ noteBlock = try $ do updateState $ \s -> s { stateNotes' = newnote : oldnotes } return mempty +inFootnote :: MarkdownParser a -> MarkdownParser a +inFootnote p = do + st <- stateInFootnote <$> getState + updateState (\s -> s { stateInFootnote = True } ) + r <- p + updateState (\s -> s { stateInFootnote = st } ) + return r + -- -- parsing blocks -- -parseBlocks :: MarkdownParser (F Blocks) +parseBlocks :: MarkdownParser Blocks parseBlocks = mconcat <$> manyTill block eof -block :: MarkdownParser (F Blocks) +block :: MarkdownParser Blocks block = do tr <- getOption readerTrace pos <- getPosition res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock - , guardEnabled Ext_latex_macros *> (macro >>= return . return) + , guardEnabled Ext_latex_macros *> macro -- note: bulletList needs to be before header because of -- the possibility of empty list items: - , bulletList , header , lhsCodeBlock - , rawTeXBlock , divHtml , htmlBlock , table - , lineBlock , codeBlockIndented + , rawTeXBlock + , lineBlock , blockQuote , hrule , orderedList @@ -470,29 +496,28 @@ block = do , para , plain ] <?> "block" - when tr $ do - st <- getState + when tr $ trace (printf "line %d: %s" (sourceLine pos) - (take 60 $ show $ B.toList $ runF res st)) (return ()) + (take 60 . show . B.toList $ res)) (return ()) return res -- -- header blocks -- -header :: MarkdownParser (F Blocks) +header :: MarkdownParser Blocks header = setextHeader <|> atxHeader <?> "header" -atxHeader :: MarkdownParser (F Blocks) +atxHeader :: MarkdownParser Blocks atxHeader = try $ do - level <- many1 (char '#') >>= return . length + level <- length <$> many1 (char '#') notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces - text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) + text <- trimInlines . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr' <- registerHeader attr (runF text defaultParserState) - return $ B.headerWith attr' level <$> text + attr' <- registerHeader attr text + return $ B.headerWith attr' level text atxClosing :: MarkdownParser Attr atxClosing = try $ do @@ -519,25 +544,25 @@ mmdHeaderIdentifier = do skipSpaces return (ident,[],[]) -setextHeader :: MarkdownParser (F Blocks) +setextHeader :: MarkdownParser Blocks setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) + text <- trimInlines . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) attr <- setextHeaderEnd underlineChar <- oneOf setextHChars many (char underlineChar) blanklines - let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr' <- registerHeader attr (runF text defaultParserState) - return $ B.headerWith attr' level <$> text + let level = (fromMaybe 0 $ elemIndex underlineChar setextHChars) + 1 + attr' <- registerHeader attr text + return $ B.headerWith attr' level text -- -- hrule block -- -hrule :: Parser [Char] st (F Blocks) +hrule :: Monad m => ParserT [Char] st m Blocks hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -545,24 +570,24 @@ hrule = try $ do skipMany (spaceChar <|> char start) newline optional blanklines - return $ return B.horizontalRule + return B.horizontalRule -- -- code blocks -- indentedLine :: MarkdownParser String -indentedLine = indentSpaces >> anyLine >>= return . (++ "\n") +indentedLine = indentSpaces >> ((++ "\n") <$> anyLine) -blockDelimiter :: (Char -> Bool) +blockDelimiter :: Monad m + => (Char -> Bool) -> Maybe Int - -> Parser [Char] st Int + -> ParserT [Char] st m Int blockDelimiter f len = try $ do c <- lookAhead (satisfy f) case len of Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> many (char c) >>= - return . (+ 3) . length + Nothing -> count 3 (char c) >> ((+ 3) . length <$> many (char c)) attributes :: MarkdownParser Attr attributes = try $ do @@ -607,7 +632,7 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -codeBlockFenced :: MarkdownParser (F Blocks) +codeBlockFenced :: MarkdownParser Blocks codeBlockFenced = try $ do c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) @@ -619,7 +644,7 @@ codeBlockFenced = try $ do blankline contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines - return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + return $ B.codeBlockWith attr $ intercalate "\n" contents -- correctly handle github language identifiers toLanguageId :: String -> String @@ -628,7 +653,7 @@ toLanguageId = map toLower . go go "objective-c" = "objectivec" go x = x -codeBlockIndented :: MarkdownParser (F Blocks) +codeBlockIndented :: MarkdownParser Blocks codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -636,15 +661,15 @@ codeBlockIndented = do return $ b ++ l)) optional blanklines classes <- getOption readerIndentedCodeClasses - return $ return $ B.codeBlockWith ("", classes, []) $ + return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: MarkdownParser (F Blocks) +lhsCodeBlock :: MarkdownParser Blocks lhsCodeBlock = do guardEnabled Ext_literate_haskell - (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + (B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)) - <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> + <|> (B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> lhsCodeBlockInverseBird) lhsCodeBlockLaTeX :: MarkdownParser String @@ -673,7 +698,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> Parser [Char] st String +birdTrackLine :: Monad m => Char -> ParserT [Char] st m String birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -701,12 +726,12 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: MarkdownParser (F Blocks) +blockQuote :: MarkdownParser Blocks blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - return $ B.blockQuote <$> contents + contents <- parseFromString parseBlocks $ intercalate "\n" raw ++ "\n\n" + return $ B.blockQuote contents -- -- list blocks @@ -749,7 +774,7 @@ anyOrderedListStart = try $ do return res listStart :: MarkdownParser () -listStart = bulletListStart <|> (anyOrderedListStart >> return ()) +listStart = bulletListStart <|> void anyOrderedListStart listLine :: MarkdownParser String listLine = try $ do @@ -804,7 +829,7 @@ listContinuationLine = try $ do return $ result ++ "\n" listItem :: MarkdownParser a - -> MarkdownParser (F Blocks) + -> MarkdownParser Blocks listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -820,14 +845,14 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: MarkdownParser (F Blocks) +orderedList :: MarkdownParser Blocks orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart unless (style `elem` [DefaultStyle, Decimal, Example] && delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists - items <- fmap sequence $ many1 $ listItem + items <- many1 $ listItem ( try $ do optional newline -- if preceded by Plain block in a list startpos <- sourceColumn <$> getPosition @@ -839,12 +864,12 @@ orderedList = try $ do atMostSpaces (tabStop - (endpos - startpos)) return res ) start' <- option 1 $ guardEnabled Ext_startnum >> return start - return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items + return $ B.orderedListWith (start', style, delim) (compactify' items) -bulletList :: MarkdownParser (F Blocks) +bulletList :: MarkdownParser Blocks bulletList = do - items <- fmap sequence $ many1 $ listItem bulletListStart - return $ B.bulletList <$> fmap compactify' items + items <- many1 $ listItem bulletListStart + return $ B.bulletList (compactify' items) -- definition lists @@ -859,14 +884,14 @@ defListMarker = do else mzero return () -definitionListItem :: Bool -> MarkdownParser (F (Inlines, [Blocks])) +definitionListItem :: Bool -> MarkdownParser (Inlines, [Blocks]) definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact - term <- parseFromString (trimInlinesF . mconcat <$> many inline) rawLine' + term <- parseFromString (trimInlines . mconcat <$> many inline) rawLine' contents <- mapM (parseFromString parseBlocks) raw optional blanklines - return $ liftM2 (,) term (sequence contents) + return (term, contents) defRawBlock :: Bool -> MarkdownParser String defRawBlock compact = try $ do @@ -889,32 +914,32 @@ defRawBlock compact = try $ do return $ trimr (firstline ++ "\n" ++ unlines rawlines ++ cont) ++ if hasBlank || not (null cont) then "\n\n" else "" -definitionList :: MarkdownParser (F Blocks) +definitionList :: MarkdownParser Blocks definitionList = try $ do lookAhead (anyLine >> optional blankline >> defListMarker) compactDefinitionList <|> normalDefinitionList -compactDefinitionList :: MarkdownParser (F Blocks) +compactDefinitionList :: MarkdownParser Blocks compactDefinitionList = do guardEnabled Ext_compact_definition_lists - items <- fmap sequence $ many1 $ definitionListItem True - return $ B.definitionList <$> fmap compactify'DL items + items <- many1 $ definitionListItem True + return $ B.definitionList (compactify'DL items) -normalDefinitionList :: MarkdownParser (F Blocks) +normalDefinitionList :: MarkdownParser Blocks normalDefinitionList = do guardEnabled Ext_definition_lists - items <- fmap sequence $ many1 $ definitionListItem False - return $ B.definitionList <$> items + items <- many1 $ definitionListItem False + return $ B.definitionList items -- -- paragraph block -- -para :: MarkdownParser (F Blocks) +para :: MarkdownParser Blocks para = try $ do exts <- getOption readerExtensions - result <- trimInlinesF . mconcat <$> many1 inline - option (B.plain <$> result) + result <- trimInlines . mconcat <$> many1 inline + option (B.plain result) $ try $ do newline (blanklines >> return mempty) @@ -931,18 +956,17 @@ para = try $ do Just "div" -> () <$ lookAhead (htmlTag (~== TagClose "div")) _ -> mzero - return $ do - result' <- result - case B.toList result' of + return $ + case B.toList result of [Image alt (src,tit)] | Ext_implicit_figures `Set.member` exts -> -- the fig: at beginning of title indicates a figure - return $ B.para $ B.singleton + B.para $ B.singleton $ Image alt (src,'f':'i':'g':':':tit) - _ -> return $ B.para result' + _ -> B.para result -plain :: MarkdownParser (F Blocks) -plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline +plain :: MarkdownParser Blocks +plain = B.plain . trimInlines . mconcat <$> many1 inline -- -- raw html @@ -953,13 +977,13 @@ htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: MarkdownParser (F Blocks) +htmlBlock :: MarkdownParser Blocks htmlBlock = do guardEnabled Ext_raw_html try (do (TagOpen t attrs) <- lookAhead $ fst <$> htmlTag isBlockTag (guard (t `elem` ["pre","style","script"]) >> - (return . B.rawBlock "html") <$> rawVerbatimBlock) + B.rawBlock "html" <$> rawVerbatimBlock) <|> (do guardEnabled Ext_markdown_attribute oldMarkdownAttribute <- stateMarkdownAttribute <$> getState markdownAttribute <- @@ -978,35 +1002,35 @@ htmlBlock = do <|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)) <|> htmlBlock' -htmlBlock' :: MarkdownParser (F Blocks) +htmlBlock' :: MarkdownParser Blocks htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ return $ B.rawBlock "html" first + return $ B.rawBlock "html" first strictHtmlBlock :: MarkdownParser String strictHtmlBlock = htmlInBalanced (not . isInlineTag) rawVerbatimBlock :: MarkdownParser String rawVerbatimBlock = try $ do - (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem - ["pre", "style", "script"]) - (const True)) + (TagOpen tag _, open) <- + htmlTag (tagOpen (`elem` ["pre", "style", "script"]) + (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags' [TagClose tag] -rawTeXBlock :: MarkdownParser (F Blocks) +rawTeXBlock :: MarkdownParser Blocks rawTeXBlock = do guardEnabled Ext_raw_tex result <- (B.rawBlock "latex" . concat <$> - rawLaTeXBlock `sepEndBy1` blankline) + generalize rawLaTeXBlock `sepEndBy1` blankline) <|> (B.rawBlock "context" . concat <$> rawConTeXtEnvironment `sepEndBy1` blankline) spaces - return $ return result + return result -rawHtmlBlocks :: MarkdownParser (F Blocks) +rawHtmlBlocks :: MarkdownParser Blocks rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag -- try to find closing tag @@ -1018,10 +1042,10 @@ rawHtmlBlocks = do contents <- mconcat <$> many (notFollowedBy' closer >> block) result <- (closer >>= \(_, rawcloser) -> return ( - return (B.rawBlock "html" $ stripMarkdownAttribute raw) <> + (B.rawBlock "html" $ stripMarkdownAttribute raw) <> contents <> - return (B.rawBlock "html" rawcloser))) - <|> return (return (B.rawBlock "html" raw) <> contents) + (B.rawBlock "html" rawcloser))) + <|> return (B.rawBlock "html" raw <> contents) updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } return result @@ -1036,12 +1060,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s -- line block -- -lineBlock :: MarkdownParser (F Blocks) +lineBlock :: MarkdownParser Blocks lineBlock = try $ do guardEnabled Ext_line_blocks lines' <- lineBlockLines >>= - mapM (parseFromString (trimInlinesF . mconcat <$> many inline)) - return $ B.para <$> (mconcat $ intersperse (return B.linebreak) lines') + mapM (parseFromString (trimInlines . mconcat <$> many inline)) + return $ B.para (mconcat $ intersperse B.linebreak lines') -- -- Tables @@ -1049,17 +1073,19 @@ lineBlock = try $ do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Char - -> Parser [Char] st (Int, Int) +dashedLine :: Monad m => Char + -> ParserT [Char] st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) + let lengthDashes = length dashes + lengthSp = length sp + return (lengthDashes, lengthDashes + lengthSp) -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. simpleTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) + -> MarkdownParser ([Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -1078,9 +1104,8 @@ simpleTableHeader headless = try $ do let rawHeads' = if headless then replicate (length dashes) "" else rawHeads - heads <- fmap sequence - $ mapM (parseFromString (mconcat <$> many plain)) - $ map trim rawHeads' + heads <- + mapM (parseFromString (mconcat <$> many plain) . trim) rawHeads' return (heads, aligns, indices) -- Returns an alignment type for a table, based on a list of strings @@ -1121,30 +1146,30 @@ rawTableLine indices = do -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] - -> MarkdownParser (F [Blocks]) + -> MarkdownParser [Blocks] tableLine indices = rawTableLine indices >>= - fmap sequence . mapM (parseFromString (mconcat <$> many plain)) + mapM (parseFromString (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: [Int] - -> MarkdownParser (F [Blocks]) + -> MarkdownParser [Blocks] multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols + mapM (parseFromString (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: MarkdownParser (F Inlines) +tableCaption :: MarkdownParser Inlines tableCaption = try $ do guardEnabled Ext_table_captions skipNonindentSpaces string ":" <|> string "Table:" - trimInlinesF . mconcat <$> many1 inline <* blanklines + trimInlines . mconcat <$> many1 inline <* blanklines -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) simpleTable headless = do (aligns, _widths, heads', lines') <- tableWith (simpleTableHeader headless) tableLine @@ -1158,12 +1183,12 @@ simpleTable headless = do -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). multilineTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) multilineTable headless = tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter multilineTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) + -> MarkdownParser ([Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do unless headless $ tableSep >> notFollowedBy blankline @@ -1185,7 +1210,7 @@ multilineTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else map (unlines . map trim) rawHeadsList - heads <- fmap sequence $ + heads <- mapM (parseFromString (mconcat <$> many plain)) $ map trim rawHeads return (heads, aligns, indices) @@ -1195,7 +1220,7 @@ multilineTableHeader headless = try $ do -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) gridTable headless = tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter @@ -1204,13 +1229,14 @@ gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ trimr line -gridPart :: Char -> Parser [Char] st (Int, Int) +gridPart :: Monad m => Char -> ParserT [Char] st m (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' - return (length dashes, length dashes + 1) + let lengthDashes = length dashes + return (lengthDashes, lengthDashes + 1) -gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] +gridDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: String -> String @@ -1223,7 +1249,7 @@ gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Bool -- ^ Headerless table - -> MarkdownParser (F [Blocks], [Alignment], [Int]) + -> MarkdownParser ([Blocks], [Alignment], [Int]) gridTableHeader headless = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -1232,9 +1258,7 @@ gridTableHeader headless = try $ do else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () + unless headless (void $ gridTableSep '=') let lines' = map snd dashes let indices = scanl (+) 0 lines' let aligns = replicate (length lines') AlignDefault @@ -1243,7 +1267,7 @@ gridTableHeader headless = try $ do then replicate (length dashes) "" else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads + heads <- mapM (parseFromString parseBlocks . trim) rawHeads return (heads, aligns, indices) gridTableRawLine :: [Int] -> MarkdownParser [String] @@ -1254,12 +1278,12 @@ gridTableRawLine indices = do -- | Parse row of grid table. gridTableRow :: [Int] - -> MarkdownParser (F [Blocks]) + -> MarkdownParser [Blocks] gridTableRow indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - fmap compactify' <$> fmap sequence (mapM (parseFromString parseBlocks) cols) + compactify' <$> mapM (parseFromString parseBlocks) cols removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -1285,16 +1309,12 @@ pipeBreak = try $ do blankline return (first:rest) -pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable :: MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) pipeTable = try $ do - (heads,aligns) <- try ( pipeBreak >>= \als -> - return (return $ replicate (length als) mempty, als)) - <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> - - return (row, als) ) - lines' <- sequence <$> many1 pipeTableRow + (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak + lines' <- many pipeTableRow let widths = replicate (length aligns) 0.0 - return $ (aligns, widths, heads, lines') + return (aligns, widths, heads, lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1302,7 +1322,7 @@ sepPipe = try $ do notFollowedBy blankline -- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: MarkdownParser (F [Blocks]) +pipeTableRow :: MarkdownParser [Blocks] pipeTableRow = do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1314,16 +1334,14 @@ pipeTableRow = do guard $ not (null rest && not openPipe) optional (char '|') blankline - let cells = sequence (first:rest) - return $ do - cells' <- cells - return $ map - (\ils -> + let cells = first:rest + return $ + map (\ils -> case trimInlines ils of ils' | B.isNull ils' -> mempty - | otherwise -> B.plain $ ils') cells' + | otherwise -> B.plain ils') cells -pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart :: Monad m => ParserT [Char] st m Alignment pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1338,7 +1356,7 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter -- Succeed only if current line contains a pipe. -scanForPipe :: Parser [Char] st () +scanForPipe :: Monad m => ParserT [Char] st m () scanForPipe = do inp <- getInput case break (\c -> c == '\n' || c == '|') inp of @@ -1348,22 +1366,22 @@ scanForPipe = do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in -- Text.Pandoc.Parsing. -tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int]) - -> ([Int] -> MarkdownParser (F [Blocks])) +tableWith :: MarkdownParser ([Blocks], [Alignment], [Int]) + -> ([Int] -> MarkdownParser [Blocks]) -> MarkdownParser sep -> MarkdownParser end - -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], [Blocks], [[Blocks]]) tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser - lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser + lines' <- rowParser indices `sepEndBy1` lineParser footerParser numColumns <- getOption readerColumns - let widths = if (indices == []) - then replicate (length aligns) 0.0 - else widthsFromIndices numColumns indices - return $ (aligns, widths, heads, lines') + let widths = case indices of + [] -> replicate (length aligns) 0.0 + _ -> widthsFromIndices numColumns indices + return (aligns, widths, heads, lines') -table :: MarkdownParser (F Blocks) +table :: MarkdownParser Blocks table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) (aligns, widths, heads, lns) <- @@ -1378,19 +1396,15 @@ table = try $ do (gridTable False <|> gridTable True)) <?> "table" optional blanklines caption <- case frontCaption of - Nothing -> option (return mempty) tableCaption + Nothing -> option mempty tableCaption Just c -> return c - return $ do - caption' <- caption - heads' <- heads - lns' <- lns - return $ B.table caption' (zip aligns widths) heads' lns' + return $ B.table caption (zip aligns widths) heads lns -- -- inline -- -inline :: MarkdownParser (F Inlines) +inline :: MarkdownParser Inlines inline = choice [ whitespace , bareURL , str @@ -1413,7 +1427,7 @@ inline = choice [ whitespace , rawLaTeXInline' , exampleRef , smart - , return . B.singleton <$> charRef + , B.singleton <$> charRef , symbol , ltSign ] <?> "inline" @@ -1424,43 +1438,42 @@ escapedChar' = try $ do (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) <|> oneOf "\\`*_{}[]()>#+-.!~\"" -escapedChar :: MarkdownParser (F Inlines) +escapedChar :: MarkdownParser Inlines escapedChar = do result <- escapedChar' case result of - ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space + ' ' -> return $ B.str "\160" -- "\ " is a nonbreaking space '\n' -> guardEnabled Ext_escaped_line_breaks >> - return (return B.linebreak) -- "\[newline]" is a linebreak - _ -> return $ return $ B.str [result] + return B.linebreak -- "\[newline]" is a linebreak + _ -> return $ B.str [result] -ltSign :: MarkdownParser (F Inlines) +ltSign :: MarkdownParser Inlines ltSign = do guardDisabled Ext_raw_html <|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag)) char '<' - return $ return $ B.str "<" + return $ B.str "<" -exampleRef :: MarkdownParser (F Inlines) +exampleRef :: MarkdownParser Inlines exampleRef = try $ do guardEnabled Ext_example_lists char '@' lab <- many1 (alphaNum <|> oneOf "-_") - return $ do - st <- askF - return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + st <- ask + return $ case M.lookup lab (stateExamples st) of + Just n -> B.str (show n) + Nothing -> B.str ('@':lab) -symbol :: MarkdownParser (F Inlines) +symbol :: MarkdownParser Inlines symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ return $ B.str [result] + return $ B.str [result] -- parses inline code, between n `s and n `s -code :: MarkdownParser (F Inlines) +code :: MarkdownParser Inlines code = try $ do starts <- many1 (char '`') skipSpaces @@ -1470,16 +1483,17 @@ code = try $ do notFollowedBy (char '`'))) attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >> optional whitespace >> attributes) - return $ return $ B.codeWith attr $ trim $ concat result + return $ B.codeWith attr $ trim $ concat result -math :: MarkdownParser (F Inlines) -math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) - <|> (return . B.math <$> (mathInline >>= applyMacros')) +math :: MarkdownParser Inlines +math = (B.displayMath <$> (mathDisplay >>= applyMacros')) + <|> ((B.math <$> (mathInline >>= applyMacros')) <+?> + ((getOption readerSmart >>= guard) *> apostrophe <* notFollowedBy space)) -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. enclosure :: Char - -> MarkdownParser (F Inlines) + -> MarkdownParser Inlines enclosure c = do -- we can't start an enclosure with _ if after a string and -- the intraword_underscores extension is enabled: @@ -1487,13 +1501,13 @@ enclosure c = do <|> guard (c == '*') <|> (guard =<< notAfterString) cs <- many1 (char c) - (return (B.str cs) <>) <$> whitespace - <|> do + (B.str cs <>) <$> whitespace + <|> case length cs of 3 -> three c 2 -> two c mempty 1 -> one c mempty - _ -> return (return $ B.str cs) + _ -> return $ B.str cs ender :: Char -> Int -> MarkdownParser () ender c n = try $ do @@ -1506,74 +1520,74 @@ ender c n = try $ do -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. -- Otherwise, emit ccc then the results. -three :: Char -> MarkdownParser (F Inlines) +three :: Char -> MarkdownParser Inlines three c = do contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline) - (ender c 3 >> return ((B.strong . B.emph) <$> contents)) - <|> (ender c 2 >> one c (B.strong <$> contents)) - <|> (ender c 1 >> two c (B.emph <$> contents)) - <|> return (return (B.str [c,c,c]) <> contents) + (ender c 3 >> return ((B.strong . B.emph) contents)) + <|> (ender c 2 >> one c (B.strong contents)) + <|> (ender c 1 >> two c (B.emph contents)) + <|> return (B.str [c,c,c] <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. -two :: Char -> F Inlines -> MarkdownParser (F Inlines) +two :: Char -> Inlines -> MarkdownParser Inlines two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) - (ender c 2 >> return (B.strong <$> (prefix' <> contents))) - <|> return (return (B.str [c,c]) <> (prefix' <> contents)) + (ender c 2 >> return (B.strong (prefix' <> contents))) + <|> return (B.str [c,c] <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. -one :: Char -> F Inlines -> MarkdownParser (F Inlines) +one :: Char -> Inlines -> MarkdownParser Inlines one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline) <|> try (string [c,c] >> notFollowedBy (ender c 1) >> two c mempty) ) - (ender c 1 >> return (B.emph <$> (prefix' <> contents))) - <|> return (return (B.str [c]) <> (prefix' <> contents)) + (ender c 1 >> return (B.emph (prefix' <> contents))) + <|> return (B.str [c] <> (prefix' <> contents)) -strongOrEmph :: MarkdownParser (F Inlines) +strongOrEmph :: MarkdownParser Inlines strongOrEmph = enclosure '*' <|> enclosure '_' --- | Parses a list of inlines between start and end delimiters. +-- | Parses a list oInlines between start and end delimiters. inlinesBetween :: (Show b) => MarkdownParser a -> MarkdownParser b - -> MarkdownParser (F Inlines) + -> MarkdownParser Inlines inlinesBetween start end = - (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) + (trimInlines . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace <* notFollowedBy' end -strikeout :: MarkdownParser (F Inlines) -strikeout = fmap B.strikeout <$> +strikeout :: MarkdownParser Inlines +strikeout = B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: MarkdownParser (F Inlines) -superscript = fmap B.superscript <$> try (do +superscript :: MarkdownParser Inlines +superscript = B.superscript <$> try (do guardEnabled Ext_superscript char '^' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: MarkdownParser (F Inlines) -subscript = fmap B.subscript <$> try (do +subscript :: MarkdownParser Inlines +subscript = B.subscript <$> try (do guardEnabled Ext_subscript char '~' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: MarkdownParser (F Inlines) -whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" +whitespace :: MarkdownParser Inlines +whitespace = spaceChar >> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: Parser [Char] st Char +nonEndline :: Monad m => ParserT [Char] st m Char nonEndline = satisfy (/='\n') -str :: MarkdownParser (F Inlines) +str :: MarkdownParser Inlines str = do result <- many1 alphaNum updateLastStrPos @@ -1581,14 +1595,14 @@ str = do isSmart <- getOption readerSmart if isSmart then case likelyAbbrev result of - [] -> return $ return $ B.str result + [] -> return $ B.str result xs -> choice (map (\x -> try (string x >> oneOf " \n" >> lookAhead alphaNum >> - return (return $ B.str - $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ return $ B.str result) - else return $ return $ B.str result + return (B.str $ + result ++ spacesToNbr x ++ "\160"))) xs) + <|> (return $ B.str result) + else return $ B.str result -- | if the string matches the beginning of an abbreviation (before -- the first period, return strings that would finish the abbreviation. @@ -1603,7 +1617,7 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: MarkdownParser (F Inlines) +endline :: MarkdownParser Inlines endline = try $ do newline notFollowedBy blankline @@ -1616,18 +1630,18 @@ endline = try $ do notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) notFollowedByHtmlCloser (eof >> return mempty) - <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + <|> (guardEnabled Ext_hard_line_breaks >> return B.linebreak) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) - <|> (return $ return B.space) + <|> return B.space -- -- links -- -- a reference label for a link -reference :: MarkdownParser (F Inlines, String) +reference :: MarkdownParser (Inlines, String) reference = do notFollowedBy' (string "[^") -- footnote reference - withRaw $ trimInlinesF <$> inlinesInBalancedBrackets + withRaw $ trimInlines <$> inlinesInBalancedBrackets parenthesizedChars :: MarkdownParser [Char] parenthesizedChars = do @@ -1655,7 +1669,7 @@ source = do linkTitle :: MarkdownParser String linkTitle = quotedTitle '"' <|> quotedTitle '\'' -link :: MarkdownParser (F Inlines) +link :: MarkdownParser Inlines link = try $ do st <- getState guard $ stateAllowLinks st @@ -1665,44 +1679,43 @@ link = try $ do regLink B.link lab <|> referenceLink B.link (lab,raw) regLink :: (String -> String -> Inlines -> Inlines) - -> F Inlines -> MarkdownParser (F Inlines) + -> Inlines -> MarkdownParser Inlines regLink constructor lab = try $ do (src, tit) <- source - return $ constructor src tit <$> lab + return $ constructor src tit lab -- a link like [this][ref] or [this][] or [this] referenceLink :: (String -> String -> Inlines -> Inlines) - -> (F Inlines, String) -> MarkdownParser (F Inlines) + -> (Inlines, String) -> MarkdownParser Inlines referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False (ref,raw') <- option (mempty, "") $ lookAhead (try (spnl >> normalCite >> return (mempty, ""))) <|> try (spnl >> reference) + when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' parsedRaw <- parseFromString (mconcat <$> many inline) raw' fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references - let makeFallback = do - parsedRaw' <- parsedRaw - fallback' <- fallback - return $ B.str "[" <> fallback' <> B.str "]" <> + let makeFallback = + B.str "[" <> fallback <> B.str "]" <> (if sp && not (null raw) then B.space else mempty) <> - parsedRaw' - return $ do - keys <- asksF stateKeys - case M.lookup key keys of - Nothing -> do - headers <- asksF stateHeaders - ref' <- if labIsRef then lab else ref - if implicitHeaderRefs - then case M.lookup ref' headers of - Just ident -> constructor ('#':ident) "" <$> lab - Nothing -> makeFallback - else makeFallback - Just (src,tit) -> constructor src tit <$> lab + parsedRaw + keys <- asks stateKeys + headers <- asks stateHeaders + return $ + case M.lookup key keys of + Nothing -> + let ref' = if labIsRef then lab else ref in + if implicitHeaderRefs + then case M.lookup ref' headers of + Just ident -> constructor ('#':ident) "" lab + Nothing -> makeFallback + else makeFallback + Just (src,tit) -> constructor src tit lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1711,14 +1724,14 @@ dropBrackets = reverse . dropRB . reverse . dropLB dropLB ('[':xs) = xs dropLB xs = xs -bareURL :: MarkdownParser (F Inlines) +bareURL :: MarkdownParser Inlines bareURL = try $ do guardEnabled Ext_autolink_bare_uris (orig, src) <- uri <|> emailAddress notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") - return $ return $ B.link src "" (B.str orig) + return $ B.link src "" (B.str orig) -autoLink :: MarkdownParser (F Inlines) +autoLink :: MarkdownParser Inlines autoLink = try $ do char '<' (orig, src) <- uri <|> emailAddress @@ -1727,9 +1740,9 @@ autoLink = try $ do -- final punctuation. for example: in `<http://hi---there>`, -- the URI parser will stop before the dashes. extra <- fromEntities <$> manyTill nonspaceChar (char '>') - return $ return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra) + return $ B.link (src ++ escapeURI extra) "" (B.str $ orig ++ extra) -image :: MarkdownParser (F Inlines) +image :: MarkdownParser Inlines image = try $ do char '!' (lab,raw) <- reference @@ -1739,38 +1752,33 @@ image = try $ do _ -> B.image src regLink constructor lab <|> referenceLink constructor (lab,raw) -note :: MarkdownParser (F Inlines) +note :: MarkdownParser Inlines note = try $ do guardEnabled Ext_footnotes + (stateInFootnote <$> getState) >>= guard . not ref <- noteMarker - return $ do - notes <- asksF stateNotes' + notes <- asks stateNotes' + return $ case lookup ref notes of - Nothing -> return $ B.str $ "[^" ++ ref ++ "]" - Just contents -> do - st <- askF - -- process the note in a context that doesn't resolve - -- notes, to avoid infinite looping with notes inside - -- notes: - let contents' = runF contents st{ stateNotes' = [] } - return $ B.note contents' - -inlineNote :: MarkdownParser (F Inlines) + Nothing -> B.str $ "[^" ++ ref ++ "]" + Just contents -> B.note contents + +inlineNote :: MarkdownParser Inlines inlineNote = try $ do guardEnabled Ext_inline_notes char '^' contents <- inlinesInBalancedBrackets - return $ B.note . B.para <$> contents + return . B.note . B.para $ contents -rawLaTeXInline' :: MarkdownParser (F Inlines) +rawLaTeXInline' :: MarkdownParser Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env - RawInline _ s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s + RawInline _ s <- generalize rawLaTeXInline + return $ B.rawInline "tex" s -- "tex" because it might be context or latex -rawConTeXtEnvironment :: Parser [Char] st String +rawConTeXtEnvironment :: Monad m => ParserT [Char] st m String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1779,14 +1787,14 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String +inBrackets :: Monad m => (ParserT [Char] st m Char) -> ParserT [Char] st m String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -spanHtml :: MarkdownParser (F Inlines) +spanHtml :: MarkdownParser Inlines spanHtml = try $ do guardEnabled Ext_native_spans (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) @@ -1798,10 +1806,10 @@ spanHtml = try $ do Just s | null ident && null classes && map toLower (filter (`notElem` " \t;") s) == "font-variant:small-caps" - -> return $ B.smallcaps <$> contents - _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents + -> return $ B.smallcaps contents + _ -> return $ B.spanWith (ident, classes, keyvals) contents -divHtml :: MarkdownParser (F Blocks) +divHtml :: MarkdownParser Blocks divHtml = try $ do guardEnabled Ext_native_divs (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) @@ -1819,11 +1827,11 @@ divHtml = try $ do let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) <$> contents + return $ B.divWith (ident, classes, keyvals) contents else -- avoid backtracing - return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents + return $ B.rawBlock "html" (rawtag <> bls) <> contents -rawHtmlInline :: MarkdownParser (F Inlines) +rawHtmlInline :: MarkdownParser Inlines rawHtmlInline = do guardEnabled Ext_raw_html inHtmlBlock <- stateInHtmlBlock <$> getState @@ -1838,19 +1846,17 @@ rawHtmlInline = do then (\x -> isInlineTag x && not (isCloseBlockTag x)) else not . isTextTag - return $ return $ B.rawInline "html" result + return $ B.rawInline "html" result -- Citations -cite :: MarkdownParser (F Inlines) +cite :: MarkdownParser Inlines cite = do guardEnabled Ext_citations - citations <- textualCite - <|> do (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) <$> cs - return citations + textualCite <|> do (cs, raw) <- withRaw normalCite + return $ B.cite cs (B.text raw) -textualCite :: MarkdownParser (F Inlines) +textualCite :: MarkdownParser Inlines textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1864,29 +1870,26 @@ textualCite = try $ do case mbrest of Just (rest, raw) -> return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) - <$> rest + rest Nothing -> (do (cs, raw) <- withRaw $ bareloc first - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs) - <|> return (do st <- askF - return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] $ B.str $ '@':key) + return $ B.cite cs (B.text $ '@':key ++ " " ++ raw)) + <|> do st <- ask + return $ case M.lookup key (stateExamples st) of + Just n -> B.str (show n) + _ -> B.cite [first] $ B.str $ '@':key -bareloc :: Citation -> MarkdownParser (F [Citation]) +bareloc :: Citation -> MarkdownParser [Citation] bareloc c = try $ do spnl char '[' suff <- suffix - rest <- option (return []) $ try $ char ';' >> citeList + rest <- option [] $ try $ char ';' >> citeList spnl char ']' - return $ do - suff' <- suff - rest' <- rest - return $ c{ citationSuffix = B.toList suff' } : rest' + return $ c{ citationSuffix = B.toList suff } : rest -normalCite :: MarkdownParser (F [Citation]) +normalCite :: MarkdownParser [Citation] normalCite = try $ do char '[' spnl @@ -1895,60 +1898,57 @@ normalCite = try $ do char ']' return citations -suffix :: MarkdownParser (F Inlines) +suffix :: MarkdownParser Inlines suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl - rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) + rest <- trimInlines . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) return $ if hasSpace - then (B.space <>) <$> rest + then B.space <> rest else rest -prefix :: MarkdownParser (F Inlines) -prefix = trimInlinesF . mconcat <$> +prefix :: MarkdownParser Inlines +prefix = trimInlines . mconcat <$> manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: MarkdownParser (F [Citation]) -citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) +citeList :: MarkdownParser [Citation] +citeList = sepBy1 citation (try $ char ';' >> spnl) -citation :: MarkdownParser (F Citation) +citation :: MarkdownParser Citation citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - -smart :: MarkdownParser (F Inlines) + return Citation{ citationId = key + , citationPrefix = B.toList pref + , citationSuffix = B.toList suff + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + +smart :: MarkdownParser Inlines smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [apostrophe, dash, ellipses]) + choice [apostrophe, dash, ellipses] -singleQuoted :: MarkdownParser (F Inlines) +singleQuoted :: MarkdownParser Inlines singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ - fmap B.singleQuoted . trimInlinesF . mconcat <$> + B.singleQuoted . trimInlines . mconcat <$> many1Till inline singleQuoteEnd -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. -doubleQuoted :: MarkdownParser (F Inlines) +doubleQuoted :: MarkdownParser Inlines doubleQuoted = try $ do doubleQuoteStart contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) + (withQuoteContext InDoubleQuote doubleQuoteEnd >> return + (B.doubleQuoted . trimInlines $ contents)) + <|> return (B.str "\8220" <> contents) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index e43b8a86c..939d10fb2 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -58,21 +58,21 @@ import Data.Maybe (fromMaybe) import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMediaWiki opts s = - case runParser parseMediaWiki MWState{ mwOptions = opts + readWith parseMediaWiki MWState{ mwOptions = opts , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 , mwCategoryLinks = [] , mwHeaderMap = M.empty , mwIdentifierList = [] } - "source" (s ++ "\n") of - Left err' -> error $ "\nError:\n" ++ show err' - Right result -> result + (s ++ "\n") data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int @@ -593,11 +593,17 @@ imageOption = <|> try (many1 (oneOf "x0123456789") <* string "px") <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) +collapseUnderscores :: String -> String +collapseUnderscores [] = [] +collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) +collapseUnderscores (x:xs) = x : collapseUnderscores xs + +addUnderscores :: String -> String +addUnderscores = collapseUnderscores . intercalate "_" . words + internalLink :: MWParser Inlines internalLink = try $ do sym "[[" - let addUnderscores x = let (pref,suff) = break (=='#') x - in pref ++ intercalate "_" (words suff) pagename <- unwords . words <$> many (noneOf "|]") label <- option (B.text pagename) $ char '|' *> ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index f4dfa62c1..fc6b3362a 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -3,7 +3,7 @@ Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; Either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -33,6 +33,9 @@ module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Error +import Control.Applicative + -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, -- an inline list, or an inline. Thus, for example, @@ -44,33 +47,18 @@ import Text.Pandoc.Shared (safeRead) -- > Pandoc nullMeta [Plain [Str "hi"]] -- readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readNative s = - case safeRead s of - Just d -> d - Nothing -> Pandoc nullMeta $ readBlocks s + -> Either PandocError Pandoc +readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) -readBlocks :: String -> [Block] -readBlocks s = - case safeRead s of - Just d -> d - Nothing -> [readBlock s] +readBlocks :: String -> Either PandocError [Block] +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) -readBlock :: String -> Block -readBlock s = - case safeRead s of - Just d -> d - Nothing -> Plain $ readInlines s +readBlock :: String -> Either PandocError Block +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) -readInlines :: String -> [Inline] -readInlines s = - case safeRead s of - Just d -> d - Nothing -> [readInline s] +readInlines :: String -> Either PandocError [Inline] +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) -readInline :: String -> Inline -readInline s = - case safeRead s of - Just d -> d - Nothing -> error "Cannot parse document" +readInline :: String -> Either PandocError Inline +readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 35d01e877..19ddba36b 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleContexts #-} module Text.Pandoc.Readers.OPML ( readOPML ) where import Data.Char (toUpper) import Text.Pandoc.Options @@ -11,8 +12,11 @@ import Data.Generics import Data.Monoid import Control.Monad.State import Control.Applicative ((<$>), (<$)) +import Data.Default +import Text.Pandoc.Compat.Except +import Text.Pandoc.Error -type OPML = State OPMLState +type OPML = ExceptT PandocError (State OPMLState) data OPMLState = OPMLState{ opmlSectionLevel :: Int @@ -21,17 +25,19 @@ data OPMLState = OPMLState{ , opmlDocDate :: Inlines } deriving Show -readOPML :: ReaderOptions -> String -> Pandoc +instance Default OPMLState where + def = OPMLState{ opmlSectionLevel = 0 + , opmlDocTitle = mempty + , opmlDocAuthors = [] + , opmlDocDate = mempty + } + +readOPML :: ReaderOptions -> String -> Either PandocError Pandoc readOPML _ inp = setTitle (opmlDocTitle st') - $ setAuthors (opmlDocAuthors st') - $ setDate (opmlDocDate st') - $ doc $ mconcat bs - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) - OPMLState{ opmlSectionLevel = 0 - , opmlDocTitle = mempty - , opmlDocAuthors = [] - , opmlDocDate = mempty - } + . setAuthors (opmlDocAuthors st') + . setDate (opmlDocDate st') + . doc . mconcat <$> bs + where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp) -- normalize input, consolidating adjacent Text and CRef elements normalizeTree :: [Content] -> [Content] @@ -58,14 +64,16 @@ attrValue attr elt = Just z -> z Nothing -> "" -asHtml :: String -> Inlines -asHtml s = case readHtml def s of - Pandoc _ [Plain ils] -> fromList ils - _ -> mempty +exceptT :: Either PandocError a -> OPML a +exceptT = either throwError return + +asHtml :: String -> OPML Inlines +asHtml s = (\(Pandoc _ bs) -> case bs of + [Plain ils] -> fromList ils + _ -> mempty) <$> exceptT (readHtml def s) -asMarkdown :: String -> Blocks -asMarkdown s = fromList bs - where Pandoc _ bs = readMarkdown def s +asMarkdown :: String -> OPML Blocks +asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s) getBlocks :: Element -> OPML Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -82,8 +90,8 @@ parseBlock (Elem e) = "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e - where sect n = do let headerText = asHtml $ attrValue "text" e - let noteBlocks = asMarkdown $ attrValue "_note" e + where sect n = do headerText <- asHtml $ attrValue "text" e + noteBlocks <- asMarkdown $ attrValue "_note" e modify $ \st -> st{ opmlSectionLevel = n } bs <- getBlocks e modify $ \st -> st{ opmlSectionLevel = n - 1 } diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index f16aed48d..fc63cc11e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,6 +1,9 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} {- Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> @@ -36,8 +39,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>) import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF - , newline, orderedListMarker +import Text.Pandoc.Parsing hiding ( newline, orderedListMarker , parseFromString, blanklines ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) @@ -45,34 +47,47 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Applicative ( Applicative, pure +import Control.Applicative ( pure , (<$>), (<$), (<*>), (<*), (*>) ) import Control.Arrow (first) -import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) -import Control.Monad.Reader (Reader, runReader, ask, asks) +import Control.Monad (guard, mplus, mzero, when) +import Control.Monad.Reader (Reader, runReader, asks, local) import Data.Char (isAlphaNum, toLower) import Data.Default -import Data.List (intersperse, isPrefixOf, isSuffixOf) +import Data.List (intersperse, isPrefixOf, isSuffixOf, foldl') import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust) -import Data.Monoid (Monoid, mconcat, mempty, mappend) +import Data.Monoid (mconcat, mempty, mappend) import Network.HTTP (urlEncode) +import Text.Pandoc.Error + -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") + -> Either PandocError Pandoc +readOrg opts s = runOrg opts s parseOrg + +data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext + , finalState :: OrgParserState } + +type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) -type OrgParser = Parser [Char] OrgParserState +runOrg :: ReaderOptions -> String -> OrgParser a -> Either PandocError a +runOrg opts inp p = fst <$> res + where + imd = readWithM (returnState p) def{ orgStateOptions = opts } (inp ++ "\n\n") + res = runReader imd def { finalState = s } + s :: OrgParserState + s = either def snd res parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks st <- getState - let meta = runF (orgStateMeta' st) st + let meta = orgStateMeta st let removeUnwantedBlocks = dropCommentTrees . filter (/= Null) - return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st) + return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ blocks') -- | Drop COMMENT headers and the document tree below those headers. dropCommentTrees :: [Block] -> [Block] @@ -102,7 +117,7 @@ isHeaderLevelLowerEq n blk = -- Parser State for Org -- -type OrgNoteRecord = (String, F Blocks) +type OrgNoteRecord = (String, Blocks) type OrgNoteTable = [OrgNoteRecord] type OrgBlockAttributes = M.Map String String @@ -121,10 +136,12 @@ data OrgParserState = OrgParserState , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: Meta - , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable } +instance Default OrgParserLocal where + def = OrgParserLocal NoQuote def + instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions @@ -138,6 +155,10 @@ instance HasLastStrPosition OrgParserState where getLastStrPos = orgStateLastStrPos setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } +instance HasQuoteContext st (Reader OrgParserLocal) where + getQuoteContext = asks orgLocalQuoteContext + withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) + instance Default OrgParserState where def = defaultOrgParserState @@ -153,13 +174,13 @@ defaultOrgParserState = OrgParserState , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty , orgStateMeta = nullMeta - , orgStateMeta' = return nullMeta , orgStateNotes' = [] } recordAnchorId :: String -> OrgParser () recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + let as = orgStateAnchorIds s in + s{ orgStateAnchorIds = i : as } addBlockAttribute :: String -> String -> OrgParser () addBlockAttribute key val = updateState $ \s -> @@ -238,30 +259,6 @@ parseFromString parser str' = do -- Adaptions and specializations of parsing utilities -- -newtype F a = F { unF :: Reader OrgParserState a - } deriving (Monad, Applicative, Functor) - -runF :: F a -> OrgParserState -> a -runF = runReader . unF - -askF :: F OrgParserState -askF = F ask - -asksF :: (OrgParserState -> a) -> F a -asksF f = F $ asks f - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = fmap mconcat . sequence - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines - -returnF :: a -> OrgParser (F a) -returnF = return . return - - -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char newline = @@ -280,10 +277,10 @@ blanklines = -- parsing blocks -- -parseBlocks :: OrgParser (F Blocks) +parseBlocks :: OrgParser Blocks parseBlocks = mconcat <$> manyTill block eof -block :: OrgParser (F Blocks) +block :: OrgParser Blocks block = choice [ mempty <$ blanklines , optionalAttributes $ choice [ orgBlock @@ -294,14 +291,14 @@ block = choice [ mempty <$ blanklines , drawer , specialLine , header - , return <$> hline + , hline , list , latexFragment , noteBlock , paraOrPlain ] <?> "block" -optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks) +optionalAttributes :: OrgParser Blocks -> OrgParser Blocks optionalAttributes parser = try $ resetBlockAttributes *> parseBlockAttributes *> parser @@ -321,7 +318,7 @@ parseAndAddAttribute key value = do let key' = map toLower key () <$ addBlockAttribute key' value -lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines)) +lookupInlinesAttr :: String -> OrgParser (Maybe Inlines) lookupInlinesAttr attr = try $ do val <- lookupBlockAttribute attr maybe (return Nothing) @@ -335,20 +332,20 @@ lookupInlinesAttr attr = try $ do type BlockProperties = (Int, String) -- (Indentation, Block-Type) -orgBlock :: OrgParser (F Blocks) +orgBlock :: OrgParser Blocks orgBlock = try $ do blockProp@(_, blkType) <- blockHeaderStart ($ blockProp) $ case blkType of "comment" -> withRaw' (const mempty) - "html" -> withRaw' (return . (B.rawBlock blkType)) - "latex" -> withRaw' (return . (B.rawBlock blkType)) - "ascii" -> withRaw' (return . (B.rawBlock blkType)) - "example" -> withRaw' (return . exampleCode) - "quote" -> withParsed (fmap B.blockQuote) + "html" -> withRaw' (B.rawBlock blkType) + "latex" -> withRaw' (B.rawBlock blkType) + "ascii" -> withRaw' (B.rawBlock blkType) + "example" -> withRaw' exampleCode + "quote" -> withParsed B.blockQuote "verse" -> verseBlock "src" -> codeBlock - _ -> withParsed (fmap $ divWithClass blkType) + _ -> withParsed (divWithClass blkType) blockHeaderStart :: OrgParser (Int, String) blockHeaderStart = try $ (,) <$> indent <*> blockType @@ -356,10 +353,10 @@ blockHeaderStart = try $ (,) <$> indent <*> blockType indent = length <$> many spaceChar blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) -withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withRaw' :: (String -> Blocks) -> BlockProperties -> OrgParser Blocks withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) -withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withParsed :: (Blocks -> Blocks) -> BlockProperties -> OrgParser Blocks withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) ignHeaders :: OrgParser () @@ -368,11 +365,11 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine) divWithClass :: String -> Blocks -> Blocks divWithClass cls = B.divWith ("", [cls], []) -verseBlock :: BlockProperties -> OrgParser (F Blocks) +verseBlock :: BlockProperties -> OrgParser Blocks verseBlock blkProp = try $ do ignHeaders content <- rawBlockContent blkProp - fmap B.para . mconcat . intersperse (pure B.linebreak) + B.para . mconcat . intersperse B.linebreak <$> mapM (parseFromString parseInlines) (lines content) exportsCode :: [(String, String)] -> Bool @@ -389,7 +386,7 @@ followingResultsBlock = *> blankline *> (unlines <$> many1 exampleLine)) -codeBlock :: BlockProperties -> OrgParser (F Blocks) +codeBlock :: BlockProperties -> OrgParser Blocks codeBlock blkProp = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -399,17 +396,15 @@ codeBlock blkProp = do let includeCode = exportsCode kv let includeResults = exportsResults kv let codeBlck = B.codeBlockWith ( id', classes, kv ) content - labelledBlck <- maybe (pure codeBlck) - (labelDiv codeBlck) + labelledBlck <- maybe codeBlck (labelDiv codeBlck) <$> lookupInlinesAttr "caption" - let resultBlck = pure $ maybe mempty (exampleCode) resultsContent + let resultBlck = maybe mempty exampleCode resultsContent return $ (if includeCode then labelledBlck else mempty) <> (if includeResults then resultBlck else mempty) where labelDiv blk value = - B.divWith nullAttr <$> (mappend <$> labelledBlock value - <*> pure blk) - labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) + B.divWith nullAttr (labelledBlock value <> blk) + labelledBlock = B.plain . B.spanWith ("", ["label"], []) rawBlockContent :: BlockProperties -> OrgParser String rawBlockContent (indent, blockType) = try $ @@ -418,7 +413,7 @@ rawBlockContent (indent, blockType) = try $ indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) -parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) +parsedBlockContent :: BlockProperties -> OrgParser Blocks parsedBlockContent blkProps = try $ do raw <- rawBlockContent blkProps parseFromString parseBlocks (raw ++ "\n") @@ -509,9 +504,9 @@ commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs commaEscaped cs = cs -example :: OrgParser (F Blocks) +example :: OrgParser Blocks example = try $ do - return . return . exampleCode =<< unlines <$> many1 exampleLine + return . exampleCode =<< unlines <$> many1 exampleLine exampleCode :: String -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) @@ -520,7 +515,7 @@ exampleLine :: OrgParser String exampleLine = try $ skipSpaces *> string ": " *> anyLine -- Drawers for properties or a logbook -drawer :: OrgParser (F Blocks) +drawer :: OrgParser Blocks drawer = try $ do drawerStart manyTill drawerLine (try drawerEnd) @@ -546,14 +541,12 @@ drawerEnd = try $ -- -- Figures (Image on a line by itself, preceded by name and/or caption) -figure :: OrgParser (F Blocks) +figure :: OrgParser Blocks figure = try $ do (cap, nam) <- nameAndCaption src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline guard (isImageFilename src) - return $ do - cap' <- cap - return $ B.para $ B.image src nam cap' + return $ B.para $ B.image src nam cap where nameAndCaption = do @@ -569,8 +562,8 @@ figure = try $ do -- -- Comments, Options and Metadata -specialLine :: OrgParser (F Blocks) -specialLine = fmap return . try $ metaLine <|> commentLine +specialLine :: OrgParser Blocks +specialLine = try $ metaLine <|> commentLine metaLine :: OrgParser Blocks metaLine = try $ mempty @@ -590,14 +583,14 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser () declarationLine = try $ do key <- metaKey - inlinesF <- metaInlines + inlines <- metaInlines updateState $ \st -> - let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta - in st { orgStateMeta' = orgStateMeta' st <> meta' } + let meta' = B.setMeta key inlines nullMeta + in st { orgStateMeta = orgStateMeta st <> meta' } return () -metaInlines :: OrgParser (F MetaValue) -metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline +metaInlines :: OrgParser MetaValue +metaInlines = (MetaInlines . B.toList) <$> inlinesTillNewline metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") @@ -638,11 +631,11 @@ parseFormat = try $ do -- -- | Headers -header :: OrgParser (F Blocks) +header :: OrgParser Blocks header = try $ do level <- headerStart title <- inlinesTillNewline - return $ B.header level <$> title + return $ B.header level title headerStart :: OrgParser Int headerStart = try $ @@ -666,7 +659,7 @@ hline = try $ do -- Tables -- -data OrgTableRow = OrgContentRow (F [Blocks]) +data OrgTableRow = OrgContentRow [Blocks] | OrgAlignRow [Alignment] | OrgHlineRow @@ -677,13 +670,13 @@ data OrgTable = OrgTable , orgTableRows :: [[Blocks]] } -table :: OrgParser (F Blocks) +table :: OrgParser Blocks table = try $ do lookAhead tableStart do rows <- tableRows - cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption" - return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows + (cptn :: Inlines) <- fromMaybe "" <$> lookupInlinesAttr "caption" + return $ ($ cptn) . orgToPandocTable . normalizeTable . rowsToTable $ rows orgToPandocTable :: OrgTable -> Inlines @@ -699,11 +692,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) -tableContentCell :: OrgParser (F Blocks) +tableContentCell :: OrgParser Blocks tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell + B.plain . trimInlines . mconcat <$> many1Till inline endOfCell endOfCell :: OrgParser Char endOfCell = try $ char '|' <|> lookAhead newline @@ -735,8 +728,8 @@ tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) rowsToTable :: [OrgTableRow] - -> F OrgTable -rowsToTable = foldM (flip rowToContent) zeroTable + -> OrgTable +rowsToTable = foldl' (flip rowToContent) zeroTable where zeroTable = OrgTable 0 mempty mempty mempty normalizeTable :: OrgTable @@ -755,45 +748,43 @@ normalizeTable (OrgTable cols aligns heads lns) = -- line as a header. All other horizontal lines are discarded. rowToContent :: OrgTableRow -> OrgTable - -> F OrgTable + -> OrgTable rowToContent OrgHlineRow t = maybeBodyToHeader t -rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t -rowToContent (OrgContentRow rf) t = do - rs <- rf - setLongestRow rs =<< appendToBody rs t +rowToContent (OrgAlignRow as) t = setLongestRow as . setAligns as $ t +rowToContent (OrgContentRow rf) t = setLongestRow rf . appendToBody rf $ t setLongestRow :: [a] -> OrgTable - -> F OrgTable + -> OrgTable setLongestRow rs t = - return t{ orgTableColumns = max (length rs) (orgTableColumns t) } + t{ orgTableColumns = max (length rs) (orgTableColumns t) } maybeBodyToHeader :: OrgTable - -> F OrgTable + -> OrgTable maybeBodyToHeader t = case t of OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - return t{ orgTableHeader = b , orgTableRows = [] } - _ -> return t + t{ orgTableHeader = b , orgTableRows = [] } + _ -> t appendToBody :: [Blocks] -> OrgTable - -> F OrgTable -appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] } + -> OrgTable +appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] } setAligns :: [Alignment] -> OrgTable - -> F OrgTable -setAligns aligns t = return $ t{ orgTableAlignments = aligns } + -> OrgTable +setAligns aligns t = t{ orgTableAlignments = aligns } -- -- LaTeX fragments -- -latexFragment :: OrgParser (F Blocks) +latexFragment :: OrgParser Blocks latexFragment = try $ do envName <- latexEnvStart content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) - return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) + return $ B.rawBlock "latex" (content `inLatexEnv` envName) where c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" , c @@ -823,7 +814,7 @@ latexEnvName = try $ do -- -- Footnote defintions -- -noteBlock :: OrgParser (F Blocks) +noteBlock :: OrgParser Blocks noteBlock = try $ do ref <- noteMarker <* skipSpaces content <- mconcat <$> blocksTillHeaderOrNote @@ -835,37 +826,37 @@ noteBlock = try $ do <|> () <$ lookAhead headerStart) -- Paragraphs or Plain text -paraOrPlain :: OrgParser (F Blocks) +paraOrPlain :: OrgParser Blocks paraOrPlain = try $ do ils <- parseInlines nl <- option False (newline >> return True) try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >> - return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) + (return $ B.para ils)) + <|> (return $ B.plain ils) -inlinesTillNewline :: OrgParser (F Inlines) -inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline +inlinesTillNewline :: OrgParser Inlines +inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline -- -- list blocks -- -list :: OrgParser (F Blocks) +list :: OrgParser Blocks list = choice [ definitionList, bulletList, orderedList ] <?> "list" -definitionList :: OrgParser (F Blocks) +definitionList :: OrgParser Blocks definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactify'DL . sequence + B.definitionList . compactify'DL <$> many1 (definitionListItem $ bulletListStart' (Just n)) -bulletList :: OrgParser (F Blocks) +bulletList :: OrgParser Blocks bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify' . sequence + B.bulletList . compactify' <$> many1 (listItem (bulletListStart' $ Just n)) -orderedList :: OrgParser (F Blocks) -orderedList = fmap B.orderedList . fmap compactify' . sequence +orderedList :: OrgParser Blocks +orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) genericListStart :: OrgParser String @@ -902,7 +893,7 @@ orderedListStart = genericListStart orderedListMarker where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") definitionListItem :: OrgParser Int - -> OrgParser (F (Inlines, [Blocks])) + -> OrgParser (Inlines, [Blocks]) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") @@ -911,12 +902,12 @@ definitionListItem parseMarkerGetLength = try $ do cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString parseInlines term contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont - return $ (,) <$> term' <*> fmap (:[]) contents' + return (term', [contents']) -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int - -> OrgParser (F Blocks) + -> OrgParser Blocks listItem start = try $ do markerLength <- try start firstLine <- anyLineNewline @@ -942,7 +933,7 @@ anyLineNewline = (++ "\n") <$> anyLine -- inline -- -inline :: OrgParser (F Inlines) +inline :: OrgParser Inlines inline = choice [ whitespace , linebreak @@ -964,35 +955,36 @@ inline = , subscript , superscript , inlineLaTeX + , smart , symbol ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" -parseInlines :: OrgParser (F Inlines) -parseInlines = trimInlinesF . mconcat <$> many1 inline +parseInlines :: OrgParser Inlines +parseInlines = trimInlines . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~" -whitespace :: OrgParser (F Inlines) -whitespace = pure B.space <$ skipMany1 spaceChar +whitespace :: OrgParser Inlines +whitespace = B.space <$ skipMany1 spaceChar <* updateLastPreCharPos <* updateLastForbiddenCharPos <?> "whitespace" -linebreak :: OrgParser (F Inlines) -linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline +linebreak :: OrgParser Inlines +linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline -str :: OrgParser (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str :: OrgParser Inlines +str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural -- break. This should reflect the values of the Emacs variable -- @org-element-pagaraph-separate@. -endline :: OrgParser (F Inlines) +endline :: OrgParser Inlines endline = try $ do newline notFollowedBy blankline @@ -1010,77 +1002,72 @@ endline = try $ do decEmphasisNewlinesCount guard =<< newlinesCountWithinLimits updateLastPreCharPos - return . return $ B.space + return $ B.space -cite :: OrgParser (F Inlines) +cite :: OrgParser Inlines cite = try $ do guardEnabled Ext_citations (cs, raw) <- withRaw normalCite - return $ (flip B.cite (B.text raw)) <$> cs + return $ flip B.cite (B.text raw) cs -normalCite :: OrgParser (F [Citation]) +normalCite :: OrgParser [Citation] normalCite = try $ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' -citeList :: OrgParser (F [Citation]) -citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) +citeList :: OrgParser [Citation] +citeList = sepBy1 citation (try $ char ';' *> skipSpaces) -citation :: OrgParser (F Citation) +citation :: OrgParser Citation citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } + return $ Citation{ citationId = key + , citationPrefix = B.toList pref + , citationSuffix = B.toList suff + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } where - prefix = trimInlinesF . mconcat <$> + prefix = trimInlines . mconcat <$> manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) skipSpaces - rest <- trimInlinesF . mconcat <$> + rest <- trimInlines . mconcat <$> many (notFollowedBy (oneOf ";]") *> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest + return $ + if hasSpace + then B.space <> rest + else rest -footnote :: OrgParser (F Inlines) +footnote :: OrgParser Inlines footnote = try $ inlineNote <|> referencedNote -inlineNote :: OrgParser (F Inlines) +inlineNote :: OrgParser Inlines inlineNote = try $ do string "[fn:" ref <- many alphaNum char ':' - note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') + note <- B.para . trimInlines . mconcat <$> many1Till inline (char ']') when (not $ null ref) $ addToNotesTable ("fn:" ++ ref, note) - return $ B.note <$> note + return $ B.note note -referencedNote :: OrgParser (F Inlines) +referencedNote :: OrgParser Inlines referencedNote = try $ do ref <- noteMarker - return $ do - notes <- asksF orgStateNotes' + notes <- asks (orgStateNotes' . finalState) + return $ case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" - Just contents -> do - st <- askF - let contents' = runF contents st{ orgStateNotes' = [] } - return $ B.note contents' + Just contents -> B.note contents + Nothing -> B.str $ "[" ++ ref ++ "]" noteMarker :: OrgParser String noteMarker = try $ do @@ -1090,37 +1077,37 @@ noteMarker = try $ do <*> many1Till (noneOf "\n\r\t ") (char ']') ] -linkOrImage :: OrgParser (F Inlines) +linkOrImage :: OrgParser Inlines linkOrImage = explicitOrImageLink <|> selflinkOrImage <|> angleLink <|> plainLink <?> "link or image" -explicitOrImageLink :: OrgParser (F Inlines) +explicitOrImageLink :: OrgParser Inlines explicitOrImageLink = try $ do char '[' - srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget + src <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ do - src <- srcF - if isImageFilename src && isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + alt <- internalLink src title' + return $ + (if isImageFilename title + then B.link src "" $ B.image title mempty mempty + else fromMaybe alt (linkToInlines src title')) -selflinkOrImage :: OrgParser (F Inlines) +selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return $ linkToInlinesF src (B.str src) + return $ fromMaybe "" (linkToInlines src (B.str src)) -plainLink :: OrgParser (F Inlines) +plainLink :: OrgParser Inlines plainLink = try $ do (orig, src) <- uri - returnF $ B.link src "" (B.str orig) + return $ B.link src "" (B.str orig) -angleLink :: OrgParser (F Inlines) +angleLink :: OrgParser Inlines angleLink = try $ do char '<' link <- plainLink @@ -1136,26 +1123,31 @@ linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") possiblyEmptyLinkTarget :: OrgParser String possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser (F String) +applyCustomLinkFormat :: String -> OrgParser String applyCustomLinkFormat link = do let (linkType, rest) = break (== ':') link - return $ do - formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter + fmts <- asks finalState + return $ + case M.lookup linkType (orgStateLinkFormatters fmts) of + Just v -> (v (drop 1 rest)) + Nothing -> link -- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind -- of parsing. -linkToInlinesF :: String -> Inlines -> F Inlines -linkToInlinesF s = +linkToInlines :: String -> Inlines -> Maybe Inlines +linkToInlines = \s -> case s of - "" -> pure . B.link "" "" - ('#':_) -> pure . B.link s "" - _ | isImageFilename s -> const . pure $ B.image s "" "" - _ | isFileLink s -> pure . B.link (dropLinkType s) "" - _ | isUri s -> pure . B.link s "" - _ | isAbsoluteFilePath s -> pure . B.link ("file://" ++ s) "" - _ | isRelativeFilePath s -> pure . B.link s "" - _ -> internalLink s + _ | null s -> Just . B.link "" "" + _ | isAnchor s -> Just . B.link s "" + _ | isImageFilename s -> const . Just $ B.image s "" "" + _ | isFileLink s -> Just . B.link (dropLinkType s) "" + _ | isUri s -> Just . B.link s "" + _ | isAbsoluteFilePath s -> Just . B.link ("file://" ++ s) "" + _ | isRelativeFilePath s -> Just . B.link s "" + _ -> const Nothing + +isAnchor :: String -> Bool +isAnchor s = "#" `isPrefixOf` s isFileLink :: String -> Bool isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s) @@ -1184,12 +1176,13 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] -internalLink :: String -> Inlines -> F Inlines +internalLink :: String -> Inlines -> OrgParser Inlines internalLink link title = do - anchorB <- (link `elem`) <$> asksF orgStateAnchorIds - if anchorB - then return $ B.link ('#':link) "" title - else return $ B.emph title + anchorB <- asks finalState + return $ + if link `elem` (orgStateAnchorIds anchorB) + then B.link ('#':link) "" title + else B.emph title -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with -- @anchor-id@ set as id. Legal anchors in org-mode are defined through @@ -1197,11 +1190,11 @@ internalLink link title = do -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as -- an anchor. -anchor :: OrgParser (F Inlines) +anchor :: OrgParser Inlines anchor = try $ do anchorId <- parseAnchor recordAnchorId anchorId - returnF $ B.spanWith (solidify anchorId, [], []) mempty + return $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" *> many1 (noneOf "\t\n\r<>\"' ") @@ -1219,7 +1212,7 @@ solidify = map replaceSpecialChar | otherwise = '-' -- | Parses an inline code block and marks it as an babel block. -inlineCodeBlock :: OrgParser (F Inlines) +inlineCodeBlock :: OrgParser Inlines inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar @@ -1227,7 +1220,7 @@ inlineCodeBlock = try $ do inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) - returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode + return $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode enclosedByPair :: Char -- ^ opening char -> Char -- ^ closing char @@ -1235,50 +1228,51 @@ enclosedByPair :: Char -- ^ opening char -> OrgParser [a] enclosedByPair s e p = char s *> many1Till p (char e) -emph :: OrgParser (F Inlines) -emph = fmap B.emph <$> emphasisBetween '/' +emph :: OrgParser Inlines +emph = B.emph <$> emphasisBetween '/' -strong :: OrgParser (F Inlines) -strong = fmap B.strong <$> emphasisBetween '*' +strong :: OrgParser Inlines +strong = B.strong <$> emphasisBetween '*' -strikeout :: OrgParser (F Inlines) -strikeout = fmap B.strikeout <$> emphasisBetween '+' +strikeout :: OrgParser Inlines +strikeout = B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. -underline :: OrgParser (F Inlines) -underline = fmap B.strong <$> emphasisBetween '_' +underline :: OrgParser Inlines +underline = B.strong <$> emphasisBetween '_' -verbatim :: OrgParser (F Inlines) -verbatim = return . B.code <$> verbatimBetween '=' +verbatim :: OrgParser Inlines +verbatim = B.code <$> verbatimBetween '=' -code :: OrgParser (F Inlines) -code = return . B.code <$> verbatimBetween '~' +code :: OrgParser Inlines +code = B.code <$> verbatimBetween '~' -subscript :: OrgParser (F Inlines) -subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) +subscript :: OrgParser Inlines +subscript = B.subscript <$> try (char '_' *> subOrSuperExpr) -superscript :: OrgParser (F Inlines) -superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) +superscript :: OrgParser Inlines +superscript = B.superscript <$> try (char '^' *> subOrSuperExpr) -math :: OrgParser (F Inlines) -math = return . B.math <$> choice [ math1CharBetween '$' +math :: OrgParser Inlines +math = B.math <$> choice [ math1CharBetween '$' , mathStringBetween '$' , rawMathBetween "\\(" "\\)" ] -displayMath :: OrgParser (F Inlines) -displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" - ] -symbol :: OrgParser (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +displayMath :: OrgParser Inlines +displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" + , rawMathBetween "$$" "$$" + ] + +symbol :: OrgParser Inlines +symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) where updatePositions c = do when (c `elem` emphasisPreChars) updateLastPreCharPos when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos return c emphasisBetween :: Char - -> OrgParser (F Inlines) + -> OrgParser Inlines emphasisBetween c = try $ do startEmphasisNewlinesCounting emphasisAllowedNewlines res <- enclosedInlines (emphasisStart c) (emphasisEnd c) @@ -1355,9 +1349,9 @@ mathEnd c = try $ do enclosedInlines :: OrgParser a -> OrgParser b - -> OrgParser (F Inlines) + -> OrgParser Inlines enclosedInlines start end = try $ - trimInlinesF . mconcat <$> enclosed start end inline + trimInlines . mconcat <$> enclosed start end inline enclosedRaw :: OrgParser a -> OrgParser b @@ -1436,7 +1430,7 @@ notAfterForbiddenBorderChar = do return $ lastFBCPos /= Just pos -- | Read a sub- or superscript expression -subOrSuperExpr :: OrgParser (F Inlines) +subOrSuperExpr :: OrgParser Inlines subOrSuperExpr = try $ choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") @@ -1451,10 +1445,10 @@ simpleSubOrSuperString = try $ <*> many1 alphaNum ] -inlineLaTeX :: OrgParser (F Inlines) +inlineLaTeX :: OrgParser Inlines inlineLaTeX = try $ do cmd <- inlineLaTeXCommand - maybe mzero returnF $ + maybe mzero return $ parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd where parseAsMath :: String -> Maybe Inlines @@ -1486,3 +1480,31 @@ inlineLaTeXCommand = try $ do count len anyChar return cs _ -> mzero + +smart :: OrgParser Inlines +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice [orgApostrophe, dash, ellipses] + where orgApostrophe = + (char '\'' <|> char '\8217') <* updateLastPreCharPos + <* updateLastForbiddenCharPos + *> return (B.str "\x2019") + +singleQuoted :: OrgParser Inlines +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + B.singleQuoted . trimInlines . mconcat <$> + many1Till inline singleQuoteEnd + +-- doubleQuoted will handle regular double-quoted sections, as well +-- as dialogues with an open double-quote without a close double-quote +-- in the same paragraph. +doubleQuoted :: OrgParser Inlines +doubleQuoted = try $ do + doubleQuoteStart + contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) + (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return + (B.doubleQuoted . trimInlines $ contents)) + <|> (return $ (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index b9a77c5d6..a8112bc81 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -51,14 +51,16 @@ import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) import Data.Char (toLower, isHexDigit, isSpace) +import Text.Pandoc.Error + -- | Parse reStructuredText string and return Pandoc document. readRST :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") -readRSTWithWarnings :: ReaderOptions -> String -> (Pandoc, [String]) -readRSTWithWarnings opts s = (readWithWarnings parseRST) def{ stateOptions = opts } (s ++ "\n\n") +readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) +readRSTWithWarnings opts s = (readWith (returnWarnings parseRST)) def{ stateOptions = opts } (s ++ "\n\n") type RSTParser = Parser [Char] ParserState diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 9f5738478..07b414431 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -48,17 +48,18 @@ import Data.Maybe (fromMaybe) import Text.HTML.TagSoup import Data.Char (isAlphaNum) import qualified Data.Foldable as F +import Text.Pandoc.Error -- | Read twiki from an input string and return a Pandoc document. readTWiki :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readTWiki opts s = (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n") readTWikiWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> (Pandoc, [String]) + -> Either PandocError (Pandoc, [String]) readTWikiWithWarnings opts s = (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") where parseTWikiWithWarnings = do diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 63ab80eb9..4565b26a1 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -68,11 +68,12 @@ import Text.Printf import Control.Applicative ((<$>), (*>), (<*), (<$)) import Data.Monoid import Debug.Trace (trace) +import Text.Pandoc.Error -- | Parse a Textile text and return a Pandoc document. readTextile :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readTextile opts s = (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 834d18c5c..304d6d4c5 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -48,6 +48,7 @@ import Data.Monoid (Monoid, mconcat, mempty, mappend) import Control.Monad (void, guard, when) import Data.Default import Control.Monad.Reader (Reader, runReader, asks) +import Text.Pandoc.Error import Data.Time.LocalTime (getZonedTime) import Text.Pandoc.Compat.Directory(getModificationTime) @@ -83,12 +84,12 @@ getT2TMeta inps out = do return $ T2TMeta curDate curMtime (intercalate ", " inps) out -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc +readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document -readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc +readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc readTxt2TagsNoMacros = readTxt2Tags def parseT2T :: T2T Pandoc diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index bc960fd38..e0460c66e 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -85,6 +85,8 @@ module Text.Pandoc.Shared ( -- * Error handling err, warn, + mapLeft, + hush, -- * Safe read safeRead, -- * Temp directory @@ -113,7 +115,7 @@ import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import qualified Control.Exception as E -import Control.Monad (msum, unless) +import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.Compat.Locale (defaultTimeLocale) import Data.Time @@ -855,6 +857,14 @@ warn msg = do name <- getProgName UTF8.hPutStrLn stderr $ name ++ ": " ++ msg +mapLeft :: (a -> b) -> Either a c -> Either b c +mapLeft f (Left x) = Left (f x) +mapLeft _ (Right x) = Right x + +hush :: Either a b -> Maybe b +hush (Left _) = Nothing +hush (Right x) = Just x + -- | Remove intermediate "." and ".." directories from a path. -- -- > collapseFilePath "./foo" == "foo" @@ -883,11 +893,11 @@ collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories -- Safe read -- -safeRead :: (Monad m, Read a) => String -> m a +safeRead :: (MonadPlus m, Read a) => String -> m a safeRead s = case reads s of (d,x):_ | all isSpace x -> return d - _ -> fail $ "Could not read `" ++ s ++ "'" + _ -> mzero -- -- Temp directory diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e5b8c5167..1c33b004a 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -126,7 +126,7 @@ blockToAsciiDoc :: WriterOptions -- ^ Options blockToAsciiDoc _ Null = return empty blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines - return $ contents <> cr + return $ contents <> blankline blockToAsciiDoc opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do blockToAsciiDoc opts (Para [Image alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do @@ -272,7 +272,7 @@ bulletListItemToAsciiDoc opts blocks = do contents <- foldM addBlock empty blocks modify $ \s -> s{ bulletListLevel = lev } let marker = text (replicate lev '*') - return $ marker <> space <> contents <> cr + return $ marker <> text " " <> contents <> cr -- | Convert ordered list item (a list of blocks) to asciidoc. orderedListItemToAsciiDoc :: WriterOptions -- ^ options @@ -292,7 +292,7 @@ orderedListItemToAsciiDoc opts marker blocks = do modify $ \s -> s{ orderedListLevel = lev + 1 } contents <- foldM addBlock empty blocks modify $ \s -> s{ orderedListLevel = lev } - return $ text marker <> space <> contents <> cr + return $ text marker <> text " " <> contents <> cr -- | Convert definition list item (label, list of blocks) to asciidoc. definitionListItemToAsciiDoc :: WriterOptions diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs new file mode 100644 index 000000000..706b27175 --- /dev/null +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -0,0 +1,178 @@ +{- +Copyright (C) 2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.CommonMark + Copyright : Copyright (C) 2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to CommonMark. + +CommonMark: <http://commonmark.org> +-} +module Text.Pandoc.Writers.CommonMark (writeCommonMark) where + +import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Definition +import Text.Pandoc.Shared (isTightList) +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import CMark +import qualified Data.Text as T +import Control.Monad.Identity (runIdentity, Identity) +import Control.Monad.State (runState, State, modify, get) +import Text.Pandoc.Walk (walkM) + +-- | Convert Pandoc to CommonMark. +writeCommonMark :: WriterOptions -> Pandoc -> String +writeCommonMark opts (Pandoc meta blocks) = rendered + where main = runIdentity $ blocksToCommonMark opts (blocks' ++ notes') + (blocks', notes) = runState (walkM processNotes blocks) [] + notes' = if null notes + then [] + else [OrderedList (1, Decimal, Period) $ reverse notes] + metadata = runIdentity $ metaToJSON opts + (blocksToCommonMark opts) + (inlinesToCommonMark opts) + meta + context = defField "body" main $ metadata + rendered = if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +processNotes :: Inline -> State [[Block]] Inline +processNotes (Note bs) = do + modify (bs :) + notes <- get + return $ Str $ "[" ++ show (length notes) ++ "]" +processNotes x = return x + +node :: NodeType -> [Node] -> Node +node = Node Nothing + +blocksToCommonMark :: WriterOptions -> [Block] -> Identity String +blocksToCommonMark opts bs = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node DOCUMENT (blocksToNodes bs) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then writerColumns opts + else 0 + +inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String +inlinesToCommonMark opts ils = return $ + T.unpack $ nodeToCommonmark cmarkOpts colwidth + $ node PARAGRAPH (inlinesToNodes ils) + where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] + colwidth = if writerWrapText opts + then writerColumns opts + else 0 + +blocksToNodes :: [Block] -> [Node] +blocksToNodes = foldr blockToNodes [] + +blockToNodes :: Block -> [Node] -> [Node] +blockToNodes (Plain xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (Para xs) = (node PARAGRAPH (inlinesToNodes xs) :) +blockToNodes (CodeBlock (_,classes,_) xs) = + (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] :) +blockToNodes (RawBlock fmt xs) + | fmt == Format "html" = (node (HTML (T.pack xs)) [] :) + | otherwise = id +blockToNodes (BlockQuote bs) = + (node BLOCK_QUOTE (blocksToNodes bs) :) +blockToNodes (BulletList items) = + (node (LIST ListAttributes{ + listType = BULLET_LIST, + listDelim = PERIOD_DELIM, + listTight = isTightList items, + listStart = 1 }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes (OrderedList (start, _sty, delim) items) = + (node (LIST ListAttributes{ + listType = ORDERED_LIST, + listDelim = case delim of + OneParen -> PAREN_DELIM + TwoParens -> PAREN_DELIM + _ -> PERIOD_DELIM, + listTight = isTightList items, + listStart = start }) (map (node ITEM . blocksToNodes) items) :) +blockToNodes HorizontalRule = (node HRULE [] :) +blockToNodes (Header lev _ ils) = (node (HEADER lev) (inlinesToNodes ils) :) +blockToNodes (Div _ bs) = (blocksToNodes bs ++) +blockToNodes (DefinitionList items) = blockToNodes (BulletList items') + where items' = map dlToBullet items + dlToBullet (term, ((Para xs : ys) : zs)) = + Para (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, ((Plain xs : ys) : zs)) = + Plain (term ++ [LineBreak] ++ xs) : ys ++ concat zs + dlToBullet (term, xs) = + Para term : concat xs +blockToNodes t@(Table _ _ _ _ _) = + (node (HTML (T.pack $! writeHtmlString def $! Pandoc nullMeta [t])) [] :) +blockToNodes Null = id + +inlinesToNodes :: [Inline] -> [Node] +inlinesToNodes = foldr inlineToNodes [] + +inlineToNodes :: Inline -> [Node] -> [Node] +inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) +inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) +inlineToNodes LineBreak = (node LINEBREAK [] :) +inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) +inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) +inlineToNodes (Strikeout xs) = + ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</s>")) []]) ++ ) +inlineToNodes (Superscript xs) = + ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sub>")) []]) ++ ) +inlineToNodes (Subscript xs) = + ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sup>")) []]) ++ ) +inlineToNodes (SmallCaps xs) = + ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) [] + : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</span>")) []]) ++ ) +inlineToNodes (Link ils (url,tit)) = + (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (Image ils (url,tit)) = + (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes ils) :) +inlineToNodes (RawInline fmt xs) + | fmt == Format "html" = (node (INLINE_HTML (T.pack xs)) [] :) + | otherwise = id +inlineToNodes (Quoted qt ils) = + ((node (TEXT start) [] : inlinesToNodes ils ++ [node (TEXT end) []]) ++) + where (start, end) = case qt of + SingleQuote -> (T.pack "‘", T.pack "’") + DoubleQuote -> (T.pack "“", T.pack "”") +inlineToNodes (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes (Math mt str) = + case mt of + InlineMath -> + (node (INLINE_HTML (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + DisplayMath -> + (node (INLINE_HTML (T.pack ("\\[" ++ str ++ "\\]"))) [] :) +inlineToNodes (Span _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Cite _ ils) = (inlinesToNodes ils ++) +inlineToNodes (Note _) = id -- should not occur +-- we remove Note elements in preprocessing diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index b10317506..19f8f2f11 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -114,7 +114,8 @@ elementToDocbook opts lvl (Sec _ _num (id',_,_) title elements) = n | n == 0 -> "chapter" | n >= 1 && n <= 5 -> "sect" ++ show n | otherwise -> "simplesect" - in inTags True tag [("id", writerIdentifierPrefix opts ++ id')] $ + in inTags True tag [("id", writerIdentifierPrefix opts ++ id') | + not (null id')] $ inTagsSimple "title" (inlinesToDocbook opts title) $$ vcat (map (elementToDocbook opts (lvl + 1)) elements') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 441392918..4809d2a14 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables, PatternGuards #-} +{-# LANGUAGE ScopedTypeVariables, PatternGuards, ViewPatterns #-} {- Copyright (C) 2012-2014 John MacFarlane <jgm@berkeley.edu> @@ -29,7 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where -import Data.List ( intercalate, isPrefixOf, isSuffixOf, stripPrefix ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -54,6 +54,8 @@ import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () import Text.XML.Light as XML import Text.TeXMath +import Text.Pandoc.Readers.Docx.StyleMap +import Text.Pandoc.Readers.Docx.Util (elemName) import Control.Monad.State import Text.Highlighting.Kate import Data.Unique (hashUnique, newUnique) @@ -63,8 +65,8 @@ import qualified Control.Exception as E import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType) import Control.Applicative ((<$>), (<|>), (<*>)) -import Data.Maybe (fromMaybe, mapMaybe) -import Data.Char (isDigit) +import Data.Maybe (fromMaybe, mapMaybe, maybeToList) +import Data.Char (ord) data ListMarker = NoMarker | BulletMarker @@ -106,8 +108,9 @@ data WriterState = WriterState{ , stChangesAuthor :: String , stChangesDate :: String , stPrintWidth :: Integer - , stHeadingStyles :: [(Int,String)] + , stStyleMaps :: StyleMaps , stFirstPara :: Bool + , stTocTitle :: [Inline] } defaultWriterState :: WriterState @@ -127,8 +130,9 @@ defaultWriterState = WriterState{ , stChangesAuthor = "unknown" , stChangesDate = "1969-12-31T19:00:00Z" , stPrintWidth = 1 - , stHeadingStyles = [] + , stStyleMaps = defaultStyleMaps , stFirstPara = False + , stTocTitle = normalizeInlines [Str "Table of Contents"] } type WS a = StateT WriterState IO a @@ -175,13 +179,36 @@ renumId f renumMap e renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) +-- | Certain characters are invalid in XML even if escaped. +-- See #1992 +stripInvalidChars :: Pandoc -> Pandoc +stripInvalidChars = bottomUp (filter isValidChar) + +-- | See XML reference +isValidChar :: Char -> Bool +isValidChar (ord -> c) + | c == 0x9 = True + | c == 0xA = True + | c == 0xD = True + | 0x20 <= c && c <= 0xD7FF = True + | 0xE000 <= c && c <= 0xFFFD = True + | 0x10000 <= c && c <= 0x10FFFF = True + | otherwise = False + +metaValueToInlines :: MetaValue -> [Inline] +metaValueToInlines (MetaString s) = normalizeInlines [Str s] +metaValueToInlines (MetaInlines ils) = ils +metaValueToInlines (MetaBlocks bs) = query return bs +metaValueToInlines (MetaBool b) = [Str $ show b] +metaValueToInlines _ = [] + -- | Produce an Docx file from a Pandoc document. writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = walk fixDisplayMath doc + let doc' = stripInvalidChars . walk fixDisplayMath $ doc username <- lookup "USERNAME" <$> getEnvironment utctime <- getCurrentTime refArchive <- liftM (toArchive . toLazy) $ @@ -215,32 +242,18 @@ writeDocx opts doc@(Pandoc meta _) = do styledoc <- parseXml refArchive distArchive stylepath -- parse styledoc for heading styles - let styleNamespaces = map ((,) <$> qName . attrKey <*> attrVal) . - filter ((==Just "xmlns") . qPrefix . attrKey) . - elAttribs $ styledoc - let headingStyles = - let - mywURI = lookup "w" styleNamespaces - myName name = QName name mywURI (Just "w") - getAttrStyleId = findAttr (myName "styleId") - getNameVal = findChild (myName "name") >=> findAttr (myName "val") - getNum s | not $ null s, all isDigit s = Just (read s :: Int) - | otherwise = Nothing - getEngHeader = getAttrStyleId >=> stripPrefix "Heading" >=> getNum - getIntHeader = getNameVal >=> stripPrefix "heading " >=> getNum - toTuple getF = liftM2 (,) <$> getF <*> getAttrStyleId - toMap getF = mapMaybe (toTuple getF) $ - findChildren (myName "style") styledoc - select a b | not $ null a = a - | otherwise = b - in - select (toMap getEngHeader) (toMap getIntHeader) + let styleMaps = getStyleMaps styledoc + + let tocTitle = fromMaybe (stTocTitle defaultWriterState) $ + metaValueToInlines <$> lookupMeta "toc-title" meta ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) - , stHeadingStyles = headingStyles} + , stStyleMaps = styleMaps + , stTocTitle = tocTitle + } let epochtime = floor $ utcTimeToPOSIXSeconds utctime let imgs = M.elems $ stImages st @@ -393,9 +406,18 @@ writeDocx opts doc@(Pandoc meta _) = do linkrels -- styles - let newstyles = styleToOpenXml $ writerHighlightStyle opts - let styledoc' = styledoc{ elContent = elContent styledoc ++ - [Elem x | x <- newstyles, writerHighlight opts] } + let newstyles = styleToOpenXml styleMaps $ writerHighlightStyle opts + let styledoc' = styledoc{ elContent = modifyContent (elContent styledoc) } + where + modifyContent + | writerHighlight opts = (++ map Elem newstyles) + | otherwise = filter notTokStyle + notTokStyle (Elem el) = notStyle el || notTokId el + notTokStyle _ = True + notStyle = (/= elemName' "style") . elName + notTokId = maybe True (`notElem` tokStys) . findAttr (elemName' "styleId") + tokStys = "SourceCode" : map show (enumFromTo KeywordTok NormalTok) + elemName' = elemName (sNameSpaces styleMaps) "w" let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -440,6 +462,17 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels + -- we use dist archive for settings.xml, because Word sometimes + -- adds references to footnotes or endnotes we don't have... + -- we do, however, copy some settings over from reference + let settingsPath = "word/settings.xml" + settingsList = [ "w:autoHyphenation" + , "w:consecutiveHyphenLimit" + , "w:hyphenationZone" + , "w:doNotHyphenateCap" + ] + settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList + let entryFromArchive arch path = maybe (fail $ path ++ " corrupt or missing in reference docx") return @@ -447,9 +480,6 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" - -- we use dist archive for settings.xml, because Word sometimes - -- adds references to footnotes or endnotes we don't have... - settingsEntry <- entryFromArchive distArchive "word/settings.xml" webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" headerFooterEntries <- mapM (entryFromArchive refArchive) $ mapMaybe (fmap ("word/" ++) . extractTarget) @@ -472,10 +502,13 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive -styleToOpenXml :: Style -> [Element] -styleToOpenXml style = parStyle : map toStyle alltoktypes +styleToOpenXml :: StyleMaps -> Style -> [Element] +styleToOpenXml sm style = + maybeToList parStyle ++ mapMaybe toStyle alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok - toStyle toktype = mknode "w:style" [("w:type","character"), + toStyle toktype | hasStyleName (show toktype) (sCharStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","character"), ("w:customStyle","1"),("w:styleId",show toktype)] [ mknode "w:name" [("w:val",show toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () @@ -496,17 +529,35 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes tokBg toktype = maybe "auto" (drop 1 . fromColor) $ (tokenBackground =<< lookup toktype tokStyles) `mplus` backgroundColor style - parStyle = mknode "w:style" [("w:type","paragraph"), + parStyle | hasStyleName "Source Code" (sParaStyleMap sm) = Nothing + | otherwise = Just $ + mknode "w:style" [("w:type","paragraph"), ("w:customStyle","1"),("w:styleId","SourceCode")] [ mknode "w:name" [("w:val","Source Code")] () , mknode "w:basedOn" [("w:val","Normal")] () , mknode "w:link" [("w:val","VerbatimChar")] () , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () + : mknode "w:noProof" [] () : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) $ backgroundColor style ) ] +copyChildren :: Archive -> Archive -> String -> Integer -> [String] -> IO Entry +copyChildren refArchive distArchive path timestamp elNames = do + ref <- parseXml refArchive distArchive path + dist <- parseXml distArchive distArchive path + return $ toEntry path timestamp $ renderXml dist{ + elContent = elContent dist ++ copyContent ref + } + where + strName QName{qName=name, qPrefix=prefix} + | Just p <- prefix = p++":"++name + | otherwise = name + shouldCopy = (`elem` elNames) . strName + cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} + copyContent = map cleanElem . filterChildrenName shouldCopy + -- this is the lowest number used for a list numId baseListId :: Int baseListId = 1000 @@ -584,6 +635,34 @@ mkLvl marker lvl = getNumId :: WS Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists +makeTOC :: WriterOptions -> WS [Element] +makeTOC opts | writerTableOfContents opts = do + let depth = "1-"++(show (writerTOCDepth opts)) + let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" + tocTitle <- gets stTocTitle + title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) + return $ + [mknode "w:sdt" [] ([ + mknode "w:sdtPr" [] ( + mknode "w:docPartObj" [] ( + [mknode "w:docPartGallery" [("w:val","Table of Contents")] (), + mknode "w:docPartUnique" [] ()] + ) -- w:docPartObj + ), -- w:sdtPr + mknode "w:sdtContent" [] (title++[ + mknode "w:p" [] ( + mknode "w:r" [] ([ + mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (), + mknode "w:instrText" [("xml:space","preserve")] tocCmd, + mknode "w:fldChar" [("w:fldCharType","separate")] (), + mknode "w:fldChar" [("w:fldCharType","end")] () + ]) -- w:r + ) -- w:p + ]) + ])] -- w:sdt +makeTOC _ = return [] + + -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) @@ -602,32 +681,45 @@ writeOpenXML opts (Pandoc meta blocks) = do Just (MetaBlocks [Para xs]) -> xs Just (MetaInlines xs) -> xs _ -> [] - title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] - subtitle <- withParaProp (pStyle "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] - authors <- withParaProp (pStyle "Author") $ blocksToOpenXML opts $ + title <- withParaPropM (pStyleM "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] + subtitle <- withParaPropM (pStyleM "Subtitle") $ blocksToOpenXML opts [Para subtitle' | not (null subtitle')] + authors <- withParaProp (pCustomStyle "Author") $ blocksToOpenXML opts $ map Para auths - date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + date <- withParaPropM (pStyleM "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] abstract <- if null abstract' then return [] - else withParaProp (pStyle "Abstract") $ blocksToOpenXML opts abstract' + else withParaProp (pCustomStyle "Abstract") $ blocksToOpenXML opts abstract' let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs convertSpace xs = xs let blocks' = bottomUp convertSpace blocks doc' <- (setFirstPara >> blocksToOpenXML opts blocks') notes' <- reverse `fmap` gets stFootnotes - let meta' = title ++ subtitle ++ authors ++ date ++ abstract + toc <- makeTOC opts + let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls -pStyle :: String -> Element -pStyle sty = mknode "w:pStyle" [("w:val",sty)] () +pCustomStyle :: String -> Element +pCustomStyle sty = mknode "w:pStyle" [("w:val",sty)] () + +pStyleM :: String -> WS XML.Element +pStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sParaStyleMap styleMaps + return $ mknode "w:pStyle" [("w:val",sty')] () + +rCustomStyle :: String -> Element +rCustomStyle sty = mknode "w:rStyle" [("w:val",sty)] () -rStyle :: String -> Element -rStyle sty = mknode "w:rStyle" [("w:val",sty)] () +rStyleM :: String -> WS XML.Element +rStyleM styleName = do + styleMaps <- gets stStyleMaps + let sty' = getStyleId styleName $ sCharStyleMap styleMaps + return $ mknode "w:rStyle" [("w:val",sty')] () getUniqueId :: MonadIO m => m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -641,13 +733,12 @@ blockToOpenXML opts (Div (_,["references"],_) bs) = do let (hs, bs') = span isHeaderBlock bs header <- blocksToOpenXML opts hs -- We put the Bibliography style on paragraphs after the header - rest <- withParaProp (pStyle "Bibliography") $ blocksToOpenXML opts bs' + rest <- withParaPropM (pStyleM "Bibliography") $ blocksToOpenXML opts bs' return (header ++ rest) blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do setFirstPara - headingStyles <- gets stHeadingStyles - paraProps <- maybe id (withParaProp . pStyle) (lookup lev headingStyles) $ + paraProps <- withParaPropM (pStyleM ("Heading "++show lev)) $ getParaProps False contents <- inlinesToOpenXML opts lst usedIdents <- gets stSectionIds @@ -660,26 +751,32 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do ,("w:name",bookmarkName)] () let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)] -blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact") +blockToOpenXML opts (Plain lst) = withParaProp (pCustomStyle "Compact") $ blockToOpenXML opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToOpenXML opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = do setFirstPara + pushParaProp $ pCustomStyle $ + if null alt + then "Figure" + else "FigureWithCaption" paraProps <- getParaProps False + popParaProp contents <- inlinesToOpenXML opts [Image alt (src,tit)] - captionNode <- withParaProp (pStyle "ImageCaption") + captionNode <- withParaProp (pCustomStyle "ImageCaption") $ blockToOpenXML opts (Para alt) return $ mknode "w:p" [] (paraProps ++ contents) : captionNode -- fixDisplayMath sometimes produces a Para [] as artifact blockToOpenXML _ (Para []) = return [] blockToOpenXML opts (Para lst) = do - isFirstPara <- gets stFirstPara + isFirstPara <- gets stFirstPara paraProps <- getParaProps $ case lst of [Math DisplayMath _] -> True _ -> False + bodyTextStyle <- pStyleM "Body Text" let paraProps' = case paraProps of - [] | isFirstPara -> [mknode "w:pPr" [] [(pStyle "FirstParagraph")]] - [] -> [mknode "w:pPr" [] [(pStyle "BodyText")]] + [] | isFirstPara -> [mknode "w:pPr" [] [pCustomStyle "FirstParagraph"]] + [] -> [mknode "w:pPr" [] [bodyTextStyle]] ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst @@ -688,11 +785,11 @@ blockToOpenXML _ (RawBlock format str) | format == Format "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] blockToOpenXML opts (BlockQuote blocks) = do - p <- withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks + p <- withParaPropM (pStyleM "Block Text") $ blocksToOpenXML opts blocks setFirstPara return p blockToOpenXML opts (CodeBlock attrs str) = do - p <- withParaProp (pStyle "SourceCode") $ (blockToOpenXML opts $ Para [Code attrs str]) + p <- withParaProp (pCustomStyle "SourceCode") (blockToOpenXML opts $ Para [Code attrs str]) setFirstPara return p blockToOpenXML _ HorizontalRule = do @@ -707,7 +804,7 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do let captionStr = stringify caption caption' <- if null caption then return [] - else withParaProp (pStyle "TableCaption") + else withParaProp (pCustomStyle "TableCaption") $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) @@ -718,32 +815,36 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] - let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] - [mknode "w:pStyle" [("w:val","Compact")] ()]]] + let emptyCell = [mknode "w:p" [] [pCustomStyle "Compact"]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents then emptyCell else contents - let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells + let mkrow border cells = mknode "w:tr" [] $ + [mknode "w:trPr" [] [ + mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] + ++ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" [("w:w", show (floor (textwidth * w) :: Integer))] () + let hasHeader = not (all null headers) return $ caption' ++ [mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","TableNormal")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblLook" [("w:firstRow","1") | hasHeader ] () : [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths then [] else map mkgridcol widths) - : [ mkrow True headers' | not (all null headers) ] ++ + : [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' )] blockToOpenXML opts (BulletList lst) = do @@ -767,9 +868,9 @@ blockToOpenXML opts (DefinitionList items) = do definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] definitionListItemToOpenXML opts (term,defs) = do - term' <- withParaProp (pStyle "DefinitionTerm") + term' <- withParaProp (pCustomStyle "DefinitionTerm") $ blockToOpenXML opts (Para term) - defs' <- withParaProp (pStyle "Definition") + defs' <- withParaProp (pCustomStyle "Definition") $ concat `fmap` mapM (blocksToOpenXML opts) defs return $ term' ++ defs' @@ -833,6 +934,9 @@ withTextProp d p = do popTextProp return res +withTextPropM :: WS Element -> WS a -> WS a +withTextPropM = (. flip withTextProp) . (>>=) + getParaProps :: Bool -> WS [Element] getParaProps displayMathPara = do props <- gets stParaProperties @@ -861,6 +965,9 @@ withParaProp d p = do popParaProp return res +withParaPropM :: WS Element -> WS a -> WS a +withParaPropM = (. flip withParaProp) . (>>=) + formattedString :: String -> WS [Element] formattedString str = do props <- getTextProps @@ -943,25 +1050,26 @@ inlineToOpenXML opts (Math mathType str) = do Right r -> return [r] Left _ -> inlinesToOpenXML opts (texMathToInlines mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML opts (Code attrs str) = - withTextProp (rStyle "VerbatimChar") - $ if writerHighlight opts - then case highlight formatOpenXML attrs str of - Nothing -> unhighlighted - Just h -> return h - else unhighlighted - where unhighlighted = intercalate [br] `fmap` - (mapM formattedString $ lines str) - formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) - toHlTok (toktype,tok) = mknode "w:r" [] - [ mknode "w:rPr" [] - [ rStyle $ show toktype ] - , mknode "w:t" [("xml:space","preserve")] tok ] +inlineToOpenXML opts (Code attrs str) = do + let unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) + toHlTok (toktype,tok) = mknode "w:r" [] + [ mknode "w:rPr" [] + [ rCustomStyle (show toktype) ] + , mknode "w:t" [("xml:space","preserve")] tok ] + withTextProp (rCustomStyle "VerbatimChar") + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted inlineToOpenXML opts (Note bs) = do notes <- gets stFootnotes notenum <- getUniqueId + footnoteStyle <- rStyleM "Footnote Reference" let notemarker = mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs @@ -971,22 +1079,22 @@ inlineToOpenXML opts (Note bs) = do oldParaProperties <- gets stParaProperties oldTextProperties <- gets stTextProperties modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] } - contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts + contents <- withParaPropM (pStyleM "Footnote Text") $ blocksToOpenXML opts $ insertNoteRef bs modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties, stTextProperties = oldTextProperties } let newnote = mknode "w:footnote" [("w:id", notenum)] $ contents modify $ \s -> s{ stFootnotes = newnote : notes } return [ mknode "w:r" [] - [ mknode "w:rPr" [] (rStyle "FootnoteRef") + [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML opts (Link txt ('#':xs,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] -- external link: inlineToOpenXML opts (Link txt (src,_)) = do - contents <- withTextProp (rStyle "Link") $ inlinesToOpenXML opts txt + contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks id' <- case M.lookup src extlinks of Just i -> return i @@ -1088,7 +1196,7 @@ defaultFootnotes = [ mknode "w:footnote" [ mknode "w:p" [] $ [ mknode "w:r" [] $ [ mknode "w:continuationSeparator" [] ()]]]] - + parseXml :: Archive -> Archive -> String -> IO Element parseXml refArchive distArchive relpath = case ((findEntryByPath relpath refArchive `mplus` diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 63c3b5501..29ea44e02 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -495,6 +495,9 @@ writeEPUB opts doc@(Pandoc meta _) = do [] -> "UNTITLED" (x:_) -> titleText x x -> stringify x + + let tocTitle = fromMaybe plainTitle $ + metaValueToString <$> lookupMeta "toc-title" meta let uuid = case epubIdentifier metadata of (x:_) -> identifierText x -- use first identifier as UUID [] -> error "epubIdentifier is null" -- shouldn't happen @@ -539,7 +542,7 @@ writeEPUB opts doc@(Pandoc meta _) = do map chapterRefNode chapterEntries) , unode "guide" $ [ unode "reference" ! - [("type","toc"),("title",plainTitle), + [("type","toc"),("title", tocTitle), ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! @@ -620,7 +623,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let navBlocks = [RawBlock (Format "html") $ ppElement $ unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ - [ unode "h1" ! [("id","toc-title")] $ plainTitle + [ unode "h1" ! [("id","toc-title")] $ tocTitle , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]] let landmarks = if epub3 then [RawBlock (Format "html") $ ppElement $ diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index ef00ea036..53dc931cc 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -446,19 +446,25 @@ blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents blockToHtml opts (Div attr@(_,classes,_) bs) = do - contents <- blockListToHtml opts bs + let speakerNotes = "notes" `elem` classes + -- we don't want incremental output inside speaker notes, see #1394 + let opts' = if speakerNotes then opts{ writerIncremental = False } else opts + contents <- blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts return $ - if "notes" `elem` classes - then let opts' = opts{ writerIncremental = False } in - -- we don't want incremental output inside speaker notes - case writerSlideVariant opts of + if speakerNotes + then case writerSlideVariant opts of RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts attr $ H.div $ contents' -blockToHtml _ (RawBlock f str) +blockToHtml opts (RawBlock f str) | f == Format "html" = return $ preEscapedString str + | f == Format "latex" = + case writerHTMLMathMethod opts of + MathJax _ -> do modify (\st -> st{ stMath = True }) + return $ toHtml str + _ -> return mempty | otherwise = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do @@ -769,6 +775,8 @@ inlineToHtml opts inline = case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ toHtml str + MathJax _ -> do modify (\st -> st {stMath = True}) + return $ toHtml str _ -> return mempty | f == Format "html" -> return $ preEscapedString str | otherwise -> return mempty diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 0e5ec5c18..58456e3ab 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -42,6 +42,7 @@ import Data.List ( (\\), isSuffixOf, isInfixOf, stripPrefix, isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) import Data.Maybe ( fromMaybe ) +import Data.Aeson.Types ( (.:), parseMaybe, withObject ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty @@ -102,8 +103,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let template = writerTemplate options -- set stBook depending on documentclass + let colwidth = if writerWrapText options + then Just $ writerColumns options + else Nothing + metadata <- metaToJSON options + (fmap (render colwidth) . blockListToLaTeX) + (fmap (render colwidth) . inlineListToLaTeX) + meta let bookClasses = ["memoir","book","report","scrreprt","scrbook"] - case lookup "documentclass" (writerVariables options) of + case lookup "documentclass" (writerVariables options) `mplus` + parseMaybe (withObject "object" (.: "documentclass")) metadata of Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True} | otherwise -> return () Nothing | any (\x -> "\\documentclass" `isPrefixOf` x && @@ -114,13 +123,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do -- \enquote{...} for smart quotes: when ("{csquotes}" `isInfixOf` template) $ modify $ \s -> s{stCsquotes = True} - let colwidth = if writerWrapText options - then Just $ writerColumns options - else Nothing - metadata <- metaToJSON options - (fmap (render colwidth) . blockListToLaTeX) - (fmap (render colwidth) . inlineListToLaTeX) - meta let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then (blocks', []) else case last blocks' of @@ -701,7 +703,7 @@ inlineListToLaTeX lst = ("\\\\[" ++ show (length lbs) ++ "\\baselineskip]") : fixBreaks rest fixBreaks (y:ys) = y : fixBreaks ys - + isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True isQuoted _ = False @@ -750,10 +752,11 @@ inlineToLaTeX (Cite cits lst) = do inlineToLaTeX (Code (_,classes,_) str) = do opts <- gets stOptions + inHeading <- gets stInHeading case () of - _ | writerListings opts -> listingsCode + _ | writerListings opts && not inHeading -> listingsCode | writerHighlight opts && not (null classes) -> highlightCode - | otherwise -> rawCode + | otherwise -> rawCode where listingsCode = do inNote <- gets stInNote when inNote $ modify $ \s -> s{ stVerbInNote = True } diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d71f0daf8..dee4d56a4 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -57,12 +57,15 @@ import qualified Data.Text as T type Notes = [[Block]] type Refs = [([Inline], Target)] -data WriterState = WriterState { stNotes :: Notes - , stRefs :: Refs - , stIds :: [String] - , stPlain :: Bool } +data WriterState = WriterState { stNotes :: Notes + , stRefs :: Refs + , stRefShortcutable :: Bool + , stInList :: Bool + , stIds :: [String] + , stPlain :: Bool } instance Default WriterState - where def = WriterState{ stNotes = [], stRefs = [], stIds = [], stPlain = False } + where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True, + stInList = False, stIds = [], stPlain = False } -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String @@ -453,7 +456,7 @@ blockToMarkdown opts t@(Table caption aligns widths headers rows) = do $ Pandoc nullMeta [t] return $ nst $ tbl $$ blankline $$ caption'' $$ blankline blockToMarkdown opts (BulletList items) = do - contents <- mapM (bulletListItemToMarkdown opts) items + contents <- inList $ mapM (bulletListItemToMarkdown opts) items return $ cat contents <> blankline blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let start' = if isEnabled Ext_startnum opts then start else 1 @@ -464,13 +467,22 @@ blockToMarkdown opts (OrderedList (start,sty,delim) items) = do let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' else m) markers - contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + contents <- inList $ + mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ zip markers' items return $ cat contents <> blankline blockToMarkdown opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMarkdown opts) items + contents <- inList $ mapM (definitionListItemToMarkdown opts) items return $ cat contents <> blankline +inList :: State WriterState a -> State WriterState a +inList p = do + oldInList <- gets stInList + modify $ \st -> st{ stInList = True } + res <- p + modify $ \st -> st{ stInList = oldInList } + return res + addMarkdownAttribute :: String -> String addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of @@ -497,7 +509,12 @@ pipeTable headless aligns rawHeaders rawRows = do AlignCenter -> ':':replicate w '-' ++ ":" AlignRight -> replicate (w + 1) '-' ++ ":" AlignDefault -> replicate (w + 2) '-' - let header = if headless then empty else torow rawHeaders + -- note: pipe tables can't completely lack a + -- header; for a headerless table, we need a header of empty cells. + -- see jgm/pandoc#1996. + let header = if headless + then torow (replicate (length aligns) empty) + else torow rawHeaders let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ map toborder $ zip aligns widths) <> text "|" let body = vcat $ map torow rawRows @@ -677,12 +694,53 @@ getReference label (src, tit) = do -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) (avoidBadWraps lst) >>= return . cat - where avoidBadWraps [] = [] - avoidBadWraps (Space:Str (c:cs):xs) - | c `elem` ("-*+>" :: String) = Str (' ':c:cs) : avoidBadWraps xs - avoidBadWraps (x:xs) = x : avoidBadWraps xs +inlineListToMarkdown opts lst = do + inlist <- gets stInList + go (if inlist then avoidBadWrapsInList lst else lst) + where go [] = return empty + go (i:is) = case i of + (Link _ _) -> case is of + -- If a link is followed by another link or '[' we don't shortcut + (Link _ _):_ -> unshortcutable + Space:(Link _ _):_ -> unshortcutable + Space:(Str('[':_)):_ -> unshortcutable + Space:(RawInline _ ('[':_)):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str ('[':_):_ -> unshortcutable + (RawInline _ ('[':_)):_ -> unshortcutable + (RawInline _ (' ':'[':_)):_ -> unshortcutable + _ -> shortcutable + _ -> shortcutable + where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) + unshortcutable = do + iMark <- withState (\s -> s { stRefShortcutable = False }) + (inlineToMarkdown opts i) + modify (\s -> s {stRefShortcutable = True }) + fmap (iMark <>) (go is) + +avoidBadWrapsInList :: [Inline] -> [Inline] +avoidBadWrapsInList [] = [] +avoidBadWrapsInList (Space:Str ('>':cs):xs) = + Str (' ':'>':cs) : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str [c]:[]) + | c `elem` ['-','*','+'] = Str [' ', c] : [] +avoidBadWrapsInList (Space:Str [c]:Space:xs) + | c `elem` ['-','*','+'] = Str [' ', c] : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:Space:xs) + | isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (Space:Str cs:[]) + | isOrderedListMarker cs = Str (' ':cs) : [] +avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs + +isOrderedListMarker :: String -> Bool +isOrderedListMarker xs = (last xs `elem` ['.',')']) && + isRight (runParserT (anyOrderedListMarker >> eof) + defaultParserState "" xs) + +isRight :: Either a b -> Bool +isRight (Right _) = True +isRight (Left _) = False escapeSpaces :: Inline -> Inline escapeSpaces (Str s) = Str $ substitute " " "\\ " s @@ -692,8 +750,10 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do + plain <- gets stPlain contents <- inlineListToMarkdown opts ils - return $ if isEnabled Ext_raw_html opts + return $ if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) then tagWithAttrs "span" attrs <> contents <> text "</span>" else contents inlineToMarkdown opts (Emph lst) = do @@ -726,13 +786,14 @@ inlineToMarkdown opts (Subscript lst) = do else "<sub>" <> contents <> "</sub>" inlineToMarkdown opts (SmallCaps lst) = do plain <- gets stPlain - if plain - then inlineListToMarkdown opts $ capitalize lst - else do + if not plain && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) + then do contents <- inlineListToMarkdown opts lst return $ tagWithAttrs "span" - ("",[],[("style","font-variant:small-caps;")]) + ("",[],[("style","font-variant:small-caps;")]) <> contents <> text "</span>" + else inlineListToMarkdown opts $ capitalize lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "‘" <> contents <> "’" @@ -838,6 +899,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do [Str s] | escapeURI s == srcSuffix -> True _ -> False let useRefLinks = writerReferenceLinks opts && not useAuto + shortcutable <- gets stRefShortcutable + let useShortcutRefLinks = shortcutable && + isEnabled Ext_shortcut_reference_links opts ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto @@ -847,7 +911,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do else if useRefLinks then let first = "[" <> linktext <> "]" second = if txt == ref - then "[]" + then if useShortcutRefLinks + then "" + else "[]" else "[" <> reftext <> "]" in first <> second else if plain diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 2a4129512..81bbdaf3f 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -127,7 +127,7 @@ writeODT opts doc@(Pandoc meta _) = do return $ fromArchive archive'' transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPicMath opts entriesRef (Image lab (src,_)) = do +transformPicMath opts entriesRef (Image lab (src,t)) = do res <- fetchItem' (writerMediaBag opts) (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do @@ -145,7 +145,9 @@ transformPicMath opts entriesRef (Image lab (src,_)) = do epochtime <- floor `fmap` getPOSIXTime let entry = toEntry newsrc epochtime $ toLazy img modifyIORef entriesRef (entry:) - return $ Image lab (newsrc, tit') + let fig | "fig:" `isPrefixOf` t = "fig:" + | otherwise = "" + return $ Image lab (newsrc, fig++tit') transformPicMath _ entriesRef (Math t math) = do entries <- readIORef entriesRef let dt = if t == InlineMath then DisplayInline else DisplayBlock diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 865b7fb35..aee656413 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -288,6 +288,8 @@ blockToOpenDocument o bs | Plain b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b + | Para [Image c (s,'f':'i':'g':':':t)] <- bs + = figure c s t | Para b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b @@ -334,7 +336,7 @@ blockToOpenDocument o bs mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles captionDoc <- if null c then return empty - else withParagraphStyle o "Caption" [Para c] + else withParagraphStyle o "TableCaption" [Para c] th <- if all null h then return empty else colHeadsToOpenDocument o name (map fst paraHStyles) h @@ -342,6 +344,12 @@ blockToOpenDocument o bs return $ inTags True "table:table" [ ("table:name" , name) , ("table:style-name", name) ] (vcat columns $$ th $$ vcat tr) $$ captionDoc + figure caption source title | null caption = + withParagraphStyle o "Figure" [Para [Image caption (source,title)]] + | otherwise = do + imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image caption (source,title)]] + captionDoc <- withParagraphStyle o "FigureCaption" [Para caption] + return $ imageDoc $$ captionDoc colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc colHeadsToOpenDocument o tn ns hs = @@ -553,4 +561,3 @@ textStyleAttr s ,("style:font-name-asian" ,"Courier New") ,("style:font-name-complex" ,"Courier New")] | otherwise = [] - diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 717a47000..2dd899680 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -48,11 +48,13 @@ import Data.Char (isSpace, toLower) type Refs = [([Inline], Target)] data WriterState = - WriterState { stNotes :: [[Block]] - , stLinks :: Refs - , stImages :: [([Inline], (String, String, Maybe String))] - , stHasMath :: Bool - , stOptions :: WriterOptions + WriterState { stNotes :: [[Block]] + , stLinks :: Refs + , stImages :: [([Inline], (String, String, Maybe String))] + , stHasMath :: Bool + , stHasRawTeX :: Bool + , stOptions :: WriterOptions + , stTopLevel :: Bool } -- | Convert Pandoc to RST. @@ -60,7 +62,8 @@ writeRST :: WriterOptions -> Pandoc -> String writeRST opts document = let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, - stOptions = opts } + stHasRawTeX = False, stOptions = opts, + stTopLevel = True} in evalState (pandocToRST document) st -- | Return RST representation of document. @@ -78,23 +81,32 @@ pandocToRST (Pandoc meta blocks) = do (fmap (render colwidth) . blockListToRST) (fmap (trimr . render colwidth) . inlineListToRST) $ deleteMeta "title" $ deleteMeta "subtitle" meta - body <- blockListToRST blocks + body <- blockListToRST' True $ normalizeHeadings 1 blocks notes <- liftM (reverse . stNotes) get >>= notesToRST -- note that the notes may contain refs, so we do them first refs <- liftM (reverse . stLinks) get >>= refsToRST pics <- liftM (reverse . stImages) get >>= pictRefsToRST hasMath <- liftM stHasMath get + rawTeX <- liftM stHasRawTeX get let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics] let context = defField "body" main $ defField "toc" (writerTableOfContents opts) - $ defField "toc-depth" (writerTOCDepth opts) + $ defField "toc-depth" (show $ writerTOCDepth opts) $ defField "math" hasMath $ defField "title" (render Nothing title :: String) $ defField "math" hasMath + $ defField "rawtex" rawTeX $ metadata if writerStandalone opts then return $ renderTemplate' (writerTemplate opts) context else return main + where + normalizeHeadings lev (Header l a i:bs) = Header lev a i:normalizeHeadings (lev+1) cont ++ normalizeHeadings lev bs' + where (cont,bs') = break (headerLtEq l) bs + headerLtEq level (Header l' _ _) = l' <= level + headerLtEq _ _ = False + normalizeHeadings lev (b:bs) = b:normalizeHeadings lev bs + normalizeHeadings _ [] = [] -- | Return RST representation of reference key table. refsToRST :: Refs -> State WriterState Doc @@ -188,11 +200,21 @@ blockToRST (RawBlock f@(Format f') str) (nest 3 $ text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline -blockToRST (Header level _ inlines) = do +blockToRST (Header level (name,classes,_) inlines) = do contents <- inlineListToRST inlines - let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate (offset contents) headerChar - return $ nowrap $ contents $$ border $$ blankline + isTopLevel <- gets stTopLevel + if isTopLevel + then do + let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) + let border = text $ replicate (offset contents) headerChar + return $ nowrap $ contents $$ border $$ blankline + else do + let rub = "rubric:: " <> contents + let name' | null name = empty + | otherwise = ":name: " <> text name + let cls | null classes = empty + | otherwise = ":class: " <> text (unwords classes) + return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline blockToRST (CodeBlock (_,classes,kvs) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts @@ -294,9 +316,19 @@ definitionListItemToRST (label, defs) = do return $ label' $$ nest tabstop (nestle contents <> cr) -- | Convert list of Pandoc block elements to RST. +blockListToRST' :: Bool + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToRST' topLevel blocks = do + tl <- gets stTopLevel + modify (\s->s{stTopLevel=topLevel}) + res <- vcat `fmap` mapM blockToRST blocks + modify (\s->s{stTopLevel=tl}) + return res + blockListToRST :: [Block] -- ^ List of block elements -> State WriterState Doc -blockListToRST blocks = mapM blockToRST blocks >>= return . vcat +blockListToRST = blockListToRST' False -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc @@ -392,6 +424,9 @@ inlineToRST (Math t str) = do else blankline $$ (".. math:: " <> text str) $$ blankline inlineToRST (RawInline f x) | f == "rst" = return $ text x + | f == "latex" || f == "tex" = do + modify $ \st -> st{ stHasRawTeX = True } + return $ ":raw-latex:`" <> text x <> "`" | otherwise = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space |