diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-03-28 12:12:48 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-03-28 12:12:48 -0700 |
commit | 6a3a04c4280817df39519bf1d73eee3b9e0b3841 (patch) | |
tree | 304a696d66d3d8cfb6f1441086947c12f2cb1cb8 | |
parent | d744b83b61bc635419339b73b687b9280ee757fc (diff) | |
parent | ad39bc7009e320b3afb91a5683521eb1eccf0ef7 (diff) | |
download | pandoc-6a3a04c4280817df39519bf1d73eee3b9e0b3841.tar.gz |
Merge branch 'errortype' of https://github.com/mpickering/pandoc into mpickering-errortype
Conflicts:
benchmark/benchmark-pandoc.hs
src/Text/Pandoc/Readers/Markdown.hs
src/Text/Pandoc/Readers/Org.hs
src/Text/Pandoc/Readers/RST.hs
tests/Tests/Readers/LaTeX.hs
34 files changed, 376 insertions, 254 deletions
diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 2a34696b9..5c0dab460 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -22,20 +22,22 @@ import System.Environment (getArgs) import Data.Monoid import Data.Maybe (mapMaybe) import Debug.Trace (trace) +import Text.Pandoc.Error +import Control.Applicative readerBench :: Pandoc - -> (String, ReaderOptions -> String -> IO Pandoc) + -> (String, ReaderOptions -> String -> IO (Either PandocError Pandoc)) -> Maybe Benchmark readerBench doc (name, reader) = case lookup name writers of Just (PureStringWriter writer) -> let inp = writer def{ writerWrapText = True} doc in return $ bench (name ++ " reader") $ nfIO $ - (reader def{ readerSmart = True }) inp + (fmap handleError <$> reader def{ readerSmart = True }) inp _ | name == "commonmark" -> let inp = writeMarkdown def{ writerWrapText = True} doc in return $ bench (name ++ " reader") $ nfIO $ - (reader def{ readerSmart = True }) inp + (fmap handleError <$> reader def{ readerSmart = True }) inp | otherwise -> trace ("\nCould not find writer for " ++ name ++ "\n") Nothing @@ -52,7 +54,7 @@ main = do defaultOptions args inp <- readFile "tests/testsuite.txt" let opts = def{ readerSmart = True } - let doc = readMarkdown opts inp + let doc = handleError $ readMarkdown opts inp let readers' = [(n,r) | (n, StringReader r) <- readers] let readerBs = mapMaybe (readerBench doc) $ filter (\(n,_) -> n /="haddock") readers' diff --git a/pandoc.cabal b/pandoc.cabal index d699eb83b..2edb0d80a 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -290,6 +290,7 @@ Library Text.Pandoc.Pretty, Text.Pandoc.Shared, Text.Pandoc.MediaBag, + Text.Pandoc.Error, Text.Pandoc.Readers.HTML, Text.Pandoc.Readers.LaTeX, Text.Pandoc.Readers.Markdown, @@ -72,6 +72,8 @@ import Control.Applicative ((<$>), (<|>)) import Text.Pandoc.Readers.Txt2Tags (getT2TMeta) import Data.Monoid +import Text.Pandoc.Error + type Transform = Pandoc -> Pandoc copyrightMessage :: String @@ -1275,17 +1277,17 @@ main = do then 0 else tabStop) - let handleIncludes' = if readerName' == "latex" || - readerName' == "latex+lhs" + let handleIncludes' :: String -> IO (Either PandocError String) + handleIncludes' = if readerName' `elem` ["latex", "latex+lhs"] then handleIncludes - else return - - (doc, media) <- - case reader of - StringReader r-> (, mempty) <$> - ( readSources >=> - handleIncludes' . convertTabs . intercalate "\n" >=> - r readerOpts ) sources + else return . Right + + (doc, media) <- fmap handleError $ + case reader of + StringReader r-> do + srcs <- convertTabs . intercalate "\n" <$> readSources sources + doc <- handleIncludes' srcs + either (return . Left) (\s -> fmap (,mempty) <$> r readerOpts s) doc ByteStringReader r -> readFiles sources >>= r readerOpts let writerOptions = def { writerStandalone = standalone', diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 435e60eb1..b36046a5e 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -166,8 +166,9 @@ import Text.Pandoc.Writers.Haddock import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options -import Text.Pandoc.Shared (safeRead, warn) +import Text.Pandoc.Shared (safeRead, warn, mapLeft) import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Error import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate) @@ -203,19 +204,22 @@ parseFormatSpec = parse formatSpec "" '-' -> Set.delete ext _ -> Set.insert ext -data Reader = StringReader (ReaderOptions -> String -> IO Pandoc) - | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Pandoc, MediaBag)) -mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader +data Reader = StringReader (ReaderOptions -> String -> IO (Either PandocError Pandoc)) + | ByteStringReader (ReaderOptions -> BL.ByteString -> IO (Either PandocError (Pandoc,MediaBag))) + +mkStringReader :: (ReaderOptions -> String -> (Either PandocError Pandoc)) -> Reader mkStringReader r = StringReader (\o s -> return $ r o s) -mkStringReaderWithWarnings :: (ReaderOptions -> String -> (Pandoc, [String])) -> Reader +mkStringReaderWithWarnings :: (ReaderOptions -> String -> Either PandocError (Pandoc, [String])) -> Reader mkStringReaderWithWarnings r = StringReader $ \o s -> do - let (doc, warnings) = r o s - mapM_ warn warnings - return doc + case r o s of + Left err -> return $ Left err + Right (doc, warnings) -> do + mapM_ warn warnings + return (Right doc) -mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader +mkBSReader :: (ReaderOptions -> BL.ByteString -> (Either PandocError (Pandoc, MediaBag))) -> Reader mkBSReader r = ByteStringReader (\o s -> return $ r o s) -- | Association list of formats and readers. @@ -360,8 +364,8 @@ class ToJSONFilter a => ToJsonFilter a where toJsonFilter :: a -> IO () toJsonFilter = toJSONFilter -readJSON :: ReaderOptions -> String -> Pandoc -readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy +readJSON :: ReaderOptions -> String -> Either PandocError Pandoc +readJSON _ = mapLeft ParseFailure . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs new file mode 100644 index 000000000..73d1e8f08 --- /dev/null +++ b/src/Text/Pandoc/Error.hs @@ -0,0 +1,64 @@ +{- +Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Error + Copyright : Copyright (C) 2006-2015 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +This module provides a standard way to deal with possible errors encounted +during parsing. + +-} +module Text.Pandoc.Error (PandocError(..), handleError) where + +import Text.Parsec.Error +import Text.Parsec.Pos hiding (Line) +import Text.Pandoc.Compat.Except + +type Input = String + +data PandocError = -- | Generic parse failure + ParseFailure String + -- | Error thrown by a Parsec parser + | ParsecError Input ParseError + deriving (Show) + + +instance Error PandocError where + strMsg = ParseFailure + + +-- | An unsafe method to handle `PandocError`s. +handleError :: Either PandocError a -> a +handleError (Right r) = r +handleError (Left err) = + case err of + ParseFailure string -> error string + ParsecError input err' -> + let errPos = errorPos err' + errLine = sourceLine errPos + errColumn = sourceColumn errPos + theline = (lines input ++ [""]) !! (errLine - 1) + in error $ "\nError at " ++ show err' ++ "\n" ++ + theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ + "^" + diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 68b34dcf3..8f0a991ba 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} +{-# OPTIONS_GHC -fno-warn-type-defaults #-} {- Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> @@ -38,8 +39,11 @@ import Control.Monad import Data.Bits import Data.Binary import Data.Binary.Get -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, hush) import qualified Data.Map as M +import Text.Pandoc.Compat.Except +import Control.Monad.Trans +import Data.Maybe (fromMaybe) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl @@ -64,7 +68,7 @@ imageType img = case B.take 4 img of "%!PS" | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" -> return Eps - _ -> fail "Unknown image type" + _ -> (hush . Left) "Unknown image type" imageSize :: ByteString -> Maybe ImageSize imageSize img = do @@ -114,7 +118,7 @@ pngSize img = do ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) - _ -> fail "PNG parse error" + _ -> (hush . Left) "PNG parse error" let (dpix, dpiy) = findpHYs rest'' return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } @@ -143,7 +147,7 @@ gifSize img = do dpiX = 72, dpiY = 72 } - _ -> fail "GIF parse error" + _ -> (hush . Left) "GIF parse error" jpegSize :: ByteString -> Maybe ImageSize jpegSize img = do @@ -174,36 +178,37 @@ findJfifSize bs = do Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of [h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2) - _ -> fail "JPEG parse error" + _ -> (hush . Left) "JPEG parse error" Just (_,bs'') -> do case map fromIntegral $ unpack $ B.take 2 bs'' of [c1,c2] -> do let len = shift c1 8 + c2 -- skip variables findJfifSize $ B.drop len bs'' - _ -> fail "JPEG parse error" - Nothing -> fail "Did not find length record" + _ -> (hush . Left) "JPEG parse error" + Nothing -> (hush . Left) "Did not find length record" exifSize :: ByteString -> Maybe ImageSize -exifSize bs = runGet (Just <$> exifHeader bl) bl +exifSize bs = hush . runGet header $ bl where bl = BL.fromChunks [bs] + header = runExceptT $ exifHeader bl -- NOTE: It would be nicer to do -- runGet ((Just <$> exifHeader) <|> return Nothing) -- which would prevent pandoc from raising an error when an exif header can't -- be parsed. But we only get an Alternative instance for Get in binary 0.6, -- and binary 0.5 ships with ghc 7.6. -exifHeader :: BL.ByteString -> Get ImageSize +exifHeader :: BL.ByteString -> ExceptT String Get ImageSize exifHeader hdr = do - _app1DataSize <- getWord16be - exifHdr <- getWord32be - unless (exifHdr == 0x45786966) $ fail "Did not find exif header" - zeros <- getWord16be - unless (zeros == 0) $ fail "Expected zeros after exif header" + _app1DataSize <- lift getWord16be + exifHdr <- lift getWord32be + unless (exifHdr == 0x45786966) $ throwError "Did not find exif header" + zeros <- lift getWord16be + unless (zeros == 0) $ throwError "Expected zeros after exif header" -- beginning of tiff header -- we read whole thing to use -- in getting data from offsets: let tiffHeader = BL.drop 8 hdr - byteAlign <- getWord16be + byteAlign <- lift getWord16be let bigEndian = byteAlign == 0x4d4d let (getWord16, getWord32, getWord64) = if bigEndian @@ -213,17 +218,17 @@ exifHeader hdr = do num <- getWord32 den <- getWord32 return $ fromIntegral num / fromIntegral den - tagmark <- getWord16 - unless (tagmark == 0x002a) $ fail "Failed alignment sanity check" - ifdOffset <- getWord32 - skip (fromIntegral ifdOffset - 8) -- skip to IDF - numentries <- getWord16 - let ifdEntry = do - tag <- getWord16 >>= \t -> - maybe (return UnknownTagType) return - (M.lookup t tagTypeTable) - dataFormat <- getWord16 - numComponents <- getWord32 + tagmark <- lift getWord16 + unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check" + ifdOffset <- lift getWord32 + lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF + numentries <- lift getWord16 + let ifdEntry :: ExceptT String Get (TagType, DataFormat) + ifdEntry = do + tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable + <$> lift getWord16 + dataFormat <- lift getWord16 + numComponents <- lift getWord32 (fmt, bytesPerComponent) <- case dataFormat of 1 -> return (UnsignedByte . runGet getWord8, 1) @@ -238,9 +243,10 @@ exifHeader hdr = do 10 -> return (SignedRational . runGet getRational, 8) 11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4) 12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8) - _ -> fail $ "Unknown data format " ++ show dataFormat + _ -> throwError $ "Unknown data format " ++ show dataFormat let totalBytes = fromIntegral $ numComponents * bytesPerComponent - payload <- if totalBytes <= 4 -- data is right here + payload <- lift $ + if totalBytes <= 4 -- data is right here then fmt <$> (getLazyByteString (fromIntegral totalBytes) <* skip (4 - totalBytes)) @@ -252,9 +258,9 @@ exifHeader hdr = do entries <- sequence $ replicate (fromIntegral numentries) ifdEntry subentries <- case lookup ExifOffset entries of Just (UnsignedLong offset) -> do - pos <- bytesRead - skip (fromIntegral offset - (fromIntegral pos - 8)) - numsubentries <- getWord16 + pos <- lift bytesRead + lift $ skip (fromIntegral offset - (fromIntegral pos - 8)) + numsubentries <- lift getWord16 sequence $ replicate (fromIntegral numsubentries) ifdEntry _ -> return [] diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index aebdcae4c..c18aa331f 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -190,6 +190,8 @@ import Control.Applicative ((<$>), (<*>), (*>), (<*), (<$)) import Data.Monoid import Data.Maybe (catMaybes) +import Text.Pandoc.Error + type Parser t s = Parsec t s type ParserT = ParsecT @@ -845,25 +847,16 @@ readWithM :: (Monad m, Functor m) => ParserT [Char] st m a -- ^ parser -> st -- ^ initial state -> String -- ^ input - -> m a + -> m (Either PandocError a) readWithM parser state input = - handleError <$> (runParserT parser state "source" input) - where - handleError (Left err') = - let errPos = errorPos err' - errLine = sourceLine errPos - errColumn = sourceColumn errPos - theline = (lines input ++ [""]) !! (errLine - 1) - in error $ "\nError at " ++ show err' ++ "\n" ++ - theline ++ "\n" ++ replicate (errColumn - 1) ' ' ++ - "^" - handleError (Right result) = result + mapLeft (ParsecError input) <$> runParserT parser state "source" input + -- | Parse a string with a given parser and state readWith :: Parser [Char] st a -> st -> String - -> a + -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp returnWarnings :: (Stream s m c) diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 2f2656086..9a97dfc21 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -250,6 +250,11 @@ renderDoc :: (IsString a, Monoid a) => Doc -> DocState a renderDoc = renderList . toList . unDoc +data IsBlock = IsBlock Int [String] + +-- This would be nicer with a pattern synonym +-- pattern VBlock i s <- mkIsBlock -> Just (IsBlock ..) + renderList :: (IsString a, Monoid a) => [D] -> DocState a renderList [] = return () @@ -323,11 +328,11 @@ renderList (BreakingSpace : xs) = do outp 1 " " renderList xs' -renderList (b1@Block{} : b2@Block{} : xs) = - renderList (mergeBlocks False b1 b2 : xs) +renderList (Block i1 s1 : Block i2 s2 : xs) = + renderList (mergeBlocks False (IsBlock i1 s1) (IsBlock i2 s2) : xs) -renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) = - renderList (mergeBlocks True b1 b2 : xs) +renderList (Block i1 s1 : BreakingSpace : Block i2 s2 : xs) = + renderList (mergeBlocks True (IsBlock i1 s1) (IsBlock i2 s2) : xs) renderList (Block width lns : xs) = do st <- get @@ -339,15 +344,14 @@ renderList (Block width lns : xs) = do modify $ \s -> s{ prefix = oldPref } renderList xs -mergeBlocks :: Bool -> D -> D -> D -mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) = +mergeBlocks :: Bool -> IsBlock -> IsBlock -> D +mergeBlocks addSpace (IsBlock w1 lns1) (IsBlock w2 lns2) = Block (w1 + w2 + if addSpace then 1 else 0) $ zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties) where empties = replicate (abs $ length lns1 - length lns2) "" pad n s = s ++ replicate (n - realLength s) ' ' sp "" = "" sp xs = if addSpace then (' ' : xs) else xs -mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!" blockToDoc :: Int -> [String] -> Doc blockToDoc _ lns = text $ intercalate "\n" lns diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index f8a2ec28e..51a35c8ad 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -37,10 +37,11 @@ import Data.Text (unpack, pack) import Data.List (groupBy) import Text.Pandoc.Definition import Text.Pandoc.Options +import Text.Pandoc.Error -- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readCommonMark :: ReaderOptions -> String -> Pandoc -readCommonMark opts = nodeToPandoc . commonmarkToNode opts' . pack +readCommonMark :: ReaderOptions -> String -> Either PandocError Pandoc +readCommonMark opts = Right . nodeToPandoc . commonmarkToNode opts' . pack where opts' = if readerSmart opts then [optNormalize, optSmart] else [optNormalize] diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 663960a87..f82158ab4 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -15,6 +15,9 @@ import Control.Applicative ((<$>)) import Data.List (intersperse) import Data.Maybe (fromMaybe) import Text.TeXMath (readMathML, writeTeX) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Compat.Except +import Data.Default {- @@ -497,7 +500,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] ?asciidoc-br? - line break from asciidoc docbook output -} -type DB = State DBState +type DB = ExceptT PandocError (State DBState) data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType @@ -507,16 +510,18 @@ data DBState = DBState{ dbSectionLevel :: Int , dbFigureTitle :: Inlines } deriving Show -readDocBook :: ReaderOptions -> String -> Pandoc -readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp') - DBState{ dbSectionLevel = 0 - , dbQuoteType = DoubleQuote - , dbMeta = mempty - , dbAcceptsMeta = False - , dbBook = False - , dbFigureTitle = mempty - } +instance Default DBState where + def = DBState{ dbSectionLevel = 0 + , dbQuoteType = DoubleQuote + , dbMeta = mempty + , dbAcceptsMeta = False + , dbBook = False + , dbFigureTitle = mempty } + + +readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc +readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs + where (bs , st') = flip runState def . runExceptT . mapM parseBlock . normalizeTree . parseXML $ inp' inp' = handleInstructions inp -- We treat <?asciidoc-br?> specially (issue #1236), converting it diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index d61cc8b1b..67a97ae85 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -96,14 +96,17 @@ import Control.Applicative ((<$>)) import Data.Sequence (ViewL(..), viewl) import qualified Data.Sequence as Seq (null) +import Text.Pandoc.Error +import Text.Pandoc.Compat.Except + readDocx :: ReaderOptions -> B.ByteString - -> (Pandoc, MediaBag) + -> Either PandocError (Pandoc, MediaBag) readDocx opts bytes = case archiveToDocx (toArchive bytes) of - Right docx -> (Pandoc meta blks, mediaBag) where - (meta, blks, mediaBag) = (docxToOutput opts docx) - Left _ -> error $ "couldn't parse docx file" + Right docx -> (\(meta, blks, mediaBag) -> (Pandoc meta blks, mediaBag)) + <$> (docxToOutput opts docx) + Left _ -> Left (ParseFailure "couldn't parse docx file") data DState = DState { docxAnchorMap :: M.Map String String , docxMediaBag :: MediaBag @@ -122,10 +125,10 @@ data DEnv = DEnv { docxOptions :: ReaderOptions instance Default DEnv where def = DEnv def False -type DocxContext = ReaderT DEnv (State DState) +type DocxContext = ExceptT PandocError (ReaderT DEnv (State DState)) -evalDocxContext :: DocxContext a -> DEnv -> DState -> a -evalDocxContext ctx env st = evalState (runReaderT ctx env) st +evalDocxContext :: DocxContext a -> DEnv -> DState -> Either PandocError a +evalDocxContext ctx env st = flip evalState st . flip runReaderT env . runExceptT $ ctx -- This is empty, but we put it in for future-proofing. spansToKeep :: [String] @@ -551,7 +554,7 @@ bodyToOutput (Body bps) = do blks', mediaBag) -docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) +docxToOutput :: ReaderOptions -> Docx -> Either PandocError (Meta, [Block], MediaBag) docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index b061d8683..338540533 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -35,18 +35,20 @@ import Control.DeepSeq.Generics (deepseq, NFData) import Debug.Trace (trace) +import Text.Pandoc.Error + type Items = M.Map String (FilePath, MimeType) -readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag) +readEPUB :: ReaderOptions -> BL.ByteString -> Either PandocError (Pandoc, MediaBag) readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes) -runEPUB :: Except String a -> a -runEPUB = either error id . runExcept +runEPUB :: Except PandocError a -> Either PandocError a +runEPUB = runExcept -- Note that internal reference are aggresively normalised so that all ids -- are of the form "filename#id" -- -archiveToEPUB :: (MonadError String m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) +archiveToEPUB :: (MonadError PandocError m) => ReaderOptions -> Archive -> m (Pandoc, MediaBag) archiveToEPUB os archive = do -- root is path to folder with manifest file in (root, content) <- getManifest archive @@ -64,19 +66,20 @@ archiveToEPUB os archive = do return $ (ast, mediaBag) where os' = os {readerParseRaw = True} - parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc + parseSpineElem :: MonadError PandocError m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do when (readerTrace os) (traceM path) doc <- mimeToReader mime r path let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc - mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc + mimeToReader :: MonadError PandocError m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do fname <- findEntryByPathE (root </> path) archive - return $ fixInternalReferences path . + html <- either throwError return . readHtml os' . UTF8.toStringLazy $ fromEntry fname + return $ fixInternalReferences path html mimeToReader s _ path | s `elem` imageMimes = return $ imageToPandoc path | otherwise = return $ mempty @@ -114,7 +117,7 @@ imageMimes = ["image/gif", "image/jpeg", "image/png"] type CoverImage = FilePath -parseManifest :: (MonadError String m) => Element -> m (Maybe CoverImage, Items) +parseManifest :: (MonadError PandocError m) => Element -> m (Maybe CoverImage, Items) parseManifest content = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest @@ -130,7 +133,7 @@ parseManifest content = do mime <- findAttrE (emptyName "media-type") e return (uid, (href, mime)) -parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)] +parseSpine :: MonadError PandocError m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do spine <- findElementE (dfName "spine") e let itemRefs = findChildren (dfName "itemref") spine @@ -141,7 +144,7 @@ parseSpine is e = do guard linear findAttr (emptyName "idref") ref -parseMeta :: MonadError String m => Element -> m Meta +parseMeta :: MonadError PandocError m => Element -> m Meta parseMeta content = do meta <- findElementE (dfName "metadata") content let dcspace (QName _ (Just "http://purl.org/dc/elements/1.1/") (Just "dc")) = True @@ -159,7 +162,7 @@ renameMeta :: String -> String renameMeta "creator" = "author" renameMeta s = s -getManifest :: MonadError String m => Archive -> m (String, Element) +getManifest :: MonadError PandocError m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry @@ -266,18 +269,18 @@ emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: MonadError String m => QName -> Element -> m String +findAttrE :: MonadError PandocError m => QName -> Element -> m String findAttrE q e = mkE "findAttr" $ findAttr q e -findEntryByPathE :: MonadError String m => FilePath -> Archive -> m Entry +findEntryByPathE :: MonadError PandocError m => FilePath -> Archive -> m Entry findEntryByPathE (normalise -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: MonadError String m => String -> m Element +parseXMLDocE :: MonadError PandocError m => String -> m Element parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc -findElementE :: MonadError String m => QName -> Element -> m Element +findElementE :: MonadError PandocError m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x -mkE :: MonadError String m => String -> Maybe a -> m a -mkE s = maybe (throwError s) return +mkE :: MonadError PandocError m => String -> Maybe a -> m a +mkE s = maybe (throwError . ParseFailure $ s) return diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 02ff07e73..59f71589e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses, +ViewPatterns#-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -43,7 +44,7 @@ import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, renderTags' - , escapeURI, safeRead ) + , escapeURI, safeRead, mapLeft ) import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) , Extension (Ext_epub_html_exts, Ext_native_divs, Ext_native_spans)) @@ -62,15 +63,18 @@ import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) +import Text.Pandoc.Error + +import Text.Parsec.Error + -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readHtml opts inp = - case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of - Left err' -> error $ "\nError at " ++ show err' - Right result -> result + mapLeft (ParseFailure . getError) . flip runReader def $ + runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -78,6 +82,9 @@ readHtml opts inp = meta <- stateMeta . parserState <$> getState bs' <- replaceNotes (B.toList blocks) return $ Pandoc meta bs' + getError (errorMessages -> ms) = case ms of + [] -> "" + (m:_) -> messageString m replaceNotes :: [Block] -> TagParser [Block] replaceNotes = walkM replaceNotes' diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index c03382c17..aa2534afc 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -26,15 +26,17 @@ import Documentation.Haddock.Parser import Documentation.Haddock.Types import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse - -> Pandoc + -> Either PandocError Pandoc readHaddock opts = #if MIN_VERSION_haddock_library(1,2,0) - B.doc . docHToBlocks . trace' . _doc . parseParas + Right . B.doc . docHToBlocks . trace' . _doc . parseParas #else - B.doc . docHToBlocks . trace' . parseParas + Right . B.doc . docHToBlocks . trace' . parseParas #endif where trace' x = if readerTrace opts then trace (show x) x diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 31ac37bf1..a517f9566 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -57,11 +57,12 @@ import qualified Data.Map as M import qualified Control.Exception as E import System.FilePath (takeExtension, addExtension) import Text.Pandoc.Highlighting (fromListingsLanguage) +import Text.Pandoc.Error -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } parseLaTeX :: LP Pandoc @@ -853,12 +854,8 @@ rawEnv name = do type IncludeParser = ParserT [Char] [String] IO String -- | Replace "include" commands with file contents. -handleIncludes :: String -> IO String -handleIncludes s = do - res <- runParserT includeParser' [] "input" s - case res of - Right s' -> return s' - Left e -> error $ show e +handleIncludes :: String -> IO (Either PandocError String) +handleIncludes s = mapLeft (ParsecError s) <$> runParserT includeParser' [] "input" s includeParser' :: IncludeParser includeParser' = diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 8892f60fb..369c889d1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,5 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE ScopedTypeVariables #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -64,13 +65,14 @@ import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error type MarkdownParser a = ParserT [Char] ParserState (Reader ParserState) a -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMarkdown opts s = runMarkdown opts s parseMarkdown @@ -78,16 +80,17 @@ readMarkdown opts s = -- and a list of warnings. readMarkdownWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> (Pandoc, [String]) + -> Either PandocError (Pandoc, [String]) readMarkdownWithWarnings opts s = runMarkdown opts s (returnWarnings parseMarkdown) -runMarkdown :: ReaderOptions -> String -> MarkdownParser a -> a -runMarkdown opts inp p = fst res +runMarkdown :: forall a . ReaderOptions -> String -> MarkdownParser a -> Either PandocError a +runMarkdown opts inp p = fst <$> res where imd = readWithM (returnState p) def{ stateOptions = opts } (inp ++ "\n\n") + res :: Either PandocError (a, ParserState) res = runReader imd s s :: ParserState - s = snd $ runReader imd s + s = either def snd res -- -- Constants and data structure definitions @@ -246,8 +249,9 @@ yamlMetaBlock = try $ do H.foldrWithKey (\k v m -> if ignorable k then m - else B.setMeta (T.unpack k) - (yamlToMeta opts v) m) + else case yamlToMeta opts v of + Left _ -> m + Right v' -> B.setMeta (T.unpack k) v' m) nullMeta hashmap Right Yaml.Null -> return nullMeta Right _ -> do @@ -279,38 +283,42 @@ yamlMetaBlock = try $ do ignorable :: Text -> Bool ignorable t = T.pack "_" `T.isSuffixOf` t -toMetaValue :: ReaderOptions -> Text -> MetaValue -toMetaValue opts x = - case readMarkdown opts' (T.unpack x) of - Pandoc _ [Plain xs] -> MetaInlines xs - Pandoc _ [Para xs] +toMetaValue :: ReaderOptions -> Text -> Either PandocError MetaValue +toMetaValue opts x = toMeta <$> readMarkdown opts' (T.unpack x) + where + toMeta p = + case p of + Pandoc _ [Plain xs] -> MetaInlines xs + Pandoc _ [Para xs] | endsWithNewline x -> MetaBlocks [Para xs] | otherwise -> MetaInlines xs - Pandoc _ bs -> MetaBlocks bs - where endsWithNewline t = T.pack "\n" `T.isSuffixOf` t - opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} - meta_exts = Set.fromList [ Ext_pandoc_title_block - , Ext_mmd_title_block - , Ext_yaml_metadata_block - ] - -yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue + Pandoc _ bs -> MetaBlocks bs + endsWithNewline t = T.pack "\n" `T.isSuffixOf` t + opts' = opts{readerExtensions=readerExtensions opts `Set.difference` meta_exts} + meta_exts = Set.fromList [ Ext_pandoc_title_block + , Ext_mmd_title_block + , Ext_yaml_metadata_block + ] + +yamlToMeta :: ReaderOptions -> Yaml.Value -> Either PandocError MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t yamlToMeta _ (Yaml.Number n) -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = MetaString $ show + | base10Exponent n >= 0 = return $ MetaString $ show $ coefficient n * (10 ^ base10Exponent n) - | otherwise = MetaString $ show n -yamlToMeta _ (Yaml.Bool b) = MetaBool b -yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts) - $ V.toList xs -yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m -> + | otherwise = return $ MetaString $ show n +yamlToMeta _ (Yaml.Bool b) = return $ MetaBool b +yamlToMeta opts (Yaml.Array xs) = B.toMetaValue <$> mapM (yamlToMeta opts) + (V.toList xs) +yamlToMeta opts (Yaml.Object o) = MetaMap <$> H.foldrWithKey (\k v m -> if ignorable k then m - else M.insert (T.unpack k) - (yamlToMeta opts v) m) - M.empty o -yamlToMeta _ _ = MetaString "" + else (do + v' <- yamlToMeta opts v + m' <- m + return (M.insert (T.unpack k) v' m'))) + (return M.empty) o +yamlToMeta _ _ = return $ MetaString "" stopLine :: MarkdownParser () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () @@ -466,6 +474,7 @@ block = do res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock + , guardEnabled Ext_latex_macros *> macro -- note: bulletList needs to be before header because of -- the possibility of empty list items: - , bulletList @@ -475,7 +484,6 @@ block = do , htmlBlock , table , codeBlockIndented - , guardEnabled Ext_latex_macros *> macro , rawTeXBlock , lineBlock , blockQuote diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index d1ba35ba0..939d10fb2 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -58,21 +58,21 @@ import Data.Maybe (fromMaybe) import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Error + -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readMediaWiki opts s = - case runParser parseMediaWiki MWState{ mwOptions = opts + readWith parseMediaWiki MWState{ mwOptions = opts , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 , mwCategoryLinks = [] , mwHeaderMap = M.empty , mwIdentifierList = [] } - "source" (s ++ "\n") of - Left err' -> error $ "\nError:\n" ++ show err' - Right result -> result + (s ++ "\n") data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index f4dfa62c1..fc6b3362a 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -3,7 +3,7 @@ Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or +the Free Software Foundation; Either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -33,6 +33,9 @@ module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Error +import Control.Applicative + -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, -- an inline list, or an inline. Thus, for example, @@ -44,33 +47,18 @@ import Text.Pandoc.Shared (safeRead) -- > Pandoc nullMeta [Plain [Str "hi"]] -- readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readNative s = - case safeRead s of - Just d -> d - Nothing -> Pandoc nullMeta $ readBlocks s + -> Either PandocError Pandoc +readNative s = maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) -readBlocks :: String -> [Block] -readBlocks s = - case safeRead s of - Just d -> d - Nothing -> [readBlock s] +readBlocks :: String -> Either PandocError [Block] +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) -readBlock :: String -> Block -readBlock s = - case safeRead s of - Just d -> d - Nothing -> Plain $ readInlines s +readBlock :: String -> Either PandocError Block +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) -readInlines :: String -> [Inline] -readInlines s = - case safeRead s of - Just d -> d - Nothing -> [readInline s] +readInlines :: String -> Either PandocError [Inline] +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) -readInline :: String -> Inline -readInline s = - case safeRead s of - Just d -> d - Nothing -> error "Cannot parse document" +readInline :: String -> Either PandocError Inline +readInline s = maybe (Left . ParseFailure $ "Could not read: " ++ s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 35d01e877..19ddba36b 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleContexts #-} module Text.Pandoc.Readers.OPML ( readOPML ) where import Data.Char (toUpper) import Text.Pandoc.Options @@ -11,8 +12,11 @@ import Data.Generics import Data.Monoid import Control.Monad.State import Control.Applicative ((<$>), (<$)) +import Data.Default +import Text.Pandoc.Compat.Except +import Text.Pandoc.Error -type OPML = State OPMLState +type OPML = ExceptT PandocError (State OPMLState) data OPMLState = OPMLState{ opmlSectionLevel :: Int @@ -21,17 +25,19 @@ data OPMLState = OPMLState{ , opmlDocDate :: Inlines } deriving Show -readOPML :: ReaderOptions -> String -> Pandoc +instance Default OPMLState where + def = OPMLState{ opmlSectionLevel = 0 + , opmlDocTitle = mempty + , opmlDocAuthors = [] + , opmlDocDate = mempty + } + +readOPML :: ReaderOptions -> String -> Either PandocError Pandoc readOPML _ inp = setTitle (opmlDocTitle st') - $ setAuthors (opmlDocAuthors st') - $ setDate (opmlDocDate st') - $ doc $ mconcat bs - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) - OPMLState{ opmlSectionLevel = 0 - , opmlDocTitle = mempty - , opmlDocAuthors = [] - , opmlDocDate = mempty - } + . setAuthors (opmlDocAuthors st') + . setDate (opmlDocDate st') + . doc . mconcat <$> bs + where (bs, st') = flip runState def . runExceptT $ (mapM parseBlock $ normalizeTree $ parseXML inp) -- normalize input, consolidating adjacent Text and CRef elements normalizeTree :: [Content] -> [Content] @@ -58,14 +64,16 @@ attrValue attr elt = Just z -> z Nothing -> "" -asHtml :: String -> Inlines -asHtml s = case readHtml def s of - Pandoc _ [Plain ils] -> fromList ils - _ -> mempty +exceptT :: Either PandocError a -> OPML a +exceptT = either throwError return + +asHtml :: String -> OPML Inlines +asHtml s = (\(Pandoc _ bs) -> case bs of + [Plain ils] -> fromList ils + _ -> mempty) <$> exceptT (readHtml def s) -asMarkdown :: String -> Blocks -asMarkdown s = fromList bs - where Pandoc _ bs = readMarkdown def s +asMarkdown :: String -> OPML Blocks +asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> exceptT (readMarkdown def s) getBlocks :: Element -> OPML Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -82,8 +90,8 @@ parseBlock (Elem e) = "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e - where sect n = do let headerText = asHtml $ attrValue "text" e - let noteBlocks = asMarkdown $ attrValue "_note" e + where sect n = do headerText <- asHtml $ attrValue "text" e + noteBlocks <- asMarkdown $ attrValue "_note" e modify $ \st -> st{ opmlSectionLevel = n } bs <- getBlocks e modify $ \st -> st{ opmlSectionLevel = n - 1 } diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 457db200b..1dfbdd700 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -60,10 +60,12 @@ import Data.Maybe (fromMaybe, isJust) import Data.Monoid (mconcat, mempty, mappend) import Network.HTTP (urlEncode) +import Text.Pandoc.Error + -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readOrg opts s = runOrg opts s parseOrg data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext @@ -71,13 +73,13 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) -runOrg :: ReaderOptions -> String -> OrgParser a -> a -runOrg opts inp p = fst res +runOrg :: ReaderOptions -> String -> OrgParser a -> Either PandocError a +runOrg opts inp p = fst <$> res where imd = readWithM (returnState p) def{ orgStateOptions = opts } (inp ++ "\n\n") res = runReader imd def { finalState = s } s :: OrgParserState - s = snd $ runReader imd (def { finalState = s }) + s = either def snd res parseOrg :: OrgParser Pandoc parseOrg = do @@ -1259,17 +1261,15 @@ math = B.math <$> choice [ math1CharBetween '$' displayMath :: OrgParser Inlines displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" ] - -updatePositions :: Char - -> OrgParser (Char) -updatePositions c = do - when (c `elem` emphasisPreChars) updateLastPreCharPos - when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos - return c + , rawMathBetween "$$" "$$" + ] symbol :: OrgParser Inlines symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) + where updatePositions c = do + when (c `elem` emphasisPreChars) updateLastPreCharPos + when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos + return c emphasisBetween :: Char -> OrgParser Inlines diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4ae9d52ae..a8112bc81 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -51,13 +51,15 @@ import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) import Data.Char (toLower, isHexDigit, isSpace) +import Text.Pandoc.Error + -- | Parse reStructuredText string and return Pandoc document. readRST :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") -readRSTWithWarnings :: ReaderOptions -> String -> (Pandoc, [String]) +readRSTWithWarnings :: ReaderOptions -> String -> Either PandocError (Pandoc, [String]) readRSTWithWarnings opts s = (readWith (returnWarnings parseRST)) def{ stateOptions = opts } (s ++ "\n\n") type RSTParser = Parser [Char] ParserState diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 9f5738478..07b414431 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -48,17 +48,18 @@ import Data.Maybe (fromMaybe) import Text.HTML.TagSoup import Data.Char (isAlphaNum) import qualified Data.Foldable as F +import Text.Pandoc.Error -- | Read twiki from an input string and return a Pandoc document. readTWiki :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readTWiki opts s = (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n") readTWikiWithWarnings :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> (Pandoc, [String]) + -> Either PandocError (Pandoc, [String]) readTWikiWithWarnings opts s = (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n") where parseTWikiWithWarnings = do diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 63ab80eb9..4565b26a1 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -68,11 +68,12 @@ import Text.Printf import Control.Applicative ((<$>), (*>), (<*), (<$)) import Data.Monoid import Debug.Trace (trace) +import Text.Pandoc.Error -- | Parse a Textile text and return a Pandoc document. readTextile :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc + -> Either PandocError Pandoc readTextile opts s = (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 834d18c5c..304d6d4c5 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -48,6 +48,7 @@ import Data.Monoid (Monoid, mconcat, mempty, mappend) import Control.Monad (void, guard, when) import Data.Default import Control.Monad.Reader (Reader, runReader, asks) +import Text.Pandoc.Error import Data.Time.LocalTime (getZonedTime) import Text.Pandoc.Compat.Directory(getModificationTime) @@ -83,12 +84,12 @@ getT2TMeta inps out = do return $ T2TMeta curDate curMtime (intercalate ", " inps) out -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Pandoc +readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document -readTxt2TagsNoMacros :: ReaderOptions -> String -> Pandoc +readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc readTxt2TagsNoMacros = readTxt2Tags def parseT2T :: T2T Pandoc diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index bc960fd38..e0460c66e 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -85,6 +85,8 @@ module Text.Pandoc.Shared ( -- * Error handling err, warn, + mapLeft, + hush, -- * Safe read safeRead, -- * Temp directory @@ -113,7 +115,7 @@ import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import qualified Control.Exception as E -import Control.Monad (msum, unless) +import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.Compat.Locale (defaultTimeLocale) import Data.Time @@ -855,6 +857,14 @@ warn msg = do name <- getProgName UTF8.hPutStrLn stderr $ name ++ ": " ++ msg +mapLeft :: (a -> b) -> Either a c -> Either b c +mapLeft f (Left x) = Left (f x) +mapLeft _ (Right x) = Right x + +hush :: Either a b -> Maybe b +hush (Left _) = Nothing +hush (Right x) = Just x + -- | Remove intermediate "." and ".." directories from a path. -- -- > collapseFilePath "./foo" == "foo" @@ -883,11 +893,11 @@ collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories -- Safe read -- -safeRead :: (Monad m, Read a) => String -> m a +safeRead :: (MonadPlus m, Read a) => String -> m a safeRead s = case reads s of (d,x):_ | all isSpace x -> return d - _ -> fail $ "Could not read `" ++ s ++ "'" + _ -> mzero -- -- Temp directory diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 5bdf325b1..047ad0481 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -18,6 +18,7 @@ import Prelude hiding ( readFile ) import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 (toStringLazy) import Text.Printf +import Text.Pandoc.Error readFileUTF8 :: FilePath -> IO String readFileUTF8 f = B.readFile f >>= return . toStringLazy @@ -182,7 +183,7 @@ lhsReaderTest :: String -> Test lhsReaderTest format = testWithNormalize normalizer "lhs" ["-r", format, "-w", "native"] ("lhs-test" <.> format) norm - where normalizer = writeNative def . normalize . readNative + where normalizer = writeNative def . normalize . handleError . readNative norm = if format == "markdown+lhs" then "lhs-test-markdown.native" else "lhs-test.native" diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 06e8a3a9c..47292bc99 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -13,6 +13,7 @@ import Text.Pandoc.Writers.Native (writeNative) import qualified Data.Map as M import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) import Codec.Archive.Zip +import Text.Pandoc.Error -- We define a wrapper around pandoc that doesn't normalize in the -- tests. Since we do our own normalization, we want to make sure @@ -41,8 +42,8 @@ compareOutput :: ReaderOptions compareOutput opts docxFile nativeFile = do df <- B.readFile docxFile nf <- Prelude.readFile nativeFile - let (p, _) = readDocx opts df - return $ (noNorm p, noNorm (readNative nf)) + let (p, _) = handleError $ readDocx opts df + return $ (noNorm p, noNorm (handleError $ readNative nf)) testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO Test testCompareWithOptsIO opts name docxFile nativeFile = do @@ -79,7 +80,7 @@ compareMediaPathIO mediaPath mediaBag docxPath = do compareMediaBagIO :: FilePath -> IO Bool compareMediaBagIO docxFile = do df <- B.readFile docxFile - let (_, mb) = readDocx def df + let (_, mb) = handleError $ readDocx def df bools <- mapM (\(fp, _, _) -> compareMediaPathIO fp mb docxFile) (mediaDirectory mb) diff --git a/tests/Tests/Readers/EPUB.hs b/tests/Tests/Readers/EPUB.hs index 0d19a8400..bfdaa45b7 100644 --- a/tests/Tests/Readers/EPUB.hs +++ b/tests/Tests/Readers/EPUB.hs @@ -9,9 +9,10 @@ import Text.Pandoc.Readers.EPUB import Text.Pandoc.MediaBag (MediaBag, mediaDirectory) import Control.Applicative import System.FilePath (joinPath) +import Text.Pandoc.Error getMediaBag :: FilePath -> IO MediaBag -getMediaBag fp = snd . readEPUB def <$> BL.readFile fp +getMediaBag fp = snd . handleError . readEPUB def <$> BL.readFile fp testMediaBag :: FilePath -> [(String, String, Int)] -> IO () testMediaBag fp bag = do diff --git a/tests/Tests/Readers/LaTeX.hs b/tests/Tests/Readers/LaTeX.hs index 47916b0c0..38363af59 100644 --- a/tests/Tests/Readers/LaTeX.hs +++ b/tests/Tests/Readers/LaTeX.hs @@ -8,9 +8,10 @@ import Tests.Arbitrary() import Text.Pandoc.Builder import Text.Pandoc import Data.Monoid (mempty) +import Text.Pandoc.Error latex :: String -> Pandoc -latex = readLaTeX def +latex = handleError . readLaTeX def infix 4 =: (=:) :: ToString c diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index fdb1a7417..03884a8e5 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -9,19 +9,20 @@ import Text.Pandoc.Builder import qualified Data.Set as Set -- import Text.Pandoc.Shared ( normalize ) import Text.Pandoc +import Text.Pandoc.Error markdown :: String -> Pandoc -markdown = readMarkdown def +markdown = handleError . readMarkdown def markdownSmart :: String -> Pandoc -markdownSmart = readMarkdown def { readerSmart = True } +markdownSmart = handleError . readMarkdown def { readerSmart = True } markdownCDL :: String -> Pandoc -markdownCDL = readMarkdown def { readerExtensions = Set.insert +markdownCDL = handleError . readMarkdown def { readerExtensions = Set.insert Ext_compact_definition_lists $ readerExtensions def } markdownGH :: String -> Pandoc -markdownGH = readMarkdown def { readerExtensions = githubMarkdownExtensions } +markdownGH = handleError . readMarkdown def { readerExtensions = githubMarkdownExtensions } infix 4 =: (=:) :: ToString c @@ -30,7 +31,7 @@ infix 4 =: testBareLink :: (String, Inlines) -> Test testBareLink (inp, ils) = - test (readMarkdown def{ readerExtensions = + test (handleError . readMarkdown def{ readerExtensions = Set.fromList [Ext_autolink_bare_uris, Ext_raw_html] }) inp (inp, doc $ para ils) @@ -220,7 +221,7 @@ tests = [ testGroup "inline code" =?> para (note (para "See [^1]")) ] , testGroup "lhs" - [ test (readMarkdown def{ readerExtensions = Set.insert + [ test (handleError . readMarkdown def{ readerExtensions = Set.insert Ext_literate_haskell $ readerExtensions def }) "inverse bird tracks and html" $ "> a\n\n< b\n\n<div>\n" diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index c373d52cc..f555447c7 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -8,12 +8,13 @@ import Text.Pandoc.Builder import Text.Pandoc import Data.List (intersperse) import Data.Monoid (mempty, mappend, mconcat) +import Text.Pandoc.Error org :: String -> Pandoc -org = readOrg def +org = handleError . readOrg def orgSmart :: String -> Pandoc -orgSmart = readOrg def { readerSmart = True } +orgSmart = handleError . readOrg def { readerSmart = True } infix 4 =: (=:) :: ToString c diff --git a/tests/Tests/Readers/RST.hs b/tests/Tests/Readers/RST.hs index 1aaf4897f..5eabec89a 100644 --- a/tests/Tests/Readers/RST.hs +++ b/tests/Tests/Readers/RST.hs @@ -7,10 +7,11 @@ import Tests.Helpers import Tests.Arbitrary() import Text.Pandoc.Builder import Text.Pandoc +import Text.Pandoc.Error import Data.Monoid (mempty) rst :: String -> Pandoc -rst = readRST def{ readerStandalone = True } +rst = handleError . readRST def{ readerStandalone = True } infix 4 =: (=:) :: ToString c diff --git a/tests/Tests/Readers/Txt2Tags.hs b/tests/Tests/Readers/Txt2Tags.hs index fd7c767e0..938a2b455 100644 --- a/tests/Tests/Readers/Txt2Tags.hs +++ b/tests/Tests/Readers/Txt2Tags.hs @@ -7,12 +7,13 @@ import Tests.Helpers import Tests.Arbitrary() import Text.Pandoc.Builder import Text.Pandoc +import Text.Pandoc.Error import Data.List (intersperse) import Data.Monoid (mempty, mconcat) import Text.Pandoc.Readers.Txt2Tags t2t :: String -> Pandoc -t2t s = readTxt2Tags (T2TMeta "date" "mtime" "in" "out") def s +t2t = handleError . readTxt2Tags (T2TMeta "date" "mtime" "in" "out") def infix 4 =: (=:) :: ToString c diff --git a/tests/Tests/Writers/Docx.hs b/tests/Tests/Writers/Docx.hs index 80ce0014d..068c5a935 100644 --- a/tests/Tests/Writers/Docx.hs +++ b/tests/Tests/Writers/Docx.hs @@ -7,6 +7,7 @@ import Tests.Helpers import Test.Framework import Text.Pandoc.Readers.Docx import Text.Pandoc.Writers.Docx +import Text.Pandoc.Error type Options = (WriterOptions, ReaderOptions) @@ -15,9 +16,9 @@ compareOutput :: Options -> IO (Pandoc, Pandoc) compareOutput opts nativeFile = do nf <- Prelude.readFile nativeFile - df <- writeDocx (fst opts) (readNative nf) - let (p, _) = readDocx (snd opts) df - return (p, readNative nf) + df <- writeDocx (fst opts) (handleError $ readNative nf) + let (p, _) = handleError $ readDocx (snd opts) df + return (p, handleError $ readNative nf) testCompareWithOptsIO :: Options -> String -> FilePath -> IO Test testCompareWithOptsIO opts name nativeFile = do |