diff options
author | John MacFarlane <jgm@berkeley.edu> | 2018-11-29 15:59:33 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-11-29 15:59:33 -0800 |
commit | 9f8de4be4327809a69d01cfdbbc0ff2dc6e41cff (patch) | |
tree | 8699ef8cce83a1de3a9d3243167a3e6520cf9ef2 /src | |
parent | 747f079bad8c2b2db3e279daf4649659bd50625e (diff) | |
download | pandoc-9f8de4be4327809a69d01cfdbbc0ff2dc6e41cff.tar.gz |
Improve error message for UTF-8 decoding errors.
Give the filename and the byte offset.
Closes #4765.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/App.hs | 23 |
1 files changed, 17 insertions, 6 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index a14e4e017..6b320df12 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -54,6 +54,7 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Encoding as TE
 import qualified Data.Text.Encoding.Error as TE
+import qualified Data.Text.Encoding.Error as TSE
 import qualified Data.YAML as YAML
 import Network.URI (URI (..), parseURI)
 import System.Directory (getAppUserDataDirectory)
@@ -344,15 +345,25 @@ applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
 applyTransforms transforms d = return $ foldr ($) d transforms
 
 readSource :: FilePath -> PandocIO Text
-readSource "-" = liftIO (UTF8.toText <$> BS.getContents)
 readSource src = case parseURI src of
                       Just u | uriScheme u `elem` ["http:","https:"] ->
                                  readURI src
-                             | uriScheme u == "file:" ->
-                                 liftIO $ UTF8.toText <$>
-                                    BS.readFile (uriPathToPath $ uriPath u)
-                      _ -> liftIO $ UTF8.toText <$>
-                                    BS.readFile src
+                             | uriScheme u == "file:" -> liftIO $
+                                 readTextFile (uriPathToPath $ uriPath u)
+                      _ -> liftIO $ readTextFile src
+  where readTextFile :: FilePath -> IO Text
+        readTextFile fp = do
+          bs <- if src == "-"
+                   then BS.getContents
+                   else BS.readFile fp
+          E.catch (return $! UTF8.toText bs)
+             (\e -> case e of
+                         TSE.DecodeError _ (Just w) -> do
+                           case BS.elemIndex w bs of
+                             Just offset -> E.throwIO $
+                                  PandocUTF8DecodingError fp offset w
+                             _ -> E.throwIO $ PandocUTF8DecodingError fp 0 w
+                         _ -> E.throwIO $ PandocAppError (show e))
 
 readURI :: FilePath -> PandocIO Text
 readURI src = UTF8.toText . fst <$> openURL src
|