aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2018-11-29 15:59:33 -0800
committer John MacFarlane <jgm@berkeley.edu> 2018-11-29 15:59:33 -0800
commit 9f8de4be4327809a69d01cfdbbc0ff2dc6e41cff (patch)
tree 8699ef8cce83a1de3a9d3243167a3e6520cf9ef2
parent 747f079bad8c2b2db3e279daf4649659bd50625e (diff)
download pandoc-9f8de4be4327809a69d01cfdbbc0ff2dc6e41cff.tar.gz
Improve error message for UTF-8 decoding errors.
Give the filename and the byte offset. Closes #4765.
-rw-r--r-- src/Text/Pandoc/App.hs 23
1 files changed, 17 insertions, 6 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index a14e4e017..6b320df12 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -54,6 +54,7 @@ import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TE
import qualified Data.Text.Encoding.Error as TE
+import qualified Data.Text.Encoding.Error as TSE
import qualified Data.YAML as YAML
import Network.URI (URI (..), parseURI)
import System.Directory (getAppUserDataDirectory)
@@ -344,15 +345,25 @@ applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
applyTransforms transforms d = return $ foldr ($) d transforms
readSource :: FilePath -> PandocIO Text
-readSource "-" = liftIO (UTF8.toText <$> BS.getContents)
readSource src = case parseURI src of
Just u | uriScheme u `elem` ["http:","https:"] ->
readURI src
- | uriScheme u == "file:" ->
- liftIO $ UTF8.toText <$>
- BS.readFile (uriPathToPath $ uriPath u)
- _ -> liftIO $ UTF8.toText <$>
- BS.readFile src
+ | uriScheme u == "file:" -> liftIO $
+ readTextFile (uriPathToPath $ uriPath u)
+ _ -> liftIO $ readTextFile src
+ where readTextFile :: FilePath -> IO Text
+ readTextFile fp = do
+ bs <- if src == "-"
+ then BS.getContents
+ else BS.readFile fp
+ E.catch (return $! UTF8.toText bs)
+ (\e -> case e of
+ TSE.DecodeError _ (Just w) -> do
+ case BS.elemIndex w bs of
+ Just offset -> E.throwIO $
+ PandocUTF8DecodingError fp offset w
+ _ -> E.throwIO $ PandocUTF8DecodingError fp 0 w
+ _ -> E.throwIO $ PandocAppError (show e))
readURI :: FilePath -> PandocIO Text
readURI src = UTF8.toText . fst <$> openURL src