From c80ffe9171391f1a15d0c4b069f4b994c1aef050 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 26 Sep 2012 09:04:21 -0700
Subject: UTF8: Better error message for invalid UTF8.

Read bytestring and use Text's decodeUtf8 instead of using
System.IO's hGetContents. This way you get a message saying
"invalid UTF-8 stream" instead of "invalid byte sequence."
You are also told which byte caused the problem.
---
 src/Text/Pandoc/UTF8.hs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 1f2b4b695..e134c97ae 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -52,7 +52,8 @@ import Codec.Binary.UTF8.String (encodeString, decodeString)
 
 import System.IO hiding (readFile, writeFile, getContents,
                           putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
-import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn,
+                       catch)
 import qualified System.IO as IO
 import qualified Data.ByteString.Char8 as B
 import qualified Data.ByteString.Lazy as BL
@@ -86,9 +87,10 @@ hPutStrLn :: Handle -> String -> IO ()
 hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
 
 hGetContents :: Handle -> IO String
-hGetContents h = hSetEncoding h utf8_bom
-                   >> hSetNewlineMode h universalNewlineMode
-                   >> IO.hGetContents h
+hGetContents h = fmap (TL.unpack . TL.decodeUtf8) $ BL.hGetContents h
+-- hGetContents h = hSetEncoding h utf8_bom
+--                    >> hSetNewlineMode h universalNewlineMode
+--                    >> IO.hGetContents h
 
 toString :: B.ByteString -> String
 toString = T.unpack . T.decodeUtf8With lenientDecode
-- 
cgit v1.2.3