From c80ffe9171391f1a15d0c4b069f4b994c1aef050 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Wed, 26 Sep 2012 09:04:21 -0700
Subject: UTF8:  Better error message for invalid UTF8.

Read the input as a ByteString and decode it with Text's decodeUtf8
instead of using System.IO's hGetContents.  This way you get a message
saying "invalid UTF-8 stream" instead of "invalid byte sequence", and
you are also told which byte caused the problem.
---
 src/Text/Pandoc/UTF8.hs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 1f2b4b695..e134c97ae 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -52,7 +52,8 @@ import Codec.Binary.UTF8.String (encodeString, decodeString)
 
 import System.IO hiding (readFile, writeFile, getContents,
                           putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
-import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn,
+                       catch)
 import qualified System.IO as IO
 import qualified Data.ByteString.Char8 as B
 import qualified Data.ByteString.Lazy as BL
@@ -86,9 +87,10 @@ hPutStrLn :: Handle -> String -> IO ()
 hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
 
 hGetContents :: Handle -> IO String
-hGetContents h = hSetEncoding h utf8_bom
-                  >> hSetNewlineMode h universalNewlineMode
-                  >> IO.hGetContents h
+hGetContents h = fmap (TL.unpack . TL.decodeUtf8) $ BL.hGetContents h
+-- hGetContents h = hSetEncoding h utf8_bom
+--                   >> hSetNewlineMode h universalNewlineMode
+--                   >> IO.hGetContents h
 
 toString :: B.ByteString -> String
 toString = T.unpack . T.decodeUtf8With lenientDecode
-- 
cgit v1.2.3