about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author John MacFarlane <fiddlosopher@gmail.com> 2012-09-26 09:04:21 -0700
committer John MacFarlane <fiddlosopher@gmail.com> 2012-09-26 09:04:21 -0700
commit c80ffe9171391f1a15d0c4b069f4b994c1aef050 (patch)
tree 07fb7df7b51c5b451e39448a40712ac74b4325f0 /src
parent 885ef2072f68fd01137d2eb41d1258932d2134c5 (diff)
download pandoc-c80ffe9171391f1a15d0c4b069f4b994c1aef050.tar.gz
UTF8: Better error message for invalid UTF8.
Read bytestring and use Text's decodeUtf8 instead of using System.IO's hGetContents. This way you get a message saying "invalid UTF-8 stream" instead of "invalid byte sequence." You are also told which byte caused the problem.
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/UTF8.hs 10
1 files changed, 6 insertions, 4 deletions
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 1f2b4b695..e134c97ae 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -52,7 +52,8 @@ import Codec.Binary.UTF8.String (encodeString, decodeString)
import System.IO hiding (readFile, writeFile, getContents,
putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
-import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn,
+ catch)
import qualified System.IO as IO
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy as BL
@@ -86,9 +87,10 @@ hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
hGetContents :: Handle -> IO String
-hGetContents h = hSetEncoding h utf8_bom
- >> hSetNewlineMode h universalNewlineMode
- >> IO.hGetContents h
+hGetContents h = fmap (TL.unpack . TL.decodeUtf8) $ BL.hGetContents h
+-- hGetContents h = hSetEncoding h utf8_bom
+-- >> hSetNewlineMode h universalNewlineMode
+-- >> IO.hGetContents h
toString :: B.ByteString -> String
toString = T.unpack . T.decodeUtf8With lenientDecode