From c7f4333f83b7441ad9a3a43c798b8f029670ffa3 Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Sun, 6 Jan 2013 16:35:41 -0800
Subject: UTF8 module: Remove `\r` when reading.

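This should prevent problems with extra CRs (carriage returns) on Windows.
It also switches `toString` and `toStringLazy` from `decodeUtf8With
lenientDecode` to plain `decodeUtf8`, and routes `hGetContents` through
`toStringLazy` so that it strips `\r` as well.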
---
 src/Text/Pandoc/UTF8.hs | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 46be536d0..582afb6dc 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -56,7 +56,6 @@ import qualified Data.Text.Encoding as T
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Encoding as TL
-import Data.Text.Encoding.Error
 
 readFile :: FilePath -> IO String
 readFile f = do
@@ -82,19 +81,23 @@ hPutStrLn :: Handle -> String -> IO ()
 hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
 
 hGetContents :: Handle -> IO String
-hGetContents h = fmap (TL.unpack . TL.decodeUtf8) $ BL.hGetContents h
+hGetContents = fmap toStringLazy . BL.hGetContents
 -- hGetContents h = hSetEncoding h utf8_bom
 --                   >> hSetNewlineMode h universalNewlineMode
 --                   >> IO.hGetContents h
 
+-- | Convert UTF8-encoded ByteString to String, also
+-- removing '\r' characters.
 toString :: B.ByteString -> String
-toString = T.unpack . T.decodeUtf8With lenientDecode
+toString = filter (/='\r') . T.unpack . T.decodeUtf8
 
 fromString :: String -> B.ByteString
 fromString = T.encodeUtf8 . T.pack
 
+-- | Convert UTF8-encoded ByteString to String, also
+-- removing '\r' characters.
 toStringLazy :: BL.ByteString -> String
-toStringLazy = TL.unpack . TL.decodeUtf8With lenientDecode
+toStringLazy = filter (/='\r') . TL.unpack . TL.decodeUtf8
 
 fromStringLazy :: String -> BL.ByteString
 fromStringLazy = TL.encodeUtf8 . TL.pack
-- 
cgit v1.2.3