From d19a347fd5e93802e80e5bd8e13f29de74a53f3c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 5 May 2015 12:41:35 -0700
Subject: UTF8: Better handling of bare CRs in input files.

Previously we just stripped them out, so lines separated only by a
bare CR were run together; now CRLF and bare CR line endings are both
converted to LF.

Closes #2132.
---
 src/Text/Pandoc/UTF8.hs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index c1bf84995..de3314a0d 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -93,10 +93,16 @@ dropBOM :: String -> String
 dropBOM ('\xFEFF':xs) = xs
 dropBOM xs = xs
 
+filterCRs :: String -> String  -- normalise CRLF and bare CR line endings to LF
+filterCRs ('\r':'\n':xs) = '\n': filterCRs xs  -- CRLF pair collapses to a single LF
+filterCRs ('\r':xs) = '\n' : filterCRs xs  -- bare CR (no LF follows) becomes LF
+filterCRs (x:xs) = x : filterCRs xs  -- any other character is kept as-is
+filterCRs []     = []
+
 -- | Convert UTF8-encoded ByteString to String, also
 -- removing '\r' characters.
 toString :: B.ByteString -> String
-toString = filter (/='\r') . dropBOM . T.unpack . T.decodeUtf8
+toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8  -- decode, drop a leading BOM, then turn CRLF / bare CR into LF (CR is no longer just deleted)
 
 fromString :: String -> B.ByteString
 fromString = T.encodeUtf8 . T.pack
@@ -104,7 +110,7 @@ fromString = T.encodeUtf8 . T.pack
 -- | Convert UTF8-encoded ByteString to String, also
 -- removing '\r' characters.
 toStringLazy :: BL.ByteString -> String
-toStringLazy = filter (/='\r') . dropBOM . TL.unpack . TL.decodeUtf8
+toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8  -- lazy variant: decode, drop a leading BOM, then turn CRLF / bare CR into LF (CR is no longer just deleted)
 
 fromStringLazy :: String -> BL.ByteString
 fromStringLazy = TL.encodeUtf8 . TL.pack
-- 
cgit v1.2.3