about | summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/UTF8.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/UTF8.hs')
-rw-r--r-- src/Text/Pandoc/UTF8.hs 14
1 files changed, 10 insertions, 4 deletions
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 543f39ab0..de3314a0d 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE CPP #-}
{-
-Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.UTF8
- Copyright : Copyright (C) 2010-2014 John MacFarlane
+ Copyright : Copyright (C) 2010-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -93,10 +93,16 @@ dropBOM :: String -> String
dropBOM ('\xFEFF':xs) = xs
dropBOM xs = xs
+filterCRs :: String -> String
+filterCRs ('\r':'\n':xs) = '\n': filterCRs xs
+filterCRs ('\r':xs) = '\n' : filterCRs xs
+filterCRs (x:xs) = x : filterCRs xs
+filterCRs [] = []
+
-- | Convert UTF8-encoded ByteString to String, dropping a leading
-- BOM and normalizing line endings ("\r\n" and lone '\r' become '\n').
toString :: B.ByteString -> String
-toString = filter (/='\r') . dropBOM . T.unpack . T.decodeUtf8
+toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8
fromString :: String -> B.ByteString
fromString = T.encodeUtf8 . T.pack
@@ -104,7 +110,7 @@ fromString = T.encodeUtf8 . T.pack
-- | Convert UTF8-encoded lazy ByteString to String, dropping a leading
-- BOM and normalizing line endings ("\r\n" and lone '\r' become '\n').
toStringLazy :: BL.ByteString -> String
-toStringLazy = filter (/='\r') . dropBOM . TL.unpack . TL.decodeUtf8
+toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8
fromStringLazy :: String -> BL.ByteString
fromStringLazy = TL.encodeUtf8 . TL.pack