diff options
Diffstat (limited to 'src/Text/Pandoc/UTF8.hs')
| -rw-r--r-- | src/Text/Pandoc/UTF8.hs | 14 |
1 file changed, 10 insertions, 4 deletions
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 543f39ab0..de3314a0d 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE CPP #-}
 {-
-Copyright (C) 2010-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2010-2015 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
 {- |
  Module : Text.Pandoc.UTF8
- Copyright : Copyright (C) 2010-2014 John MacFarlane
+ Copyright : Copyright (C) 2010-2015 John MacFarlane
  License : GNU GPL, version 2 or above
 
  Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -93,10 +93,16 @@ dropBOM :: String -> String
 dropBOM ('\xFEFF':xs) = xs
 dropBOM xs = xs
 
+filterCRs :: String -> String
+filterCRs ('\r':'\n':xs) = '\n': filterCRs xs
+filterCRs ('\r':xs) = '\n' : filterCRs xs
+filterCRs (x:xs) = x : filterCRs xs
+filterCRs [] = []
+
 -- | Convert UTF8-encoded ByteString to String, also
 -- removing '\r' characters.
 toString :: B.ByteString -> String
-toString = filter (/='\r') . dropBOM . T.unpack . T.decodeUtf8
+toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8
 
 fromString :: String -> B.ByteString
 fromString = T.encodeUtf8 . T.pack
@@ -104,7 +110,7 @@ fromString = T.encodeUtf8 . T.pack
 -- | Convert UTF8-encoded ByteString to String, also
 -- removing '\r' characters.
 toStringLazy :: BL.ByteString -> String
-toStringLazy = filter (/='\r') . dropBOM . TL.unpack . TL.decodeUtf8
+toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8
 
 fromStringLazy :: String -> BL.ByteString
 fromStringLazy = TL.encodeUtf8 . TL.pack
