aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/UTF8.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/UTF8.hs')
-rw-r--r--src/Text/Pandoc/UTF8.hs1
1 files changed, 1 insertions, 0 deletions
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index be26f4993..16bdb9218 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -16,6 +16,7 @@ module Text.Pandoc.UTF8 (
-- | Take a UTF-8 string and decode it into a Unicode string.
fromUTF8 :: String -> String
fromUTF8 "" = ""
+fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker)
fromUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
'\x80' <= c' && c' <= '\xbf' =
toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs