From 65a5db2d413f554530c1a358b2d852d58c909061 Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Sat, 27 Oct 2007 22:55:40 +0000
Subject: Modified fromUTF8 to strip out the BOM (byte order mark) wherever
 it is present.  See http://en.wikipedia.org/wiki/Byte_Order_Mark and
 http://six.pairlist.net/pipermail/markdown-discuss/2007-October/000874.html.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1054 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 src/Text/Pandoc/UTF8.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index be26f4993..16bdb9218 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -16,6 +16,7 @@ module Text.Pandoc.UTF8 (
 -- | Take a UTF-8 string and decode it into a Unicode string.
 fromUTF8 :: String -> String
 fromUTF8 "" = ""
+fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker)
 fromUTF8 (c:c':cs) | '\xc0' <= c  && c  <= '\xdf' && 
 		             '\x80' <= c' && c' <= '\xbf' =
 	toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs
-- 
cgit v1.2.3