From ebcebccc3226eae6461fececdcca49fbdfb291e7 Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Thu, 31 Jul 2008 23:16:31 +0000
Subject: Use utf8-string's System.IO.UTF8 to replace Text.Pandoc.UTF8. +
 removed Text/Pandoc/UTF8.hs + removed UTF8.hs notice from debian/copyright +
 adjusted main.hs, Text/Pandoc.hs, and Text/Pandoc/ODT.hs to use  
 System.IO.UTF8 instead of Text.Pandoc.UTF8 + Added dependency on utf8-string
 to pandoc.cabal

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1347 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 Text/Pandoc.hs      | 12 +++++-------
 Text/Pandoc/ODT.hs  |  1 -
 Text/Pandoc/UTF8.hs | 45 ---------------------------------------------
 3 files changed, 5 insertions(+), 53 deletions(-)
 delete mode 100644 Text/Pandoc/UTF8.hs

(limited to 'Text')

diff --git a/Text/Pandoc.hs b/Text/Pandoc.hs
index d5026587d..9be9f28c7 100644
--- a/Text/Pandoc.hs
+++ b/Text/Pandoc.hs
@@ -36,13 +36,14 @@ inline links:
 
 > module Main where
 > import Text.Pandoc
-> 
+> import qualified System.IO.UTF8 as U
+>
 > markdownToRST :: String -> String
-> markdownToRST = toUTF8 .
+> markdownToRST =
 >   (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
->   (readMarkdown defaultParserState) .  fromUTF8
+>   readMarkdown defaultParserState
 > 
-> main = interact markdownToRST
+> main = U.getContents >>= U.putStrLn . markdownToRST
 
 -}
 
@@ -84,8 +85,6 @@ module Text.Pandoc
                , defaultWriterOptions
                -- * Default headers for various output formats
                , module Text.Pandoc.DefaultHeaders
-               -- * Functions for converting to and from UTF-8
-               , module Text.Pandoc.UTF8
                -- * Version
                , pandocVersion
              ) where
@@ -108,7 +107,6 @@ import Text.Pandoc.Writers.Man
 import Text.Pandoc.Writers.RTF 
 import Text.Pandoc.Writers.MediaWiki
 import Text.Pandoc.DefaultHeaders
-import Text.Pandoc.UTF8
 import Text.Pandoc.Shared
 
 -- | Version number of pandoc library.
diff --git a/Text/Pandoc/ODT.hs b/Text/Pandoc/ODT.hs
index f388515fb..487bcdedc 100644
--- a/Text/Pandoc/ODT.hs
+++ b/Text/Pandoc/ODT.hs
@@ -92,7 +92,6 @@ handlePictures tempODT sourceDirRelative xml = do
                    Just x  -> x
   cursor' <- scanPictures tempODT sourceDirRelative cursor
   let modified = parsed { elContent = toForest $ root cursor' }
-  putStrLn $ showTopElement modified
   return $ showTopElement modified
 
 scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor
diff --git a/Text/Pandoc/UTF8.hs b/Text/Pandoc/UTF8.hs
deleted file mode 100644
index 16bdb9218..000000000
--- a/Text/Pandoc/UTF8.hs
+++ /dev/null
@@ -1,45 +0,0 @@
--- | Functions for converting Unicode strings to UTF-8 and vice versa.
--- 
--- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>.
--- (c) 2003, OGI School of Science & Engineering, Oregon Health and
--- Science University.  
---
--- Modified by Martin Norbaeck
--- to pass illegal UTF-8 sequences through unchanged.
-module Text.Pandoc.UTF8 ( 
-             fromUTF8, 
-             toUTF8 
-            ) where
-
--- From the Char module supplied with HBC.
-
--- | Take a UTF-8 string and decode it into a Unicode string.
-fromUTF8 :: String -> String
-fromUTF8 "" = ""
-fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker)
-fromUTF8 (c:c':cs) | '\xc0' <= c  && c  <= '\xdf' && 
-		             '\x80' <= c' && c' <= '\xbf' =
-	toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs
-fromUTF8 (c:c':c'':cs) | '\xe0' <= c   && c   <= '\xef' && 
-		          '\x80' <= c'  && c'  <= '\xbf' &&
-		          '\x80' <= c'' && c'' <= '\xbf' =
-	toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs
-fromUTF8 (c:cs) = c : fromUTF8 cs
-
--- | Take a Unicode string and encode it as a UTF-8 string.
-toUTF8 :: String -> String
-toUTF8 "" = ""
-toUTF8 (c:cs) =
-	if c > '\x0000' && c < '\x0080' then
-	    c : toUTF8 cs
-	else if c < toEnum 0x0800 then
-	    let i = fromEnum c
-	    in  toEnum (0xc0 + i `div` 0x40) : 
-	        toEnum (0x80 + i `mod` 0x40) : 
-		toUTF8 cs
-	else
-	    let i = fromEnum c
-	    in  toEnum (0xe0 + i `div` 0x1000) : 
-	        toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : 
-		toEnum (0x80 + i `mod` 0x40) : 
-		toUTF8 cs
-- 
cgit v1.2.3