diff options
| author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-08-14 01:32:54 +0000 |
|---|---|---|
| committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-08-14 01:32:54 +0000 |
| commit | d00da316227c26bad857998d0398a23917ee0a7b (patch) | |
| tree | ef4ae5ec42114e9a8e94cb83f50368c6af849e33 /System/IO | |
| parent | 8388427d230e853466298cfae7abf0ac0af50269 (diff) | |
| download | pandoc-d00da316227c26bad857998d0398a23917ee0a7b.tar.gz | |
Simplified and improved UTF8 handling:
+ Removed utf8-string cabal configuration flag.
+ Instead, we just include System.IO.UTF8 and Codec.Binary.UTF8.String
from utf8-string package in the source tree, avoiding a dependency
on utf8-string and avoiding crufty custom UTF8 code. (The old
Text.Pandoc.UTF8 had problems with the getContents function.)
+ Removed lots of CPP directives that are no longer needed.
+ In Setup.hs, use '-i..' in running RunTests.hs, so the local UTF8
code will be found.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1411 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'System/IO')
| -rw-r--r-- | System/IO/UTF8.hs | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/System/IO/UTF8.hs b/System/IO/UTF8.hs new file mode 100644 index 000000000..d0af4c38e --- /dev/null +++ b/System/IO/UTF8.hs @@ -0,0 +1,118 @@ +----------------------------------------------------------------------------- +-- | +-- Module : System.IO.UTF8 +-- Copyright : (c) Eric Mertens 2007 +-- License : BSD3-style (see LICENSE) +-- +-- Maintainer: emertens@galois.com +-- Stability : experimental +-- Portability : portable +-- +-- String IO preserving UTF8 encoding. +-- + +module System.IO.UTF8 ( + print + , putStr + , putStrLn + , getLine + , readLn + , readFile + , writeFile + , appendFile + , getContents + , hGetLine + , hGetContents + , hPutStr + , hPutStrLn + ) where + +import Control.Monad (liftM) +import Data.Char (ord, chr) +import Data.Word (Word8) +import Prelude (String, ($), (=<<), (>>=), (.), map, toEnum, fromEnum, Read, + Show(..)) +import System.IO (Handle, IO, FilePath) +import qualified System.IO as IO + +import Codec.Binary.UTF8.String (encode, decode) + + +-- | Encode a string in UTF8 form. +encodeString :: String -> String +encodeString xs = bytesToString (encode xs) + +-- | Decode a string from UTF8 +decodeString :: String -> String +decodeString xs = decode (stringToBytes xs) + +-- | Convert a list of bytes to a String +bytesToString :: [Word8] -> String +bytesToString xs = map (chr . fromEnum) xs + +-- | String to list of bytes. +stringToBytes :: String -> [Word8] +stringToBytes xs = map (toEnum . ord) xs + +-- | The 'print' function outputs a value of any printable type to the +-- standard output device. This function differs from the +-- System.IO.print in that it preserves any UTF8 encoding of the shown value. +-- +print :: Show a => a -> IO () +print x = putStrLn (show x) + +-- | Write a UTF8 string to the standard output device +putStr :: String -> IO () +putStr x = IO.putStr (encodeString x) + +-- | The same as 'putStr', but adds a newline character. +putStrLn :: String -> IO () +putStrLn x = IO.putStrLn (encodeString x) + +-- | Read a UTF8 line from the standard input device +getLine :: IO String +getLine = liftM decodeString IO.getLine + +-- | The 'readLn' function combines 'getLine' and 'readIO', preserving UTF8 +readLn :: Read a => IO a +readLn = IO.readIO =<< getLine + +-- | The 'readFile' function reads a file and +-- returns the contents of the file as a UTF8 string. +-- The file is read lazily, on demand, as with 'getContents'. +readFile :: FilePath -> IO String +readFile n = liftM decodeString (IO.openBinaryFile n IO.ReadMode >>= + IO.hGetContents) + +-- | The computation 'writeFile' @file str@ function writes the UTF8 string @str@, +-- to the file @file@. +writeFile :: FilePath -> String -> IO () +writeFile n c = IO.withBinaryFile n IO.WriteMode $ \ h -> + IO.hPutStr h $ encodeString c + +-- | The computation 'appendFile' @file str@ function appends the UTF8 string @str@, +-- to the file @file@. +appendFile :: FilePath -> String -> IO () +appendFile n c = IO.withBinaryFile n IO.AppendMode $ \h -> + IO.hPutStr h $ encodeString c + +-- | Read a UTF8 line from a Handle +hGetLine :: Handle -> IO String +hGetLine h = liftM decodeString $ IO.hGetLine h + +-- | Lazily read a UTF8 string from a Handle +hGetContents :: Handle -> IO String +hGetContents h = liftM decodeString (IO.hGetContents h) + +-- | Write a UTF8 string to a Handle. +hPutStr :: Handle -> String -> IO () +hPutStr h s = IO.hPutStr h (encodeString s) + +-- | Write a UTF8 string to a Handle, appending a newline. +hPutStrLn :: Handle -> String -> IO () +hPutStrLn h s = IO.hPutStrLn h (encodeString s) + +-- | Lazily read stdin as a UTF8 string. +getContents :: IO String +getContents = liftM decodeString IO.getContents + |
