aboutsummaryrefslogtreecommitdiff
path: root/Codec/Binary
diff options
context:
space:
mode:
author fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2008-09-04 02:51:28 +0000
committer fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2008-09-04 02:51:28 +0000
commit 4dca8f6e75948d489e8127119ce3787cb97ee1e2 (patch)
tree 85a9b26dfe9f5074fc993661b2129c97742351fc /Codec/Binary
parent 9b7ec2d366e48dd77befb6710b9b567e26a53084 (diff)
download pandoc-4dca8f6e75948d489e8127119ce3787cb97ee1e2.tar.gz
Reworked Text.Pandoc.ODT to use zip-archive instead of calling external 'zip'.
+ Removed utf8-string and xml-light modules, and unneeded content.xml.
+ Removed code for building reference.odt from Setup.hs. The ODT is now built using template haskell in Text.Pandoc.ODT.
+ Removed copyright statements for utf8-string and xml modules, since they are no longer included in the source.
+ README: Removed claim that 'zip' is needed for ODT production.
+ Removed dependency on 'zip' from debian/control.
+ Text.Pandoc.Shared: Removed withTempDir, added inDirectory.
+ Added makeZip to Text.Pandoc.TH.
+ pandoc.cabal: Added dependencies on old-time, zip-archive, and utf8-string. Added markdown2pdf files to extra-sources list.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1417 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Codec/Binary')
-rw-r--r-- Codec/Binary/UTF8/String.hs 97
1 files changed, 0 insertions, 97 deletions
diff --git a/Codec/Binary/UTF8/String.hs b/Codec/Binary/UTF8/String.hs
deleted file mode 100644
index 27c003f00..000000000
--- a/Codec/Binary/UTF8/String.hs
+++ /dev/null
@@ -1,97 +0,0 @@
---
--- |
--- Module : Codec.Binary.UTF8.String
--- Copyright : (c) Eric Mertens 2007
--- License : BSD3-style (see LICENSE)
---
--- Maintainer: emertens@galois.com
--- Stability : experimental
--- Portability : portable
---
--- Support for encoding UTF8 Strings to and from @[Word8]@
---
-
-module Codec.Binary.UTF8.String (
- encode
- , decode
- , encodeString
- , decodeString
- ) where
-
-import Data.Word (Word8)
-import Data.Bits ((.|.),(.&.),shiftL,shiftR)
-import Data.Char (chr,ord)
-
-default(Int)
-
--- | Encode a string using 'encode' and store the result in a 'String'.
-encodeString :: String -> String
-encodeString xs = map (toEnum . fromEnum) (encode xs)
-
--- | Decode a string using 'decode' using a 'String' as input.
--- | This is not safe but it is necessary if UTF-8 encoded text
--- | has been loaded into a 'String' prior to being decoded.
-decodeString :: String -> String
-decodeString xs = decode (map (toEnum . fromEnum) xs)
-
-replacement_character :: Char
-replacement_character = '\xfffd'
-
--- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
-encode :: String -> [Word8]
-encode = concatMap (map fromIntegral . go . ord)
- where
- go oc
- | oc <= 0x7f = [oc]
-
- | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6)
- , 0x80 + oc .&. 0x3f
- ]
-
- | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12)
- , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
- , 0x80 + oc .&. 0x3f
- ]
- | otherwise = [ 0xf0 + (oc `shiftR` 18)
- , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
- , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
- , 0x80 + oc .&. 0x3f
- ]
-
---
--- | Decode a UTF8 string packed into a list of Word8 values, directly to String
---
-decode :: [Word8] -> String
-decode [ ] = ""
-decode (c:cs)
- | c < 0x80 = chr (fromEnum c) : decode cs
- | c < 0xc0 = replacement_character : decode cs
- | c < 0xe0 = multi1
- | c < 0xf0 = multi_byte 2 0xf 0x800
- | c < 0xf8 = multi_byte 3 0x7 0x10000
- | c < 0xfc = multi_byte 4 0x3 0x200000
- | c < 0xfe = multi_byte 5 0x1 0x4000000
- | otherwise = replacement_character : decode cs
- where
- multi1 = case cs of
- c1 : ds | c1 .&. 0xc0 == 0x80 ->
- let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f)
- in if d >= 0x000080 then toEnum d : decode ds
- else replacement_character : decode ds
- _ -> replacement_character : decode cs
-
- multi_byte :: Int -> Word8 -> Int -> [Char]
- multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
- where
- aux 0 rs acc
- | overlong <= acc && acc <= 0x10ffff &&
- (acc < 0xd800 || 0xdfff < acc) &&
- (acc < 0xfffe || 0xffff < acc) = chr acc : decode rs
- | otherwise = replacement_character : decode rs
-
- aux n (r:rs) acc
- | r .&. 0xc0 == 0x80 = aux (n-1) rs
- $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
-
- aux _ rs _ = replacement_character : decode rs
-