{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2010-2018 John MacFarlane <jgm@berkeley.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.UTF8
   Copyright   : Copyright (C) 2010-2018 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
-}
module Text.Pandoc.UTF8 ( readFile
                        , getContents
                        , writeFileWith
                        , writeFile
                        , putStrWith
                        , putStr
                        , putStrLnWith
                        , putStrLn
                        , hPutStrWith
                        , hPutStr
                        , hPutStrLnWith
                        , hPutStrLn
                        , hGetContents
                        , toString
                        , toText
                        , fromString
                        , fromText
                        , toStringLazy
                        , fromTextLazy
                        , toTextLazy
                        , fromStringLazy
                        , encodePath
                        , decodeArg
                        )

where

import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn,
                  putStr, putStrLn, readFile, writeFile)
import qualified System.IO as IO

-- | Open the file at the given path (passed through 'encodePath') for
-- reading and return its contents decoded by 'hGetContents'.
readFile :: FilePath -> IO String
readFile f = openFile (encodePath f) ReadMode >>= hGetContents

-- | Read everything from 'stdin', decoded by 'hGetContents'.
getContents :: IO String
getContents = hGetContents stdin

-- | Write a string to the file at the given path (passed through
-- 'encodePath'), opened in 'WriteMode', using 'hPutStrWith' with the
-- given newline convention.
writeFileWith :: Newline -> FilePath -> String -> IO ()
writeFileWith eol f s =
  withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s

-- | 'writeFileWith' using 'nativeNewline'.
writeFile :: FilePath -> String -> IO ()
writeFile = writeFileWith nativeNewline

-- | 'hPutStrWith' on 'stdout'.
putStrWith :: Newline -> String -> IO ()
putStrWith eol s = hPutStrWith eol stdout s

-- | 'putStrWith' using 'nativeNewline'.
putStr :: String -> IO ()
putStr = putStrWith nativeNewline

-- | 'hPutStrLnWith' on 'stdout'.
putStrLnWith :: Newline -> String -> IO ()
putStrLnWith eol s = hPutStrLnWith eol stdout s

-- | 'putStrLnWith' using 'nativeNewline'.
putStrLn :: String -> IO ()
putStrLn = putStrLnWith nativeNewline

-- | Write a string to a handle as UTF-8.
--
-- Before writing, the handle's newline mode is set to the given
-- convention (for both input and output) and its encoding is set to
-- 'utf8'.  These settings are not restored afterwards.
hPutStrWith :: Newline -> Handle -> String -> IO ()
hPutStrWith eol h s = do
  hSetNewlineMode h (NewlineMode eol eol)
  hSetEncoding h utf8
  IO.hPutStr h s

-- | 'hPutStrWith' using 'nativeNewline'.
hPutStr :: Handle -> String -> IO ()
hPutStr = hPutStrWith nativeNewline

-- | Like 'hPutStrWith', but writes with 'IO.hPutStrLn', so a newline
-- follows the string.  The handle's newline mode and encoding are
-- changed in the same way and are not restored afterwards.
hPutStrLnWith :: Newline -> Handle -> String -> IO ()
hPutStrLnWith eol h s = do
  hSetNewlineMode h (NewlineMode eol eol)
  hSetEncoding h utf8
  IO.hPutStrLn h s

-- | 'hPutStrLnWith' using 'nativeNewline'.
hPutStrLn :: Handle -> String -> IO ()
hPutStrLn = hPutStrLnWith nativeNewline

-- | Read the contents of a handle as a strict 'B.ByteString' and
-- decode them with 'toString' (UTF-8, leading BOM dropped, @\'\\r\'@
-- characters removed).
hGetContents :: Handle -> IO String
hGetContents = fmap toString . B.hGetContents

-- Alternative implementation based on the handle's own encoding and
-- newline settings, retained (disabled) from the original source:
-- hGetContents h = hSetEncoding h utf8_bom
--                   >> hSetNewlineMode h universalNewlineMode
--                   >> IO.hGetContents h

-- | Convert UTF8-encoded ByteString to Text, also
-- removing '\r' characters.
--
-- A leading byte-order mark (bytes @EF BB BF@) is dropped first, then
-- every @\'\\r\'@ byte is filtered out, and the remainder is decoded with
-- 'T.decodeUtf8'.  NOTE: per the @text@ documentation, 'T.decodeUtf8'
-- throws an exception on invalid UTF-8 input.
toText :: B.ByteString -> T.Text
toText = T.decodeUtf8 . filterCRs . dropBOM
  where
    -- Only a BOM at the very start of the input is removed.
    dropBOM bs =
      if "\xEF\xBB\xBF" `B.isPrefixOf` bs
         then B.drop 3 bs
         else bs
    filterCRs = B.filter (/='\r')

-- | Convert UTF8-encoded ByteString to String, also
-- removing '\r' characters.  Defined as 'toText' followed by 'T.unpack'.
toString :: B.ByteString -> String
toString = T.unpack . toText

-- | Convert UTF8-encoded lazy ByteString to lazy Text, also
-- removing '\r' characters.
--
-- Lazy counterpart of 'toText': drops a leading byte-order mark
-- (bytes @EF BB BF@), filters out every @\'\\r\'@ byte, and decodes the
-- remainder with 'TL.decodeUtf8'.
toTextLazy :: BL.ByteString -> TL.Text
toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM
  where
    -- Only a BOM at the very start of the input is removed.
    dropBOM bs =
      if "\xEF\xBB\xBF" `BL.isPrefixOf` bs
         then BL.drop 3 bs
         else bs
    filterCRs = BL.filter (/='\r')

-- | Convert UTF8-encoded lazy ByteString to String, also
-- removing '\r' characters.  Defined as 'toTextLazy' followed by
-- 'TL.unpack'.
toStringLazy :: BL.ByteString -> String
toStringLazy = TL.unpack . toTextLazy

-- | Encode strict Text as a UTF-8 strict ByteString.
fromText :: T.Text -> B.ByteString
fromText = T.encodeUtf8

-- | Encode lazy Text as a UTF-8 lazy ByteString.
fromTextLazy :: TL.Text -> BL.ByteString
fromTextLazy = TL.encodeUtf8

-- | Encode a String as a UTF-8 strict ByteString (via 'T.pack').
fromString :: String -> B.ByteString
fromString = fromText . T.pack

-- | Encode a String as a UTF-8 lazy ByteString (via 'TL.pack').
fromStringLazy :: String -> BL.ByteString
fromStringLazy = fromTextLazy . TL.pack

-- | Applied to file paths before they are opened by 'readFile' and
-- 'writeFileWith'.  Currently the identity function.
encodePath :: FilePath -> FilePath
encodePath = id

-- | Currently the identity function.
decodeArg :: String -> String
decodeArg = id