aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: John MacFarlane <fiddlosopher@gmail.com> 2012-09-23 22:12:21 -0700
committer: John MacFarlane <fiddlosopher@gmail.com> 2012-09-23 22:12:21 -0700
commit: 7272735b3d413a644fd9ab01eeae8ae9cd5a925b (patch)
tree: bb0fa978cee6702d0bb3300f234b1f08d2071c5f
parent: 31107741f0ee69d444e5f9ed2d8272583024e10c (diff)
download: pandoc-7272735b3d413a644fd9ab01eeae8ae9cd5a925b.tar.gz
More intelligent handling of text encodings.
Previously, UTF-8 was enforced for both input and output. The new system:
* For input, UTF-8 is tried first; if an error is raised, the locale encoding is tried.
* For output, the locale encoding is always used.
-rw-r--r-- src/Text/Pandoc/Parsing.hs 3
-rw-r--r-- src/Text/Pandoc/Shared.hs 6
-rw-r--r-- src/Text/Pandoc/UTF8.hs 20
-rw-r--r-- src/Text/Pandoc/Writers/Docx.hs 5
-rw-r--r-- src/pandoc.hs 16
5 files changed, 30 insertions, 20 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index bee96be82..de4e3a65d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -146,7 +146,6 @@ where
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Builder (Blocks)
-import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Parsec
import Text.Parsec.Pos (newPos)
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
@@ -708,7 +707,7 @@ readWith parser state input =
testStringWith :: (Show a) => Parser [Char] ParserState a
-> String
-> IO ()
-testStringWith parser str = UTF8.putStrLn $ show $
+testStringWith parser str = putStrLn $ show $
readWith parser defaultParserState str
-- | Parsing options.
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d86f9a390..fa928455d 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -90,7 +90,7 @@ import Paths_pandoc (getDataFileName)
import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
-import System.IO (stderr)
+import System.IO (stderr, hPutStrLn)
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
@@ -503,14 +503,14 @@ readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile
err :: Int -> String -> IO a
err exitCode msg = do
name <- getProgName
- UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+ hPutStrLn stderr $ name ++ ": " ++ msg
exitWith $ ExitFailure exitCode
return undefined
warn :: String -> IO ()
warn msg = do
name <- getProgName
- UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+ hPutStrLn stderr $ name ++ ": " ++ msg
--
-- Safe read
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index aa3a9da04..45664892a 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -25,7 +25,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Stability : alpha
Portability : portable
-UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
+UTF-8 aware string IO functions that will work with GHC 6.12 or 7.
+The reading functions first attempt to read UTF-8; if an encoding
+error is encountered, the locale encoding is used instead. This
+should work well in practice because text in other encodings
+is usually not valid UTF-8.
-}
module Text.Pandoc.UTF8 ( readFile
, writeFile
@@ -45,10 +49,11 @@ where
#else
import Codec.Binary.UTF8.String (encodeString, decodeString)
#endif
-
+import Control.Exception (catch, throwIO)
+import GHC.IO.Exception (IOException(..), IOErrorType(..))
import System.IO hiding (readFile, writeFile, getContents,
putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
-import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn, catch )
import qualified System.IO as IO
readFile :: FilePath -> IO String
@@ -75,7 +80,14 @@ hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
hGetContents :: Handle -> IO String
-hGetContents h = hSetEncoding h utf8_bom >> IO.hGetContents h
+hGetContents h = do
+ hSetEncoding h utf8_bom
+ catch (IO.hGetContents h) $ \e ->
+ case ioe_type e of
+ InvalidArgument -> do
+ hSetEncoding h localeEncoding
+ IO.hGetContents h
+ _ -> throwIO e
encodePath :: FilePath -> FilePath
decodeArg :: String -> String
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 84bf95dfb..211f8946b 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -33,8 +33,7 @@ import System.FilePath ( (</>) )
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
import Data.ByteString.Lazy.UTF8 ( fromString, toString )
-import Text.Pandoc.UTF8 as UTF8
-import System.IO ( stderr )
+import System.IO ( stderr, hPutStrLn )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Paths_pandoc ( getDataFileName )
@@ -663,7 +662,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do
, mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] ()
, graphic ] ]
else do
- liftIO $ UTF8.hPutStrLn stderr $
+ liftIO $ hPutStrLn stderr $
"Could not find image `" ++ src ++ "', skipping..."
inlinesToOpenXML opts alt
diff --git a/src/pandoc.hs b/src/pandoc.hs
index 305557f6a..33fbd2152 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -46,7 +46,7 @@ import System.Console.GetOpt
import Data.Char ( toLower )
import Data.List ( intercalate, isPrefixOf )
import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
-import System.IO ( stdout )
+import System.IO ( stdout, hPutStr, hPutStrLn )
import System.IO.Error ( isDoesNotExistError )
import qualified Control.Exception as E
import Control.Exception.Extensible ( throwIO )
@@ -312,7 +312,7 @@ options =
(\arg _ -> do
templ <- getDefaultTemplate Nothing arg
case templ of
- Right t -> UTF8.hPutStr stdout t
+ Right t -> hPutStr stdout t
Left e -> error $ show e
exitWith ExitSuccess)
"FORMAT")
@@ -663,7 +663,7 @@ options =
(NoArg
(\_ -> do
prg <- getProgName
- UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
+ hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
copyrightMessage)
exitWith ExitSuccess ))
"" -- "Print version"
@@ -672,7 +672,7 @@ options =
(NoArg
(\_ -> do
prg <- getProgName
- UTF8.hPutStr stdout (usageMessage prg options)
+ hPutStr stdout (usageMessage prg options)
exitWith ExitSuccess ))
"" -- "Show help"
@@ -827,8 +827,8 @@ main = do
} = opts
when dumpArgs $
- do UTF8.hPutStrLn stdout outputFile
- mapM_ (\arg -> UTF8.hPutStrLn stdout arg) args
+ do hPutStrLn stdout outputFile
+ mapM_ (\arg -> hPutStrLn stdout arg) args
exitWith ExitSuccess
let sources = if ignoreArgs then [] else args
@@ -1026,8 +1026,8 @@ main = do
writeBinary = B.writeFile (UTF8.encodePath outputFile)
let writerFn :: FilePath -> String -> IO ()
- writerFn "-" = UTF8.putStr
- writerFn f = UTF8.writeFile f
+ writerFn "-" = putStr
+ writerFn f = writeFile f
case getWriter writerName' of
Left e -> err 9 e