aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-02-22 11:30:07 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-02-22 11:30:07 -0800
commit80fde18fb1d983b938476ed5b3771ed5d6158d44 (patch)
tree9c2f3bbe6c1f28425b65a564df95ba39ef16537a
parent607c014e9d7a7cee0b5f82efb6b6cb80ab75ec18 (diff)
downloadpandoc-80fde18fb1d983b938476ed5b3771ed5d6158d44.tar.gz
Text.Pandoc.UTF8: change IO functions to return Text, not String.
[API change] This affects `readFile`, `getContents`, `writeFileWith`, `writeFile`, `putStrWith`, `putStr`, `putStrLnWith`, `putStrLn`. `hPutStrWith`, `hPutStr`, `hPutStrLnWith`, `hPutStrLn`, `hGetContents`. This avoids the need to uselessly create a linked list of characters when emiting output.
-rw-r--r--src/Text/Pandoc/App.hs10
-rw-r--r--src/Text/Pandoc/App/CommandLineOptions.hs38
-rw-r--r--src/Text/Pandoc/App/OutputSettings.hs4
-rw-r--r--src/Text/Pandoc/Class/IO.hs6
-rw-r--r--src/Text/Pandoc/Error.hs2
-rw-r--r--src/Text/Pandoc/PDF.hs25
-rw-r--r--src/Text/Pandoc/Parsing.hs2
-rw-r--r--src/Text/Pandoc/UTF8.hs42
-rw-r--r--test/Tests/Command.hs2
-rw-r--r--test/Tests/Old.hs5
-rw-r--r--test/Tests/Readers/LaTeX.hs2
11 files changed, 70 insertions, 68 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 6a071ad5a..63996828e 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -73,8 +73,9 @@ convertWithOpts opts = do
let verbosity = optVerbosity opts
when (optDumpArgs opts) $
- do UTF8.hPutStrLn stdout outputFile
- mapM_ (UTF8.hPutStrLn stdout) (fromMaybe ["-"] $ optInputFiles opts)
+ do UTF8.hPutStrLn stdout (T.pack outputFile)
+ mapM_ (UTF8.hPutStrLn stdout . T.pack)
+ (fromMaybe ["-"] $ optInputFiles opts)
exitSuccess
let sources = case optInputFiles opts of
@@ -354,6 +355,5 @@ writeFnBinary "-" = liftIO . BL.putStr
writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f)
writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
--- TODO this implementation isn't maximally efficient:
-writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack
-writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack
+writerFn eol "-" = liftIO . UTF8.putStrWith eol
+writerFn eol f = liftIO . UTF8.writeFileWith eol f
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index a4c510d97..0a8193f6c 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -812,10 +812,10 @@ options =
map (\c -> ['-',c]) shorts ++
map ("--" ++) longs
let allopts = unwords (concatMap optnames options)
- UTF8.hPutStrLn stdout $ printf tpl allopts
- (unwords readersNames)
- (unwords writersNames)
- (unwords $ map (T.unpack . fst) highlightingStyles)
+ UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts
+ (T.unpack $ T.unwords readersNames)
+ (T.unpack $ T.unwords writersNames)
+ (T.unpack $ T.unwords $ map fst highlightingStyles)
(unwords datafiles)
exitSuccess ))
"" -- "Print bash completion script"
@@ -854,7 +854,7 @@ options =
else if extensionEnabled x allExts
then '-'
else ' ') : drop 4 (show x)
- mapM_ (UTF8.hPutStrLn stdout . showExt)
+ mapM_ (UTF8.hPutStrLn stdout . T.pack . showExt)
[ex | ex <- extList, extensionEnabled ex allExts]
exitSuccess )
"FORMAT")
@@ -868,14 +868,14 @@ options =
, sShortname s `notElem`
[T.pack "Alert", T.pack "Alert_indent"]
]
- mapM_ (UTF8.hPutStrLn stdout) (sort langs)
+ mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs)
exitSuccess ))
""
, Option "" ["list-highlight-styles"]
(NoArg
(\_ -> do
- mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles
+ mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles
exitSuccess ))
""
@@ -893,7 +893,7 @@ options =
| T.null t -> -- e.g. for docx, odt, json:
E.throwIO $ PandocCouldNotFindDataFileError $ T.pack
("templates/default." ++ arg)
- | otherwise -> write . T.unpack $ t
+ | otherwise -> write t
Left e -> E.throwIO e
exitSuccess)
"FORMAT")
@@ -940,11 +940,13 @@ options =
(\_ -> do
prg <- getProgName
defaultDatadirs <- defaultUserDataDirs
- UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++
- compileInfo ++
- "\nUser data directory: " ++
- intercalate " or " defaultDatadirs ++
- ('\n':copyrightMessage))
+ UTF8.hPutStrLn stdout
+ $ T.pack
+ $ prg ++ " " ++ T.unpack pandocVersion ++
+ compileInfo ++
+ "\nUser data directory: " ++
+ intercalate " or " defaultDatadirs ++
+ ('\n':copyrightMessage)
exitSuccess ))
"" -- "Print version"
@@ -952,7 +954,7 @@ options =
(NoArg
(\_ -> do
prg <- getProgName
- UTF8.hPutStr stdout (usageMessage prg options)
+ UTF8.hPutStr stdout (T.pack $ usageMessage prg options)
exitSuccess ))
"" -- "Show help"
]
@@ -1013,12 +1015,12 @@ handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
handleUnrecognizedOption x =
(("Unknown option " ++ x ++ ".") :)
-readersNames :: [String]
-readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)]))
+readersNames :: [Text]
+readersNames = sort (map fst (readers :: [(Text, Reader PandocIO)]))
-writersNames :: [String]
+writersNames :: [Text]
writersNames = sort
- ("pdf" : map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)]))
+ ("pdf" : map fst (writers :: [(Text, Writer PandocIO)]))
splitField :: String -> (String, String)
splitField = second (tailDef "true") . break (`elemText` ":=")
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 53c7d82ef..3864ab188 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -59,8 +59,8 @@ optToOutputSettings opts = do
let outputFile = fromMaybe "-" (optOutputFile opts)
when (optDumpArgs opts) . liftIO $ do
- UTF8.hPutStrLn stdout outputFile
- mapM_ (UTF8.hPutStrLn stdout) (fromMaybe [] $ optInputFiles opts)
+ UTF8.hPutStrLn stdout (T.pack outputFile)
+ mapM_ (UTF8.hPutStrLn stdout . T.pack) (fromMaybe [] $ optInputFiles opts)
exitSuccess
epubMetadata <- traverse readUtf8File $ optEpubMetadata opts
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index eecda5711..bb4e2b732 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -183,7 +183,7 @@ getModificationTime = liftIOError System.Directory.getModificationTime
logOutput :: (PandocMonad m, MonadIO m) => LogMessage -> m ()
logOutput msg = liftIO $ do
UTF8.hPutStr stderr $
- "[" ++ show (messageVerbosity msg) ++ "] "
+ "[" <> T.pack (show (messageVerbosity msg)) <> "] "
alertIndent $ T.lines $ showLogMessage msg
-- | Prints the list of lines to @stderr@, indenting every but the first
@@ -191,10 +191,10 @@ logOutput msg = liftIO $ do
alertIndent :: [Text] -> IO ()
alertIndent [] = return ()
alertIndent (l:ls) = do
- UTF8.hPutStrLn stderr $ unpack l
+ UTF8.hPutStrLn stderr l
mapM_ go ls
where go l' = do UTF8.hPutStr stderr " "
- UTF8.hPutStrLn stderr $ unpack l'
+ UTF8.hPutStrLn stderr l'
-- | Extract media from the mediabag into a directory.
extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 94c013cdb..0fdb7bfe5 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -191,6 +191,6 @@ handleError (Left e) =
err :: Int -> Text -> IO a
err exitCode msg = do
- UTF8.hPutStrLn stderr (T.unpack msg)
+ UTF8.hPutStrLn stderr msg
exitWith $ ExitFailure exitCode
return undefined
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 3f9dd8dad..6f462aad5 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -270,7 +270,7 @@ missingCharacterWarnings verbosity log' = do
| isAscii c = T.singleton c
| otherwise = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")"
let addCodePoint = T.concatMap toCodePoint
- let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l))
+ let warnings = [ addCodePoint (utf8ToText (BC.drop 19 l))
| l <- ls
, isMissingCharacterWarning l
]
@@ -314,7 +314,7 @@ runTectonic verbosity program args' tmpDir' source = do
env <- liftIO getEnvironment
when (verbosity >= INFO) $ liftIO $
showVerboseInfo (Just tmpDir) program programArgs env
- (utf8ToString sourceBL)
+ (utf8ToText sourceBL)
(exit, out) <- liftIO $ E.catch
(pipeProcess (Just env) program programArgs sourceBL)
(handlePDFProgramNotFound program)
@@ -385,7 +385,7 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do
(pipeProcess (Just env'') program programArgs BL.empty)
(handlePDFProgramNotFound program)
when (verbosity >= INFO) $ liftIO $ do
- UTF8.hPutStrLn stderr $ "[makePDF] Run #" ++ show runNumber
+ UTF8.hPutStrLn stderr $ "[makePDF] Run #" <> tshow runNumber
BL.hPutStr stderr out
UTF8.hPutStr stderr "\n"
if runNumber < numRuns
@@ -405,7 +405,7 @@ generic2pdf :: Verbosity
generic2pdf verbosity program args source = do
env' <- getEnvironment
when (verbosity >= INFO) $
- showVerboseInfo Nothing program args env' (T.unpack source)
+ showVerboseInfo Nothing program args env' source
(exit, out) <- E.catch
(pipeProcess (Just env') program args
(BL.fromStrict $ UTF8.fromText source))
@@ -494,19 +494,20 @@ showVerboseInfo :: Maybe FilePath
-> String
-> [String]
-> [(String, String)]
- -> String
+ -> Text
-> IO ()
showVerboseInfo mbTmpDir program programArgs env source = do
case mbTmpDir of
Just tmpDir -> do
UTF8.hPutStrLn stderr "[makePDF] temp dir:"
- UTF8.hPutStrLn stderr tmpDir
+ UTF8.hPutStrLn stderr (T.pack tmpDir)
Nothing -> return ()
UTF8.hPutStrLn stderr "[makePDF] Command line:"
- UTF8.hPutStrLn stderr $ program ++ " " ++ unwords (map show programArgs)
+ UTF8.hPutStrLn stderr $
+ T.pack program <> " " <> T.pack (unwords (map show programArgs))
UTF8.hPutStr stderr "\n"
UTF8.hPutStrLn stderr "[makePDF] Environment:"
- mapM_ (UTF8.hPutStrLn stderr . show) env
+ mapM_ (UTF8.hPutStrLn stderr . tshow) env
UTF8.hPutStr stderr "\n"
UTF8.hPutStrLn stderr "[makePDF] Source:"
UTF8.hPutStrLn stderr source
@@ -517,8 +518,8 @@ handlePDFProgramNotFound program e
E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program
| otherwise = E.throwIO e
-utf8ToString :: ByteString -> String
-utf8ToString lbs =
+utf8ToText :: ByteString -> Text
+utf8ToText lbs =
case decodeUtf8' lbs of
- Left _ -> BC.unpack lbs -- if decoding fails, treat as latin1
- Right t -> TL.unpack t
+ Left _ -> T.pack $ BC.unpack lbs -- if decoding fails, treat as latin1
+ Right t -> TL.toStrict t
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4c4dd531d..8d3799c3e 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1148,7 +1148,7 @@ testStringWith :: Show a
=> ParserT Text ParserState Identity a
-> Text
-> IO ()
-testStringWith parser str = UTF8.putStrLn $ show $
+testStringWith parser str = UTF8.putStrLn $ tshow $
readWith parser defaultParserState str
-- | Parsing options.
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 567f5abe5..4d5921faf 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -39,67 +39,65 @@ where
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
+import Data.Text (Text)
import qualified Data.Text as T
+import qualified Data.Text.IO as TIO
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr,
putStrLn, readFile, writeFile)
-import qualified System.IO as IO
-readFile :: FilePath -> IO String
+readFile :: FilePath -> IO Text
readFile f = do
h <- openFile (encodePath f) ReadMode
hGetContents h
-getContents :: IO String
+getContents :: IO Text
getContents = hGetContents stdin
-writeFileWith :: Newline -> FilePath -> String -> IO ()
+writeFileWith :: Newline -> FilePath -> Text -> IO ()
writeFileWith eol f s =
withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s
-writeFile :: FilePath -> String -> IO ()
+writeFile :: FilePath -> Text -> IO ()
writeFile = writeFileWith nativeNewline
-putStrWith :: Newline -> String -> IO ()
+putStrWith :: Newline -> Text -> IO ()
putStrWith eol s = hPutStrWith eol stdout s
-putStr :: String -> IO ()
+putStr :: Text -> IO ()
putStr = putStrWith nativeNewline
-putStrLnWith :: Newline -> String -> IO ()
+putStrLnWith :: Newline -> Text -> IO ()
putStrLnWith eol s = hPutStrLnWith eol stdout s
-putStrLn :: String -> IO ()
+putStrLn :: Text -> IO ()
putStrLn = putStrLnWith nativeNewline
-hPutStrWith :: Newline -> Handle -> String -> IO ()
+hPutStrWith :: Newline -> Handle -> Text -> IO ()
hPutStrWith eol h s =
hSetNewlineMode h (NewlineMode eol eol) >>
- hSetEncoding h utf8 >> IO.hPutStr h s
+ hSetEncoding h utf8 >> TIO.hPutStr h s
-hPutStr :: Handle -> String -> IO ()
+hPutStr :: Handle -> Text -> IO ()
hPutStr = hPutStrWith nativeNewline
-hPutStrLnWith :: Newline -> Handle -> String -> IO ()
+hPutStrLnWith :: Newline -> Handle -> Text -> IO ()
hPutStrLnWith eol h s =
hSetNewlineMode h (NewlineMode eol eol) >>
- hSetEncoding h utf8 >> IO.hPutStrLn h s
+ hSetEncoding h utf8 >> TIO.hPutStrLn h s
-hPutStrLn :: Handle -> String -> IO ()
+hPutStrLn :: Handle -> Text -> IO ()
hPutStrLn = hPutStrLnWith nativeNewline
-hGetContents :: Handle -> IO String
-hGetContents = fmap toString . B.hGetContents
--- hGetContents h = hSetEncoding h utf8_bom
--- >> hSetNewlineMode h universalNewlineMode
--- >> IO.hGetContents h
+hGetContents :: Handle -> IO Text
+hGetContents = fmap toText . B.hGetContents
-- | Convert UTF8-encoded ByteString to Text, also
-- removing '\r' characters.
-toText :: B.ByteString -> T.Text
+toText :: B.ByteString -> Text
toText = T.decodeUtf8 . filterCRs . dropBOM
where dropBOM bs =
if "\xEF\xBB\xBF" `B.isPrefixOf` bs
@@ -127,7 +125,7 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM
toStringLazy :: BL.ByteString -> String
toStringLazy = TL.unpack . toTextLazy
-fromText :: T.Text -> B.ByteString
+fromText :: Text -> B.ByteString
fromText = T.encodeUtf8
fromTextLazy :: TL.Text -> BL.ByteString
diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs
index 07d825f73..59b04eac1 100644
--- a/test/Tests/Command.hs
+++ b/test/Tests/Command.hs
@@ -130,7 +130,7 @@ runCommandTest testExePath fp num code =
let cmdline = "% " <> cmd
let x = cmdline <> "\n" <> input <> "^D\n" <> norm
let y = cmdline <> "\n" <> input <> "^D\n" <> newnorm
- let updated = T.unpack $ T.replace (T.pack x) (T.pack y) (T.pack raw)
+ let updated = T.replace (T.pack x) (T.pack y) raw
UTF8.writeFile fp' updated
extractCommandTest :: FilePath -> FilePath -> TestTree
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index 17ece49fd..160086be2 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -22,6 +22,7 @@ import Test.Tasty (TestTree, testGroup)
import Test.Tasty.Golden.Advanced (goldenTest)
import Tests.Helpers hiding (test)
import qualified Text.Pandoc.UTF8 as UTF8
+import qualified Data.Text as T
tests :: FilePath -> [TestTree]
tests pandocPath =
@@ -231,7 +232,7 @@ tests pandocPath =
-- makes sure file is fully closed after reading
readFile' :: FilePath -> IO String
readFile' f = do s <- UTF8.readFile f
- return $! (length s `seq` s)
+ return $! (T.length s `seq` T.unpack s)
lhsWriterTests :: FilePath -> String -> [TestTree]
lhsWriterTests pandocPath format
@@ -333,7 +334,7 @@ testWithNormalize normalizer pandocPath testname opts inp norm =
$ UTF8.toStringLazy out
-- filter \r so the tests will work on Windows machines
else fail $ "Pandoc failed with error code " ++ show ec
- updateGolden = UTF8.writeFile norm
+ updateGolden = UTF8.writeFile norm . T.pack
options = ["--data-dir=../data","--quiet"] ++ [inp] ++ opts
compareValues :: FilePath -> [String] -> String -> String -> IO (Maybe String)
diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs
index 77104c853..8385b751e 100644
--- a/test/Tests/Readers/LaTeX.hs
+++ b/test/Tests/Readers/LaTeX.hs
@@ -54,7 +54,7 @@ tokUntokRt s = untokenize (tokenize "random" t) == t
tests :: [TestTree]
tests = [ testGroup "tokenization"
[ testCase "tokenizer round trip on test case" $ do
- orig <- T.pack <$> UTF8.readFile "../test/latex-reader.latex"
+ orig <- UTF8.readFile "../test/latex-reader.latex"
let new = untokenize $ tokenize "../test/latex-reader.latex"
orig
assertEqual "untokenize . tokenize is identity" orig new