From 965f1ddd4a9d1317455094b8c41016624d92f8ce Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 13 May 2017 23:30:13 +0200 Subject: Update dates in copyright notices This follows the suggestions given by the FSF for GPL licensed software. --- src/Text/Pandoc/UTF8.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/UTF8.hs') diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index d88a44948..e27a24e63 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010-2016 John MacFarlane +Copyright (C) 2010-2017 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010-2016 John MacFarlane + Copyright : Copyright (C) 2010-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane -- cgit v1.2.3 From 6b8240fc2f45ced4f16403316cab76df15ceaf7a Mon Sep 17 00:00:00 2001 From: Stefan Dresselhaus Date: Wed, 17 May 2017 15:13:35 +0200 Subject: Add `--eol` flag and writer option to control line endings. * Add `--eol=crlf|lf` CLI option. * Add `optEol` to `WriterOptions` [API change] * In `Text.Pandoc.UTF8`, add new functions parameterized on `Newline`: `writeFileWith`, `putStrWith`, `putStrLnWith`, `hPutStrWith`, `hPutStrLnWith`. [API change] * Document option in MANUAL.txt. Closes #3663. Closes #2097. --- MANUAL.txt | 7 +++++++ src/Text/Pandoc/App.hs | 26 +++++++++++++++++++++----- src/Text/Pandoc/UTF8.hs | 41 +++++++++++++++++++++++++++++++++-------- 3 files changed, 61 insertions(+), 13 deletions(-) (limited to 'src/Text/Pandoc/UTF8.hs') diff --git a/MANUAL.txt b/MANUAL.txt index f41d96ffa..d99cd0600 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -593,7 +593,14 @@ General writer options : Print a system default data file. Files in the user data directory are ignored. +`--eol=crlf`|`lf` + +: Manually specify line endings: `crlf` (Windows) or `lf` + (MacOS/linux/unix). The default is to use the line endings + appropriate for the OS. + `--dpi`=*NUMBER* + : Specify the dpi (dots per inch) value for conversion from pixels to inch/centimeters and vice versa. The default is 96dpi. Technically, the correct term would be ppi (pixels per inch). diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 157100507..9c8e1bde4 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -65,7 +65,7 @@ import System.Directory (Permissions (..), doesFileExist, findExecutable, import System.Environment (getArgs, getEnvironment, getProgName) import System.Exit (ExitCode (..), exitSuccess) import System.FilePath -import System.IO (stdout) +import System.IO (stdout, nativeNewline, Newline(..)) import System.IO.Error (isDoesNotExistError) import Text.Pandoc import Text.Pandoc.Builder (setMeta) @@ -411,6 +411,8 @@ convertWithOpts opts = do return $ ("csl", jatsEncoded) : optMetadata opts else return $ optMetadata opts + let eol = fromMaybe nativeNewline $ optEol opts + runIO' $ do (doc, media) <- withMediaBag $ sourceToDoc sources >>= ( (if isJust (optExtractMedia opts) @@ -463,7 +465,7 @@ convertWithOpts opts = do else id output <- f writerOptions doc selfcontain (output ++ ['\n' | not standalone]) >>= - writerFn outputFile . handleEntities + writerFn eol outputFile . handleEntities type Transform = Pandoc -> Pandoc @@ -567,6 +569,7 @@ data Opt = Opt , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body , optIncludeInHeader :: [FilePath] -- ^ Files to include in header + , optEol :: Maybe Newline -- ^ Enforce line-endings } -- | Defaults for command-line options. @@ -635,6 +638,7 @@ defaultOpts = Opt , optIncludeBeforeBody = [] , optIncludeAfterBody = [] , optIncludeInHeader = [] + , optEol = Nothing } addMetadata :: (String, String) -> Pandoc -> Pandoc @@ -783,9 +787,9 @@ writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m () writeFnBinary "-" = liftIO . B.putStr writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f) -writerFn :: MonadIO m => FilePath -> String -> m () -writerFn "-" = liftIO . UTF8.putStr -writerFn f = liftIO . UTF8.writeFile f +writerFn :: MonadIO m => Newline -> FilePath -> String -> m () +writerFn eol "-" = liftIO . UTF8.putStrWith eol +writerFn eol f = liftIO . UTF8.writeFileWith eol f lookupHighlightStyle :: Maybe String -> IO (Maybe Style) lookupHighlightStyle Nothing = return Nothing @@ -958,6 +962,18 @@ options = "NUMBER") "" -- "Dpi (default 96)" + , Option "" ["eol"] + (ReqArg + (\arg opt -> + case toLower <$> arg of + "crlf" -> return opt { optEol = Just CRLF } + "lf" -> return opt { optEol = Just LF } + -- mac-syntax (cr) is not supported in ghc-base. + _ -> E.throwIO $ PandocOptionError + "--eol must be one of crlf (Windows), lf (Unix)") + "crlf|lf") + "" -- "EOL (default OS-dependent)" + , Option "" ["wrap"] (ReqArg (\arg opt -> diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index e27a24e63..84043d4cb 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -28,11 +28,16 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7. -} module Text.Pandoc.UTF8 ( readFile - , writeFile , getContents + , writeFileWith + , writeFile + , putStrWith , putStr + , putStrLnWith , putStrLn + , hPutStrWith , hPutStr + , hPutStrLnWith , hPutStrLn , hGetContents , toString @@ -61,23 +66,43 @@ readFile f = do h <- openFile (encodePath f) ReadMode hGetContents h -writeFile :: FilePath -> String -> IO () -writeFile f s = withFile (encodePath f) WriteMode $ \h -> hPutStr h s - getContents :: IO String getContents = hGetContents stdin +writeFileWith :: Newline -> FilePath -> String -> IO () +writeFileWith eol f s = + withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s + +writeFile :: FilePath -> String -> IO () +writeFile = writeFileWith nativeNewline + +putStrWith :: Newline -> String -> IO () +putStrWith eol s = hPutStrWith eol stdout s + putStr :: String -> IO () -putStr s = hPutStr stdout s +putStr = putStrWith nativeNewline + +putStrLnWith :: Newline -> String -> IO () +putStrLnWith eol s = hPutStrLnWith eol stdout s putStrLn :: String -> IO () -putStrLn s = hPutStrLn stdout s +putStrLn = putStrLnWith nativeNewline + +hPutStrWith :: Newline -> Handle -> String -> IO () +hPutStrWith eol h s = + hSetNewlineMode h (NewlineMode eol eol) >> + hSetEncoding h utf8 >> IO.hPutStr h s hPutStr :: Handle -> String -> IO () -hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s +hPutStr = hPutStrWith nativeNewline + +hPutStrLnWith :: Newline -> Handle -> String -> IO () +hPutStrLnWith eol h s = + hSetNewlineMode h (NewlineMode eol eol) >> + hSetEncoding h utf8 >> IO.hPutStrLn h s hPutStrLn :: Handle -> String -> IO () -hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s +hPutStrLn = hPutStrLnWith nativeNewline hGetContents :: Handle -> IO String hGetContents = fmap toString . B.hGetContents -- cgit v1.2.3 From c691b975061e3674a80474968fab604cafe776af Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Jun 2017 15:54:35 +0200 Subject: UTF8: export toText, toTextLazy. Define toString, toStringLazy in terms of them. --- src/Text/Pandoc/UTF8.hs | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'src/Text/Pandoc/UTF8.hs') diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 84043d4cb..3f1b28e54 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2010-2017 John MacFarlane @@ -41,8 +42,10 @@ module Text.Pandoc.UTF8 ( readFile , hPutStrLn , hGetContents , toString + , toText , fromString , toStringLazy + , toTextLazy , fromStringLazy , encodePath , decodeArg @@ -51,7 +54,7 @@ module Text.Pandoc.UTF8 ( readFile where import qualified Data.ByteString.Char8 as B -import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as TL @@ -110,31 +113,38 @@ hGetContents = fmap toString . B.hGetContents -- >> hSetNewlineMode h universalNewlineMode -- >> IO.hGetContents h --- | Drop BOM (byte order marker) if present at beginning of string. --- Note that Data.Text converts the BOM to code point FEFF, zero-width --- no-break space, so if the string begins with this we strip it off. -dropBOM :: String -> String -dropBOM ('\xFEFF':xs) = xs -dropBOM xs = xs - -filterCRs :: String -> String -filterCRs ('\r':'\n':xs) = '\n': filterCRs xs -filterCRs ('\r':xs) = '\n' : filterCRs xs -filterCRs (x:xs) = x : filterCRs xs -filterCRs [] = [] +-- | Convert UTF8-encoded ByteString to Text, also +-- removing '\r' characters. +toText :: B.ByteString -> T.Text +toText = T.decodeUtf8 . filterCRs . dropBOM + where dropBOM bs = + if "\xEF\xBB\xBF" `B.isPrefixOf` bs + then B.drop 3 bs + else bs + filterCRs = B.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also -- removing '\r' characters. toString :: B.ByteString -> String -toString = filterCRs . dropBOM . T.unpack . T.decodeUtf8 +toString = T.unpack . toText -fromString :: String -> B.ByteString -fromString = T.encodeUtf8 . T.pack +-- | Convert UTF8-encoded ByteString to Text, also +-- removing '\r' characters. +toTextLazy :: BL.ByteString -> TL.Text +toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM + where dropBOM bs = + if "\xEF\xBB\xBF" `BL.isPrefixOf` bs + then BL.drop 3 bs + else bs + filterCRs = BL.filter (/='\r') -- | Convert UTF8-encoded ByteString to String, also -- removing '\r' characters. toStringLazy :: BL.ByteString -> String -toStringLazy = filterCRs . dropBOM . TL.unpack . TL.decodeUtf8 +toStringLazy = TL.unpack . toTextLazy + +fromString :: String -> B.ByteString +fromString = T.encodeUtf8 . T.pack fromStringLazy :: String -> BL.ByteString fromStringLazy = TL.encodeUtf8 . TL.pack -- cgit v1.2.3 From d1e78d96b6ad9a4afe4b319f9c06668e0aa4ca1c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Jun 2017 16:05:56 +0200 Subject: UTF8: export fromText, fromTextLazy. --- src/Text/Pandoc/UTF8.hs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/UTF8.hs') diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 3f1b28e54..663f30d92 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -44,7 +44,9 @@ module Text.Pandoc.UTF8 ( readFile , toString , toText , fromString + , fromText , toStringLazy + , fromTextLazy , toTextLazy , fromStringLazy , encodePath @@ -143,11 +145,17 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM toStringLazy :: BL.ByteString -> String toStringLazy = TL.unpack . toTextLazy +fromText :: T.Text -> B.ByteString +fromText = T.encodeUtf8 + +fromTextLazy :: TL.Text -> BL.ByteString +fromTextLazy = TL.encodeUtf8 + fromString :: String -> B.ByteString -fromString = T.encodeUtf8 . T.pack +fromString = fromText . T.pack fromStringLazy :: String -> BL.ByteString -fromStringLazy = TL.encodeUtf8 . TL.pack +fromStringLazy = fromTextLazy . TL.pack encodePath :: FilePath -> FilePath encodePath = id -- cgit v1.2.3