diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-02-22 14:01:10 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-02-22 14:01:10 -0800 |
commit | 5a73c5d3f8136c7fba7429c3ae3a8ae31c58030b (patch) | |
tree | 503dd3dede7ab5f79f0653f27090b4f832cbb89f /src/Text | |
parent | bafccd5aa2dc977e5e49b67c587e1507dd73417c (diff) | |
download | pandoc-5a73c5d3f8136c7fba7429c3ae3a8ae31c58030b.tar.gz |
When downloading content from URL arguments, be sensitive to
the character encoding. We can properly handle UTF-8 and
latin1 (ISO-8859-1); for others we raise an error.
See #5600.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/App.hs | 10 |
1 files changed, 9 insertions, 1 deletions
```diff
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 63996828e..59af029b5 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -28,6 +28,7 @@ import Control.Monad ( (>=>), when )
 import Control.Monad.Trans ( MonadIO(..) )
 import Control.Monad.Except (throwError)
 import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as B8
 import qualified Data.ByteString.Lazy as BL
 import Data.Char (toLower)
 import Data.Maybe (fromMaybe, isJust, isNothing)
@@ -45,6 +46,7 @@ import System.FilePath ( takeBaseName, takeExtension )
 import System.IO (nativeNewline, stdout)
 import qualified System.IO as IO (Newline (..))
 import Text.Pandoc
+import Text.Pandoc.MIME (getCharset)
 import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
 import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
                             IpynbOutput (..) )
@@ -344,7 +346,13 @@ readSource src = case parseURI src of
                        _ -> PandocAppError (tshow e))
 
 readURI :: FilePath -> PandocIO Text
-readURI src = UTF8.toText . fst <$> openURL (T.pack src)
+readURI src = do
+  (bs, mt) <- openURL (T.pack src)
+  case mt >>= getCharset of
+    Just "UTF-8" -> return $ UTF8.toText bs
+    Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs
+    Just charset -> throwError $ PandocUnsupportedCharsetError charset
+    Nothing -> return $ UTF8.toText bs
 
 readFile' :: MonadIO m => FilePath -> m BL.ByteString
 readFile' "-" = liftIO BL.getContents
```