From 3c4a58bad03ef56ae9c82b7e7a6ae027514e2bd6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Feb 2021 23:01:08 -0800 Subject: T.P.Class: Add getTimestamp [API change]. This attempts to read the SOURCE_DATE_EPOCH environment variable and parse a UTC time from it (treating it as a unix date stamp, see https://reproducible-builds.org/specs/source-date-epoch/). If the variable is not set or can't be parsed as a unix date stamp, then the function returns the current date. --- src/Text/Pandoc/Class/PandocMonad.hs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 374da161b..86c8de79e 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -51,6 +51,7 @@ module Text.Pandoc.Class.PandocMonad , setTranslations , translateTerm , makeCanonical + , getTimestamp ) where import Codec.Archive.Zip @@ -59,7 +60,8 @@ import Control.Monad.Except (MonadError (catchError, throwError), import Data.Digest.Pure.SHA (sha1, showDigest) import Data.Maybe (fromMaybe) import Data.Time (UTCTime) -import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds) +import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds, + posixSecondsToUTCTime) import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime) import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, @@ -74,7 +76,7 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import Text.Pandoc.MediaBag (MediaBag, lookupMedia) -import Text.Pandoc.Shared (uriPathToPath) +import Text.Pandoc.Shared (uriPathToPath, safeRead) import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, readTranslations) import Text.Pandoc.Walk (walkM) @@ -175,6 +177,21 @@ report msg = do when (level <= verbosity) $ logOutput msg modifyCommonState $ \st -> st{ stLog = msg : stLog st } +-- | Get the time from the @SOURCE_DATE_EPOCH@ +-- environment variable. The variable should contain a +-- unix time stamp, the number of seconds since midnight Jan 01 +-- 1970 UTC. If the variable is not set or cannot be +-- parsed as a unix time stamp, the current time is returned. +-- This function is designed to make possible reproducible +-- builds in formats that include a creation timestamp. +getTimestamp :: PandocMonad m => m UTCTime +getTimestamp = do + mbSourceDateEpoch <- lookupEnv "SOURCE_DATE_EPOCH" + case mbSourceDateEpoch >>= safeRead of + Just (epoch :: Integer) -> + return $ posixSecondsToUTCTime $ fromIntegral epoch + Nothing -> getCurrentTime + -- | Determine whether tracing is enabled. This affects -- the behavior of 'trace'. If tracing is not enabled, -- 'trace' does nothing. -- cgit v1.2.3 From 80fde18fb1d983b938476ed5b3771ed5d6158d44 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 22 Feb 2021 11:30:07 -0800 Subject: Text.Pandoc.UTF8: change IO functions to return Text, not String. [API change] This affects `readFile`, `getContents`, `writeFileWith`, `writeFile`, `putStrWith`, `putStr`, `putStrLnWith`, `putStrLn`. `hPutStrWith`, `hPutStr`, `hPutStrLnWith`, `hPutStrLn`, `hGetContents`. This avoids the need to uselessly create a linked list of characters when emiting output. --- src/Text/Pandoc/App.hs | 10 ++++---- src/Text/Pandoc/App/CommandLineOptions.hs | 38 +++++++++++++++------------- src/Text/Pandoc/App/OutputSettings.hs | 4 +-- src/Text/Pandoc/Class/IO.hs | 6 ++--- src/Text/Pandoc/Error.hs | 2 +- src/Text/Pandoc/PDF.hs | 25 +++++++++--------- src/Text/Pandoc/Parsing.hs | 2 +- src/Text/Pandoc/UTF8.hs | 42 +++++++++++++++---------------- test/Tests/Command.hs | 2 +- test/Tests/Old.hs | 5 ++-- test/Tests/Readers/LaTeX.hs | 2 +- 11 files changed, 70 insertions(+), 68 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 6a071ad5a..63996828e 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -73,8 +73,9 @@ convertWithOpts opts = do let verbosity = optVerbosity opts when (optDumpArgs opts) $ - do UTF8.hPutStrLn stdout outputFile - mapM_ (UTF8.hPutStrLn stdout) (fromMaybe ["-"] $ optInputFiles opts) + do UTF8.hPutStrLn stdout (T.pack outputFile) + mapM_ (UTF8.hPutStrLn stdout . T.pack) + (fromMaybe ["-"] $ optInputFiles opts) exitSuccess let sources = case optInputFiles opts of @@ -354,6 +355,5 @@ writeFnBinary "-" = liftIO . BL.putStr writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f) writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m () --- TODO this implementation isn't maximally efficient: -writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack -writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack +writerFn eol "-" = liftIO . UTF8.putStrWith eol +writerFn eol f = liftIO . UTF8.writeFileWith eol f diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index a4c510d97..0a8193f6c 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -812,10 +812,10 @@ options = map (\c -> ['-',c]) shorts ++ map ("--" ++) longs let allopts = unwords (concatMap optnames options) - UTF8.hPutStrLn stdout $ printf tpl allopts - (unwords readersNames) - (unwords writersNames) - (unwords $ map (T.unpack . fst) highlightingStyles) + UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts + (T.unpack $ T.unwords readersNames) + (T.unpack $ T.unwords writersNames) + (T.unpack $ T.unwords $ map fst highlightingStyles) (unwords datafiles) exitSuccess )) "" -- "Print bash completion script" @@ -854,7 +854,7 @@ options = else if extensionEnabled x allExts then '-' else ' ') : drop 4 (show x) - mapM_ (UTF8.hPutStrLn stdout . showExt) + mapM_ (UTF8.hPutStrLn stdout . T.pack . showExt) [ex | ex <- extList, extensionEnabled ex allExts] exitSuccess ) "FORMAT") @@ -868,14 +868,14 @@ options = , sShortname s `notElem` [T.pack "Alert", T.pack "Alert_indent"] ] - mapM_ (UTF8.hPutStrLn stdout) (sort langs) + mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs) exitSuccess )) "" , Option "" ["list-highlight-styles"] (NoArg (\_ -> do - mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles + mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles exitSuccess )) "" @@ -893,7 +893,7 @@ options = | T.null t -> -- e.g. for docx, odt, json: E.throwIO $ PandocCouldNotFindDataFileError $ T.pack ("templates/default." ++ arg) - | otherwise -> write . T.unpack $ t + | otherwise -> write t Left e -> E.throwIO e exitSuccess) "FORMAT") @@ -940,11 +940,13 @@ options = (\_ -> do prg <- getProgName defaultDatadirs <- defaultUserDataDirs - UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++ - compileInfo ++ - "\nUser data directory: " ++ - intercalate " or " defaultDatadirs ++ - ('\n':copyrightMessage)) + UTF8.hPutStrLn stdout + $ T.pack + $ prg ++ " " ++ T.unpack pandocVersion ++ + compileInfo ++ + "\nUser data directory: " ++ + intercalate " or " defaultDatadirs ++ + ('\n':copyrightMessage) exitSuccess )) "" -- "Print version" @@ -952,7 +954,7 @@ options = (NoArg (\_ -> do prg <- getProgName - UTF8.hPutStr stdout (usageMessage prg options) + UTF8.hPutStr stdout (T.pack $ usageMessage prg options) exitSuccess )) "" -- "Show help" ] @@ -1013,12 +1015,12 @@ handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw" handleUnrecognizedOption x = (("Unknown option " ++ x ++ ".") :) -readersNames :: [String] -readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)])) +readersNames :: [Text] +readersNames = sort (map fst (readers :: [(Text, Reader PandocIO)])) -writersNames :: [String] +writersNames :: [Text] writersNames = sort - ("pdf" : map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)])) + ("pdf" : map fst (writers :: [(Text, Writer PandocIO)])) splitField :: String -> (String, String) splitField = second (tailDef "true") . break (`elemText` ":=") diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index 53c7d82ef..3864ab188 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -59,8 +59,8 @@ optToOutputSettings opts = do let outputFile = fromMaybe "-" (optOutputFile opts) when (optDumpArgs opts) . liftIO $ do - UTF8.hPutStrLn stdout outputFile - mapM_ (UTF8.hPutStrLn stdout) (fromMaybe [] $ optInputFiles opts) + UTF8.hPutStrLn stdout (T.pack outputFile) + mapM_ (UTF8.hPutStrLn stdout . T.pack) (fromMaybe [] $ optInputFiles opts) exitSuccess epubMetadata <- traverse readUtf8File $ optEpubMetadata opts diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index eecda5711..bb4e2b732 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -183,7 +183,7 @@ getModificationTime = liftIOError System.Directory.getModificationTime logOutput :: (PandocMonad m, MonadIO m) => LogMessage -> m () logOutput msg = liftIO $ do UTF8.hPutStr stderr $ - "[" ++ show (messageVerbosity msg) ++ "] " + "[" <> T.pack (show (messageVerbosity msg)) <> "] " alertIndent $ T.lines $ showLogMessage msg -- | Prints the list of lines to @stderr@, indenting every but the first @@ -191,10 +191,10 @@ logOutput msg = liftIO $ do alertIndent :: [Text] -> IO () alertIndent [] = return () alertIndent (l:ls) = do - UTF8.hPutStrLn stderr $ unpack l + UTF8.hPutStrLn stderr l mapM_ go ls where go l' = do UTF8.hPutStr stderr " " - UTF8.hPutStrLn stderr $ unpack l' + UTF8.hPutStrLn stderr l' -- | Extract media from the mediabag into a directory. extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 94c013cdb..0fdb7bfe5 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -191,6 +191,6 @@ handleError (Left e) = err :: Int -> Text -> IO a err exitCode msg = do - UTF8.hPutStrLn stderr (T.unpack msg) + UTF8.hPutStrLn stderr msg exitWith $ ExitFailure exitCode return undefined diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 3f9dd8dad..6f462aad5 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -270,7 +270,7 @@ missingCharacterWarnings verbosity log' = do | isAscii c = T.singleton c | otherwise = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")" let addCodePoint = T.concatMap toCodePoint - let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l)) + let warnings = [ addCodePoint (utf8ToText (BC.drop 19 l)) | l <- ls , isMissingCharacterWarning l ] @@ -314,7 +314,7 @@ runTectonic verbosity program args' tmpDir' source = do env <- liftIO getEnvironment when (verbosity >= INFO) $ liftIO $ showVerboseInfo (Just tmpDir) program programArgs env - (utf8ToString sourceBL) + (utf8ToText sourceBL) (exit, out) <- liftIO $ E.catch (pipeProcess (Just env) program programArgs sourceBL) (handlePDFProgramNotFound program) @@ -385,7 +385,7 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do (pipeProcess (Just env'') program programArgs BL.empty) (handlePDFProgramNotFound program) when (verbosity >= INFO) $ liftIO $ do - UTF8.hPutStrLn stderr $ "[makePDF] Run #" ++ show runNumber + UTF8.hPutStrLn stderr $ "[makePDF] Run #" <> tshow runNumber BL.hPutStr stderr out UTF8.hPutStr stderr "\n" if runNumber < numRuns @@ -405,7 +405,7 @@ generic2pdf :: Verbosity generic2pdf verbosity program args source = do env' <- getEnvironment when (verbosity >= INFO) $ - showVerboseInfo Nothing program args env' (T.unpack source) + showVerboseInfo Nothing program args env' source (exit, out) <- E.catch (pipeProcess (Just env') program args (BL.fromStrict $ UTF8.fromText source)) @@ -494,19 +494,20 @@ showVerboseInfo :: Maybe FilePath -> String -> [String] -> [(String, String)] - -> String + -> Text -> IO () showVerboseInfo mbTmpDir program programArgs env source = do case mbTmpDir of Just tmpDir -> do UTF8.hPutStrLn stderr "[makePDF] temp dir:" - UTF8.hPutStrLn stderr tmpDir + UTF8.hPutStrLn stderr (T.pack tmpDir) Nothing -> return () UTF8.hPutStrLn stderr "[makePDF] Command line:" - UTF8.hPutStrLn stderr $ program ++ " " ++ unwords (map show programArgs) + UTF8.hPutStrLn stderr $ + T.pack program <> " " <> T.pack (unwords (map show programArgs)) UTF8.hPutStr stderr "\n" UTF8.hPutStrLn stderr "[makePDF] Environment:" - mapM_ (UTF8.hPutStrLn stderr . show) env + mapM_ (UTF8.hPutStrLn stderr . tshow) env UTF8.hPutStr stderr "\n" UTF8.hPutStrLn stderr "[makePDF] Source:" UTF8.hPutStrLn stderr source @@ -517,8 +518,8 @@ handlePDFProgramNotFound program e E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program | otherwise = E.throwIO e -utf8ToString :: ByteString -> String -utf8ToString lbs = +utf8ToText :: ByteString -> Text +utf8ToText lbs = case decodeUtf8' lbs of - Left _ -> BC.unpack lbs -- if decoding fails, treat as latin1 - Right t -> TL.unpack t + Left _ -> T.pack $ BC.unpack lbs -- if decoding fails, treat as latin1 + Right t -> TL.toStrict t diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4c4dd531d..8d3799c3e 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1148,7 +1148,7 @@ testStringWith :: Show a => ParserT Text ParserState Identity a -> Text -> IO () -testStringWith parser str = UTF8.putStrLn $ show $ +testStringWith parser str = UTF8.putStrLn $ tshow $ readWith parser defaultParserState str -- | Parsing options. diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 567f5abe5..4d5921faf 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -39,67 +39,65 @@ where import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as BL +import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.IO as TIO import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile) import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr, putStrLn, readFile, writeFile) -import qualified System.IO as IO -readFile :: FilePath -> IO String +readFile :: FilePath -> IO Text readFile f = do h <- openFile (encodePath f) ReadMode hGetContents h -getContents :: IO String +getContents :: IO Text getContents = hGetContents stdin -writeFileWith :: Newline -> FilePath -> String -> IO () +writeFileWith :: Newline -> FilePath -> Text -> IO () writeFileWith eol f s = withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s -writeFile :: FilePath -> String -> IO () +writeFile :: FilePath -> Text -> IO () writeFile = writeFileWith nativeNewline -putStrWith :: Newline -> String -> IO () +putStrWith :: Newline -> Text -> IO () putStrWith eol s = hPutStrWith eol stdout s -putStr :: String -> IO () +putStr :: Text -> IO () putStr = putStrWith nativeNewline -putStrLnWith :: Newline -> String -> IO () +putStrLnWith :: Newline -> Text -> IO () putStrLnWith eol s = hPutStrLnWith eol stdout s -putStrLn :: String -> IO () +putStrLn :: Text -> IO () putStrLn = putStrLnWith nativeNewline -hPutStrWith :: Newline -> Handle -> String -> IO () +hPutStrWith :: Newline -> Handle -> Text -> IO () hPutStrWith eol h s = hSetNewlineMode h (NewlineMode eol eol) >> - hSetEncoding h utf8 >> IO.hPutStr h s + hSetEncoding h utf8 >> TIO.hPutStr h s -hPutStr :: Handle -> String -> IO () +hPutStr :: Handle -> Text -> IO () hPutStr = hPutStrWith nativeNewline -hPutStrLnWith :: Newline -> Handle -> String -> IO () +hPutStrLnWith :: Newline -> Handle -> Text -> IO () hPutStrLnWith eol h s = hSetNewlineMode h (NewlineMode eol eol) >> - hSetEncoding h utf8 >> IO.hPutStrLn h s + hSetEncoding h utf8 >> TIO.hPutStrLn h s -hPutStrLn :: Handle -> String -> IO () +hPutStrLn :: Handle -> Text -> IO () hPutStrLn = hPutStrLnWith nativeNewline -hGetContents :: Handle -> IO String -hGetContents = fmap toString . B.hGetContents --- hGetContents h = hSetEncoding h utf8_bom --- >> hSetNewlineMode h universalNewlineMode --- >> IO.hGetContents h +hGetContents :: Handle -> IO Text +hGetContents = fmap toText . B.hGetContents -- | Convert UTF8-encoded ByteString to Text, also -- removing '\r' characters. -toText :: B.ByteString -> T.Text +toText :: B.ByteString -> Text toText = T.decodeUtf8 . filterCRs . dropBOM where dropBOM bs = if "\xEF\xBB\xBF" `B.isPrefixOf` bs @@ -127,7 +125,7 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM toStringLazy :: BL.ByteString -> String toStringLazy = TL.unpack . toTextLazy -fromText :: T.Text -> B.ByteString +fromText :: Text -> B.ByteString fromText = T.encodeUtf8 fromTextLazy :: TL.Text -> BL.ByteString diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs index 07d825f73..59b04eac1 100644 --- a/test/Tests/Command.hs +++ b/test/Tests/Command.hs @@ -130,7 +130,7 @@ runCommandTest testExePath fp num code = let cmdline = "% " <> cmd let x = cmdline <> "\n" <> input <> "^D\n" <> norm let y = cmdline <> "\n" <> input <> "^D\n" <> newnorm - let updated = T.unpack $ T.replace (T.pack x) (T.pack y) (T.pack raw) + let updated = T.replace (T.pack x) (T.pack y) raw UTF8.writeFile fp' updated extractCommandTest :: FilePath -> FilePath -> TestTree diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index 17ece49fd..160086be2 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -22,6 +22,7 @@ import Test.Tasty (TestTree, testGroup) import Test.Tasty.Golden.Advanced (goldenTest) import Tests.Helpers hiding (test) import qualified Text.Pandoc.UTF8 as UTF8 +import qualified Data.Text as T tests :: FilePath -> [TestTree] tests pandocPath = @@ -231,7 +232,7 @@ tests pandocPath = -- makes sure file is fully closed after reading readFile' :: FilePath -> IO String readFile' f = do s <- UTF8.readFile f - return $! (length s `seq` s) + return $! (T.length s `seq` T.unpack s) lhsWriterTests :: FilePath -> String -> [TestTree] lhsWriterTests pandocPath format @@ -333,7 +334,7 @@ testWithNormalize normalizer pandocPath testname opts inp norm = $ UTF8.toStringLazy out -- filter \r so the tests will work on Windows machines else fail $ "Pandoc failed with error code " ++ show ec - updateGolden = UTF8.writeFile norm + updateGolden = UTF8.writeFile norm . T.pack options = ["--data-dir=../data","--quiet"] ++ [inp] ++ opts compareValues :: FilePath -> [String] -> String -> String -> IO (Maybe String) diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index 77104c853..8385b751e 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -54,7 +54,7 @@ tokUntokRt s = untokenize (tokenize "random" t) == t tests :: [TestTree] tests = [ testGroup "tokenization" [ testCase "tokenizer round trip on test case" $ do - orig <- T.pack <$> UTF8.readFile "../test/latex-reader.latex" + orig <- UTF8.readFile "../test/latex-reader.latex" let new = untokenize $ tokenize "../test/latex-reader.latex" orig assertEqual "untokenize . tokenize is identity" orig new -- cgit v1.2.3 From 24191a2a278c0dec30bacd66b78cbb8cc8d91324 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 15 Mar 2021 10:37:35 -0700 Subject: Use foldl' instead of foldl everywhere. --- src/Text/Pandoc/App/CommandLineOptions.hs | 4 ++-- src/Text/Pandoc/Citeproc/Locator.hs | 3 ++- src/Text/Pandoc/Class/PandocMonad.hs | 3 ++- src/Text/Pandoc/Extensions.hs | 3 ++- src/Text/Pandoc/Lua/Filter.hs | 3 ++- src/Text/Pandoc/Readers/Docx/Combine.hs | 4 ++-- src/Text/Pandoc/Readers/HTML/TagCategories.hs | 1 + src/Text/Pandoc/Readers/Markdown.hs | 6 +++--- src/Text/Pandoc/Readers/Odt/Arrows/State.hs | 4 ++-- src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs | 4 ++-- src/Text/Pandoc/Readers/Odt/StyleReader.hs | 4 ++-- src/Text/Pandoc/Readers/Textile.hs | 4 ++-- src/Text/Pandoc/Shared.hs | 4 ++-- src/Text/Pandoc/Writers/HTML.hs | 8 ++++---- src/Text/Pandoc/Writers/RST.hs | 4 ++-- src/Text/Pandoc/Writers/Texinfo.hs | 4 ++-- 16 files changed, 34 insertions(+), 29 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index b4483f756..a6df12715 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -31,7 +31,7 @@ import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder, defConfig, Indent(..), NumberFormat(..)) import Data.Bifunctor (second) import Data.Char (toLower) -import Data.List (intercalate, sort) +import Data.List (intercalate, sort, foldl') #ifdef _WINDOWS #if MIN_VERSION_base(4,12,0) import Data.List (isPrefixOf) @@ -93,7 +93,7 @@ parseOptionsFromArgs options' defaults prg rawArgs = do ("Try " ++ prg ++ " --help for more information.") -- thread option data structure through all supplied option actions - opts <- foldl (>>=) (return defaults) actions + opts <- foldl' (>>=) (return defaults) actions let mbArgs = case args of [] -> Nothing xs -> Just xs diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index dba762c02..44416ca12 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -7,6 +7,7 @@ where import Citeproc.Types import Data.Text (Text) import qualified Data.Text as T +import Data.List (foldl') import Text.Parsec import Text.Pandoc.Definition import Text.Pandoc.Parsing (romanNumeral) @@ -139,7 +140,7 @@ pBalancedBraces braces p = try $ do where except = notFollowedBy pBraces >> p -- outer and inner - surround = foldl (\a (open, close) -> sur open close except <|> a) + surround = foldl' (\a (open, close) -> sur open close except <|> a) except braces diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 86c8de79e..293a822a0 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -59,6 +59,7 @@ import Control.Monad.Except (MonadError (catchError, throwError), MonadTrans, lift, when) import Data.Digest.Pure.SHA (sha1, showDigest) import Data.Maybe (fromMaybe) +import Data.List (foldl') import Data.Time (UTCTime) import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds, posixSecondsToUTCTime) @@ -612,7 +613,7 @@ checkExistence fn = do -- | Canonicalizes a file path by removing redundant @.@ and @..@. makeCanonical :: FilePath -> FilePath makeCanonical = Posix.joinPath . transformPathParts . splitDirectories - where transformPathParts = reverse . foldl go [] + where transformPathParts = reverse . foldl' go [] go as "." = as go (_:as) ".." = as go as x = x : as diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 3b96f9e04..266a09e3c 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -34,6 +34,7 @@ module Text.Pandoc.Extensions ( Extension(..) where import Data.Bits (clearBit, setBit, testBit, (.|.)) import Data.Data (Data) +import Data.List (foldl') import qualified Data.Text as T import Data.Typeable (Typeable) import GHC.Generics (Generic) @@ -593,7 +594,7 @@ parseFormatSpec :: T.Text parseFormatSpec = parse formatSpec "" where formatSpec = do name <- formatName - (extsToEnable, extsToDisable) <- foldl (flip ($)) ([],[]) <$> + (extsToEnable, extsToDisable) <- foldl' (flip ($)) ([],[]) <$> many extMod return (T.pack name, reverse extsToEnable, reverse extsToDisable) formatName = many1 $ noneOf "-+" diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index bffe01a34..90967f295 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -22,6 +22,7 @@ import Control.Monad.Catch (finally, try) import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf, showConstr, toConstr, tyconUQname) import Data.Foldable (foldrM) +import Data.List (foldl') import Data.Map (Map) import Data.Maybe (fromMaybe) import Foreign.Lua (Lua, Peekable, Pushable, StackIndex) @@ -204,7 +205,7 @@ walkMeta lf (Pandoc m bs) = do walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc walkPandoc (LuaFilter fnMap) = - case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of + case foldl' mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of Just fn -> \x -> runFilterFunction fn x *> singleElement x Nothing -> return diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index bcf26c4a3..7c6d01769 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -182,7 +182,7 @@ isAttrModifier _ = False smushInlines :: [Inlines] -> Inlines smushInlines xs = combineInlines xs' mempty - where xs' = foldl combineInlines mempty xs + where xs' = foldl' combineInlines mempty xs smushBlocks :: [Blocks] -> Blocks -smushBlocks xs = foldl combineBlocks mempty xs +smushBlocks xs = foldl' combineBlocks mempty xs diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs index b7bd40fee..67aba1cb1 100644 --- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs +++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs @@ -23,6 +23,7 @@ where import Data.Set (Set, fromList, unions) import Data.Text (Text) +import Data.List (foldl') eitherBlockOrInline :: Set Text eitherBlockOrInline = fromList diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 34edbcc17..dc94fc2d6 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -21,7 +21,7 @@ module Text.Pandoc.Readers.Markdown ( import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isPunctuation, isSpace) -import Data.List (transpose, elemIndex, sortOn) +import Data.List (transpose, elemIndex, sortOn, foldl') import qualified Data.Map as M import Data.Maybe import qualified Data.Set as Set @@ -357,7 +357,7 @@ referenceKey = try $ do addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes >> many (try $ spnl >> keyValAttr) blanklines - let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs + let attr' = extractIdClass $ foldl' (\x f -> f x) attr addKvs target = (escapeURI $ trimr src, tit) st <- getState let oldkeys = stateKeys st @@ -613,7 +613,7 @@ attributes = try $ do spnl attrs <- many (attribute <* spnl) char '}' - return $ foldl (\x f -> f x) nullAttr attrs + return $ foldl' (\x f -> f x) nullAttr attrs attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs index 93c6b5e79..96515bf56 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where import Control.Arrow import qualified Control.Category as Cat import Control.Monad - +import Data.List (foldl') import Text.Pandoc.Readers.Odt.Arrows.Utils import Text.Pandoc.Readers.Odt.Generic.Fallible @@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f iterateSL :: (Foldable f, MonadPlus m) => ArrowState s x y -> ArrowState s (f x) (m y) -iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f +iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 0d921e23b..341903046 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -64,12 +64,12 @@ import qualified Data.Map as M import Data.Text (Text) import Data.Default import Data.Maybe +import Data.List (foldl') import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Readers.Odt.Arrows.State import Text.Pandoc.Readers.Odt.Arrows.Utils - import Text.Pandoc.Readers.Odt.Generic.Namespaces import Text.Pandoc.Readers.Odt.Generic.Utils import Text.Pandoc.Readers.Odt.Generic.Fallible @@ -293,7 +293,7 @@ readNSattributes = fromState $ \state -> maybe (state, failEmpty ) => XMLConverterState nsID extraState -> Maybe (XMLConverterState nsID extraState) extractNSAttrs startState - = foldl (\state d -> state >>= addNS d) + = foldl' (\state d -> state >>= addNS d) (Just startState) nsAttribs where nsAttribs = mapMaybe readNSattr (XML.elAttribs element) diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 5e10f896c..b722aa07d 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -44,7 +44,7 @@ import Control.Arrow import Data.Default import qualified Data.Foldable as F -import Data.List (unfoldr) +import Data.List (unfoldr, foldl') import qualified Data.Map as M import Data.Maybe import Data.Text (Text) @@ -120,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" ( &&& lookupDefaultingAttr NsStyle "font-pitch" )) - >>?^ ( M.fromList . foldl accumLegalPitches [] ) + >>?^ ( M.fromList . foldl' accumLegalPitches [] ) ) `ifFailedDo` returnV (Right M.empty) where accumLegalPitches ls (Nothing,_) = ls accumLegalPitches ls (Just n,p) = (n,p):ls diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 860da2dc3..99238c7f0 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -38,7 +38,7 @@ module Text.Pandoc.Readers.Textile ( readTextile) where import Control.Monad (guard, liftM) import Control.Monad.Except (throwError) import Data.Char (digitToInt, isUpper) -import Data.List (intersperse, transpose) +import Data.List (intersperse, transpose, foldl') import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup (Tag (..), fromAttrib) @@ -627,7 +627,7 @@ code2 = do -- | Html / CSS attributes attributes :: PandocMonad m => ParserT Text ParserState m Attr -attributes = foldl (flip ($)) ("",[],[]) <$> +attributes = foldl' (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d11ad13f5..0ce9396b3 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -109,7 +109,7 @@ import qualified Data.Bifunctor as Bifunctor import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum, generalCategory, GeneralCategory(NonSpacingMark, SpacingCombiningMark, EnclosingMark, ConnectorPunctuation)) -import Data.List (find, intercalate, intersperse, sortOn) +import Data.List (find, intercalate, intersperse, sortOn, foldl') import qualified Data.Map as M import Data.Maybe (mapMaybe, fromMaybe) import Data.Monoid (Any (..)) @@ -840,7 +840,7 @@ mapLeft = Bifunctor.first -- > collapseFilePath "parent/foo/.." == "parent" -- > collapseFilePath "/parent/foo/../../bar" == "/bar" collapseFilePath :: FilePath -> FilePath -collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories +collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories where go rs "." = rs go r@(p:rs) ".." = case p of diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 2f33cd467..332de1545 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -30,7 +30,7 @@ module Text.Pandoc.Writers.HTML ( ) where import Control.Monad.State.Strict import Data.Char (ord) -import Data.List (intercalate, intersperse, partition, delete, (\\)) +import Data.List (intercalate, intersperse, partition, delete, (\\), foldl') import Data.List.NonEmpty (NonEmpty((:|))) import Data.Maybe (fromMaybe, isJust, isNothing) import qualified Data.Set as Set @@ -544,7 +544,7 @@ tagWithAttributes opts html5 selfClosing tagname attr = addAttrs :: PandocMonad m => WriterOptions -> Attr -> Html -> StateT WriterState m Html -addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr +addAttrs opts attr h = foldl' (!) h <$> attrsToHtml opts attr toAttrs :: PandocMonad m => [(Text, Text)] -> StateT WriterState m [Attribute] @@ -926,7 +926,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do numstyle'] else []) l <- ordList opts contents - return $ foldl (!) l attribs + return $ foldl' (!) l attribs blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- liftM H.dt $ inlineListToHtml opts term @@ -1407,7 +1407,7 @@ inlineToHtml opts inline = do Just "audio" -> mediaTag H5.audio "Audio" Just _ -> (H5.embed, []) _ -> imageTag - return $ foldl (!) tag $ attributes ++ specAttrs + return $ foldl' (!) tag $ attributes ++ specAttrs -- note: null title included, as in Markdown.pl (Note contents) -> do notes <- gets stNotes diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index d01e13db4..54d042332 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -16,7 +16,7 @@ reStructuredText: module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Control.Monad.State.Strict import Data.Char (isSpace) -import Data.List (transpose, intersperse) +import Data.List (transpose, intersperse, foldl') import Data.Maybe (fromMaybe) import qualified Data.Text as T import Data.Text (Text) @@ -509,7 +509,7 @@ flatten outer | null contents = [outer] | otherwise = combineAll contents where contents = dropInlineParent outer - combineAll = foldl combine [] + combineAll = foldl' combine [] combine :: [Inline] -> Inline -> [Inline] combine f i = diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 53da70f84..9d695563f 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -15,7 +15,7 @@ module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Control.Monad.Except (throwError) import Control.Monad.State.Strict import Data.Char (chr, ord) -import Data.List (maximumBy, transpose) +import Data.List (maximumBy, transpose, foldl') import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) @@ -271,7 +271,7 @@ tableAnyRowToTexinfo :: PandocMonad m -> [[Block]] -> TI m (Doc Text) tableAnyRowToTexinfo itemtype aligns cols = - (literal itemtype $$) . foldl (\row item -> row $$ + (literal itemtype $$) . foldl' (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty <$> zipWithM alignedBlock aligns cols alignedBlock :: PandocMonad m -- cgit v1.2.3 From aecbf8156eb7c36c4b41de27797e262c23728db5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Apr 2021 21:28:48 -0700 Subject: Remove Text.Pandoc.BCP47 module. [API change] Use Lang from UnicodeCollation.Lang instead. This is a richer implementation of BCP 47. --- pandoc.cabal | 1 - src/Text/Pandoc/App.hs | 6 +- src/Text/Pandoc/BCP47.hs | 99 ------------- src/Text/Pandoc/Citeproc.hs | 10 +- src/Text/Pandoc/Citeproc/BibTeX.hs | 20 +-- src/Text/Pandoc/Citeproc/Data.hs | 12 +- src/Text/Pandoc/Class/CommonState.hs | 2 +- src/Text/Pandoc/Class/PandocMonad.hs | 4 +- src/Text/Pandoc/Readers/BibTeX.hs | 9 +- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- src/Text/Pandoc/Readers/LaTeX/Lang.hs | 241 ++++++++++++++++---------------- src/Text/Pandoc/Writers/BibTeX.hs | 2 +- src/Text/Pandoc/Writers/ConTeXt.hs | 48 +++---- src/Text/Pandoc/Writers/CslJson.hs | 7 +- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- src/Text/Pandoc/Writers/LaTeX/Lang.hs | 8 +- src/Text/Pandoc/Writers/ODT.hs | 7 +- src/Text/Pandoc/Writers/OpenDocument.hs | 8 +- src/Text/Pandoc/Writers/Shared.hs | 2 +- 20 files changed, 198 insertions(+), 294 deletions(-) delete mode 100644 src/Text/Pandoc/BCP47.hs (limited to 'src/Text/Pandoc/Class') diff --git a/pandoc.cabal b/pandoc.cabal index b6cbb0d7a..8816767e9 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -601,7 +601,6 @@ library Text.Pandoc.Asciify, Text.Pandoc.Emoji, Text.Pandoc.ImageSize, - Text.Pandoc.BCP47, Text.Pandoc.Class, Text.Pandoc.Citeproc other-modules: Text.Pandoc.App.CommandLineOptions, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 6b45e5418..67d3cce7d 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs, options) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) -import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import UnicodeCollation.Lang (Lang (..), parseLang) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.SelfContained (makeSelfContained) @@ -200,8 +200,8 @@ convertWithOpts opts = do Just f -> readFileStrict f case lookupMetaString "lang" (optMetadata opts) of - "" -> setTranslations $ Lang "en" "" "US" [] - l -> case parseBCP47 l of + "" -> setTranslations $ Lang "en" Nothing (Just "US") [] [] [] + l -> case parseLang l of Left _ -> report $ InvalidLang l Right l' -> setTranslations l' diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs deleted file mode 100644 index 1ecf0bf73..000000000 --- a/src/Text/Pandoc/BCP47.hs +++ /dev/null @@ -1,99 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.BCP47 - Copyright : Copyright (C) 2017-2021 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Functions for parsing and rendering BCP47 language identifiers. --} -module Text.Pandoc.BCP47 ( - getLang - , parseBCP47 - , Lang(..) - , renderLang - ) -where -import Control.Monad (guard) -import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.DocTemplates (FromContext(..)) -import qualified Data.Text as T -import qualified Text.Parsec as P - --- | Represents BCP 47 language/country code. -data Lang = Lang{ langLanguage :: T.Text - , langScript :: T.Text - , langRegion :: T.Text - , langVariants :: [T.Text] } - deriving (Eq, Ord, Show) - --- | Render a Lang as BCP 47. -renderLang :: Lang -> T.Text -renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null) - ([langScript lang, langRegion lang] ++ langVariants lang)) - --- | Parse a BCP 47 string as a Lang. Currently we parse --- extensions and private-use fields as "variants," even --- though officially they aren't. -parseBCP47 :: T.Text -> Either T.Text Lang -parseBCP47 lang = - case P.parse bcp47 "lang" lang of - Right r -> Right r - Left e -> Left $ T.pack $ show e - where bcp47 = do - language <- pLanguage - script <- P.option "" pScript - region <- P.option "" pRegion - variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse) - P.eof - return Lang{ langLanguage = language - , langScript = script - , langRegion = region - , langVariants = variants } - asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) - pLanguage = do - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return $ T.toLower $ T.pack cs - pScript = P.try $ do - P.char '-' - x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) - xs <- P.count 3 - (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) - return $ T.toLower $ T.pack (x:xs) - pRegion = P.try $ do - P.char '-' - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return $ T.toUpper $ T.pack cs - pVariant = P.try $ do - P.char '-' - ds <- P.option "" (P.count 1 P.digit) - cs <- P.many1 asciiLetter - let var = ds ++ cs - lv = length var - guard $ if null ds - then lv >= 5 && lv <= 8 - else lv == 4 - return $ T.toLower $ T.pack var - pExtension = P.try $ do - P.char '-' - cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) - let lcs = length cs - guard $ lcs >= 2 && lcs <= 8 - return $ T.toLower $ T.pack cs - pPrivateUse = P.try $ do - P.char '-' - P.char 'x' - P.char '-' - cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) - guard $ not (null cs) && length cs <= 8 - let var = "x-" ++ cs - return $ T.toLower $ T.pack var diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index af302f782..c9f1806e4 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -18,7 +18,6 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..)) import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText) import Text.Pandoc.Readers.Markdown (yamlToRefs) -import qualified Text.Pandoc.BCP47 as BCP47 import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition as Pandoc @@ -630,13 +629,8 @@ removeFinalPeriod ils = bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang) bcp47LangToIETF bcplang = - case BCP47.parseBCP47 bcplang of + case parseLang bcplang of Left _ -> do report $ InvalidLang bcplang return Nothing - Right lang -> - return $ Just - $ Lang (BCP47.langLanguage lang) - (if T.null (BCP47.langRegion lang) - then Nothing - else Just (BCP47.langRegion lang)) + Right lang -> return $ Just lang diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index c0752dadc..510e56f9c 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -205,10 +205,13 @@ writeBibtexString opts variant mblang ref = [ (", " <>) <$> nameGiven name, nameDroppingParticle name ] - mblang' = (parseLang <$> getVariableAsText "language") <|> mblang + mblang' = case getVariableAsText "language" of + Just l -> either (const Nothing) Just $ parseLang l + Nothing -> mblang titlecase = case mblang' of - Just (Lang "en" _) -> titlecase' + Just lang | langLanguage lang == "en" + -> titlecase' Nothing -> titlecase' _ -> case variant of @@ -331,7 +334,7 @@ writeBibtexString opts variant mblang ref = renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField defaultLang :: Lang -defaultLang = Lang "en" (Just "US") +defaultLang = Lang "en" Nothing (Just "US") [] [] [] -- a map of bibtex "string" macros type StringMap = Map.Map Text Text @@ -351,9 +354,7 @@ itemToReference locale variant item = do bib item $ do let lang = fromMaybe defaultLang $ localeLanguage locale modify $ \st -> st{ localeLang = lang, - untitlecase = case lang of - (Lang "en" _) -> True - _ -> False } + untitlecase = langLanguage lang == "en" } id' <- asks identifier otherIds <- (Just <$> getRawField "ids") @@ -711,7 +712,7 @@ itemToReference locale variant item = do bib :: Item -> Bib a -> BibParser a -bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US"))) +bib entry m = fst <$> evalRWST m entry (BibState True defaultLang) resolveCrossRefs :: Variant -> [Item] -> [Item] resolveCrossRefs variant entries = @@ -1456,8 +1457,9 @@ resolveKey lang ils = Walk.walk go ils go x = x resolveKey' :: Lang -> Text -> Text -resolveKey' lang@(Lang l _) k = - case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of +resolveKey' lang k = + case Map.lookup (langLanguage lang) biblatexStringMap >>= + Map.lookup (T.toLower k) of Nothing -> k Just (x, _) -> either (const k) stringify $ parseLaTeX lang x diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs index 40430b0f5..388b9ba62 100644 --- a/src/Text/Pandoc/Citeproc/Data.hs +++ b/src/Text/Pandoc/Citeproc/Data.hs @@ -21,12 +21,12 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text)) biblatexStringMap = foldr go mempty biblatexLocalizations where go (fp, bs) = - let Lang lang _ _ _ _ _ = parseLang - (toIETF $ T.takeWhile (/= '.') $ T.pack fp) - ls = T.lines $ TE.decodeUtf8 bs - in if length ls > 4 - then M.insert lang (toStringMap $ map (T.splitOn "|") ls) - else id + let ls = T.lines $ TE.decodeUtf8 bs + in case parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) of + Right lang | length ls > 4 + -> M.insert (langLanguage lang) + (toStringMap $ map (T.splitOn "|") ls) + _ -> id toStringMap = foldr go' mempty go' [term, x, y] = M.insert term (x, y) go' _ = id diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs index 7e1735c2b..0fd094d99 100644 --- a/src/Text/Pandoc/Class/CommonState.hs +++ b/src/Text/Pandoc/Class/CommonState.hs @@ -19,7 +19,7 @@ where import Data.Default (Default (def)) import Data.Text (Text) -import Text.Pandoc.BCP47 (Lang) +import UnicodeCollation.Lang (Lang) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING)) import Text.Pandoc.Translations (Translations) diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 293a822a0..76f1fa32b 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, import System.FilePath ((), (<.>), takeExtension, dropExtension, isRelative, splitDirectories) import System.Random (StdGen) -import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) +import UnicodeCollation.Lang (Lang(..), parseLang, renderLang) import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error @@ -285,7 +285,7 @@ readFileFromDirs (d:ds) f = catchError toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang) toLang Nothing = return Nothing toLang (Just s) = - case parseBCP47 s of + case parseLang s of Left _ -> do report $ InvalidLang s return Nothing diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs index 956b9f1f7..b82a81350 100644 --- a/src/Text/Pandoc/Readers/BibTeX.hs +++ b/src/Text/Pandoc/Readers/BibTeX.hs @@ -48,11 +48,14 @@ readBibLaTeX = readBibTeX' BibTeX.Biblatex readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc readBibTeX' variant _opts t = do - lang <- maybe (Lang "en" (Just "US")) parseLang - <$> lookupEnv "LANG" + mblangEnv <- lookupEnv "LANG" + let defaultLang = Lang "en" Nothing (Just "US") [] [] [] + let lang = case mblangEnv of + Nothing -> defaultLang + Just l -> either (const defaultLang) id $ parseLang l locale <- case getLocale lang of Left e -> - case getLocale (Lang "en" (Just "US")) of + case getLocale (Lang "en" Nothing (Just "US") [] [] []) of Right l -> return l Left _ -> throwError $ PandocCiteprocError e Right l -> return l diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 851756065..83caf742a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,7 +33,7 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import System.FilePath (addExtension, replaceExtension, takeExtension) -import Text.Pandoc.BCP47 (renderLang) +import UnicodeCollation.Lang (renderLang) import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index 08e217bdb..b92e6ab57 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -23,7 +23,7 @@ import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Shared (extractSpaces) -import Text.Pandoc.BCP47 (Lang(..), renderLang) +import UnicodeCollation.Lang (Lang(..), renderLang) import Text.Pandoc.Class (PandocMonad(..), setTranslations) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..), @@ -99,133 +99,136 @@ setDefaultLanguage = do polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang) polyglossiaLangToBCP47 = M.fromList [ ("arabic", \o -> case T.filter (/=' ') o of - "locale=algeria" -> Lang "ar" "" "DZ" [] - "locale=mashriq" -> Lang "ar" "" "SY" [] - "locale=libya" -> Lang "ar" "" "LY" [] - "locale=morocco" -> Lang "ar" "" "MA" [] - "locale=mauritania" -> Lang "ar" "" "MR" [] - "locale=tunisia" -> Lang "ar" "" "TN" [] - _ -> Lang "ar" "" "" []) + "locale=algeria" -> Lang "ar" Nothing (Just "DZ") [] [] [] + "locale=mashriq" -> Lang "ar" Nothing (Just "SY") [] [] [] + "locale=libya" -> Lang "ar" Nothing (Just "LY") [] [] [] + "locale=morocco" -> Lang "ar" Nothing (Just "MA") [] [] [] + "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] [] + "locale=tunisia" -> Lang "ar" Nothing (Just "TN") [] [] [] + _ -> Lang "ar" Nothing (Just "") [] [] []) , ("german", \o -> case T.filter (/=' ') o of - "spelling=old" -> Lang "de" "" "DE" ["1901"] + "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] [] "variant=austrian,spelling=old" - -> Lang "de" "" "AT" ["1901"] - "variant=austrian" -> Lang "de" "" "AT" [] + -> Lang "de" Nothing (Just "AT") ["1901"] [] [] + "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] [] "variant=swiss,spelling=old" - -> Lang "de" "" "CH" ["1901"] - "variant=swiss" -> Lang "de" "" "CH" [] - _ -> Lang "de" "" "" []) - , ("lsorbian", \_ -> Lang "dsb" "" "" []) + -> Lang "de" Nothing (Just "CH") ["1901"] [] [] + "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] [] + _ -> Lang "de" Nothing Nothing [] [] []) + , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] []) , ("greek", \o -> case T.filter (/=' ') o of - "variant=poly" -> Lang "el" "" "polyton" [] - "variant=ancient" -> Lang "grc" "" "" [] - _ -> Lang "el" "" "" []) + "variant=poly" -> Lang "el" Nothing (Just "polyton") [] [] [] + "variant=ancient" -> Lang "grc" Nothing Nothing [] [] [] + _ -> Lang "el" Nothing Nothing [] [] []) , ("english", \o -> case T.filter (/=' ') o of - "variant=australian" -> Lang "en" "" "AU" [] - "variant=canadian" -> Lang "en" "" "CA" [] - "variant=british" -> Lang "en" "" "GB" [] - "variant=newzealand" -> Lang "en" "" "NZ" [] - "variant=american" -> Lang "en" "" "US" [] - _ -> Lang "en" "" "" []) - , ("usorbian", \_ -> Lang "hsb" "" "" []) + "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] [] + "variant=canadian" -> Lang "en" Nothing (Just "CA") [] [] [] + "variant=british" -> Lang "en" Nothing (Just "GB") [] [] [] + "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] [] + "variant=american" -> Lang "en" Nothing (Just "US") [] [] [] + _ -> Lang "en" Nothing (Just "") [] [] []) + , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] []) , ("latin", \o -> case T.filter (/=' ') o of - "variant=classic" -> Lang "la" "" "" ["x-classic"] - _ -> Lang "la" "" "" []) - , ("slovenian", \_ -> Lang "sl" "" "" []) - , ("serbianc", \_ -> Lang "sr" "cyrl" "" []) - , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"]) - , ("afrikaans", \_ -> Lang "af" "" "" []) - , ("amharic", \_ -> Lang "am" "" "" []) - , ("assamese", \_ -> Lang "as" "" "" []) - , ("asturian", \_ -> Lang "ast" "" "" []) - , ("bulgarian", \_ -> Lang "bg" "" "" []) - , ("bengali", \_ -> Lang "bn" "" "" []) - , ("tibetan", \_ -> Lang "bo" "" "" []) - , ("breton", \_ -> Lang "br" "" "" []) - , ("catalan", \_ -> Lang "ca" "" "" []) - , ("welsh", \_ -> Lang "cy" "" "" []) - , ("czech", \_ -> Lang "cs" "" "" []) - , ("coptic", \_ -> Lang "cop" "" "" []) - , ("danish", \_ -> Lang "da" "" "" []) - , ("divehi", \_ -> Lang "dv" "" "" []) - , ("esperanto", \_ -> Lang "eo" "" "" []) - , ("spanish", \_ -> Lang "es" "" "" []) - , ("estonian", \_ -> Lang "et" "" "" []) - , ("basque", \_ -> Lang "eu" "" "" []) - , ("farsi", \_ -> Lang "fa" "" "" []) - , ("finnish", \_ -> Lang "fi" "" "" []) - , ("french", \_ -> Lang "fr" "" "" []) - , ("friulan", \_ -> Lang "fur" "" "" []) - , ("irish", \_ -> Lang "ga" "" "" []) - , ("scottish", \_ -> Lang "gd" "" "" []) - , ("ethiopic", \_ -> Lang "gez" "" "" []) - , ("galician", \_ -> Lang "gl" "" "" []) - , ("hebrew", \_ -> Lang "he" "" "" []) - , ("hindi", \_ -> Lang "hi" "" "" []) - , ("croatian", \_ -> Lang "hr" "" "" []) - , ("magyar", \_ -> Lang "hu" "" "" []) - , ("armenian", \_ -> Lang "hy" "" "" []) - , ("interlingua", \_ -> Lang "ia" "" "" []) - , ("indonesian", \_ -> Lang "id" "" "" []) - , ("icelandic", \_ -> Lang "is" "" "" []) - , ("italian", \_ -> Lang "it" "" "" []) - , ("japanese", \_ -> Lang "jp" "" "" []) - , ("khmer", \_ -> Lang "km" "" "" []) - , ("kurmanji", \_ -> Lang "kmr" "" "" []) - , ("kannada", \_ -> Lang "kn" "" "" []) - , ("korean", \_ -> Lang "ko" "" "" []) - , ("lao", \_ -> Lang "lo" "" "" []) - , ("lithuanian", \_ -> Lang "lt" "" "" []) - , ("latvian", \_ -> Lang "lv" "" "" []) - , ("malayalam", \_ -> Lang "ml" "" "" []) - , ("mongolian", \_ -> Lang "mn" "" "" []) - , ("marathi", \_ -> Lang "mr" "" "" []) - , ("dutch", \_ -> Lang "nl" "" "" []) - , ("nynorsk", \_ -> Lang "nn" "" "" []) - , ("norsk", \_ -> Lang "no" "" "" []) - , ("nko", \_ -> Lang "nqo" "" "" []) - , ("occitan", \_ -> Lang "oc" "" "" []) - , ("panjabi", \_ -> Lang "pa" "" "" []) - , ("polish", \_ -> Lang "pl" "" "" []) - , ("piedmontese", \_ -> Lang "pms" "" "" []) - , ("portuguese", \_ -> Lang "pt" "" "" []) - , ("romansh", \_ -> Lang "rm" "" "" []) - , ("romanian", \_ -> Lang "ro" "" "" []) - , ("russian", \_ -> Lang "ru" "" "" []) - , ("sanskrit", \_ -> Lang "sa" "" "" []) - , ("samin", \_ -> Lang "se" "" "" []) - , ("slovak", \_ -> Lang "sk" "" "" []) - , ("albanian", \_ -> Lang "sq" "" "" []) - , ("serbian", \_ -> Lang "sr" "" "" []) - , ("swedish", \_ -> Lang "sv" "" "" []) - , ("syriac", \_ -> Lang "syr" "" "" []) - , ("tamil", \_ -> Lang "ta" "" "" []) - , ("telugu", \_ -> Lang "te" "" "" []) - , ("thai", \_ -> Lang "th" "" "" []) - , ("turkmen", \_ -> Lang "tk" "" "" []) - , ("turkish", \_ -> Lang "tr" "" "" []) - , ("ukrainian", \_ -> Lang "uk" "" "" []) - , ("urdu", \_ -> Lang "ur" "" "" []) - , ("vietnamese", \_ -> Lang "vi" "" "" []) + "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] [] + _ -> Lang "la" Nothing Nothing [] [] []) + , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] []) + , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] []) + , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] []) + , ("afrikaans", \_ -> simpleLang "af") + , ("amharic", \_ -> simpleLang "am") + , ("assamese", \_ -> simpleLang "as") + , ("asturian", \_ -> simpleLang "ast") + , ("bulgarian", \_ -> simpleLang "bg") + , ("bengali", \_ -> simpleLang "bn") + , ("tibetan", \_ -> simpleLang "bo") + , ("breton", \_ -> simpleLang "br") + , ("catalan", \_ -> simpleLang "ca") + , ("welsh", \_ -> simpleLang "cy") + , ("czech", \_ -> simpleLang "cs") + , ("coptic", \_ -> simpleLang "cop") + , ("danish", \_ -> simpleLang "da") + , ("divehi", \_ -> simpleLang "dv") + , ("esperanto", \_ -> simpleLang "eo") + , ("spanish", \_ -> simpleLang "es") + , ("estonian", \_ -> simpleLang "et") + , ("basque", \_ -> simpleLang "eu") + , ("farsi", \_ -> simpleLang "fa") + , ("finnish", \_ -> simpleLang "fi") + , ("french", \_ -> simpleLang "fr") + , ("friulan", \_ -> simpleLang "fur") + , ("irish", \_ -> simpleLang "ga") + , ("scottish", \_ -> simpleLang "gd") + , ("ethiopic", \_ -> simpleLang "gez") + , ("galician", \_ -> simpleLang "gl") + , ("hebrew", \_ -> simpleLang "he") + , ("hindi", \_ -> simpleLang "hi") + , ("croatian", \_ -> simpleLang "hr") + , ("magyar", \_ -> simpleLang "hu") + , ("armenian", \_ -> simpleLang "hy") + , ("interlingua", \_ -> simpleLang "ia") + , ("indonesian", \_ -> simpleLang "id") + , ("icelandic", \_ -> simpleLang "is") + , ("italian", \_ -> simpleLang "it") + , ("japanese", \_ -> simpleLang "jp") + , ("khmer", \_ -> simpleLang "km") + , ("kurmanji", \_ -> simpleLang "kmr") + , ("kannada", \_ -> simpleLang "kn") + , ("korean", \_ -> simpleLang "ko") + , ("lao", \_ -> simpleLang "lo") + , ("lithuanian", \_ -> simpleLang "lt") + , ("latvian", \_ -> simpleLang "lv") + , ("malayalam", \_ -> simpleLang "ml") + , ("mongolian", \_ -> simpleLang "mn") + , ("marathi", \_ -> simpleLang "mr") + , ("dutch", \_ -> simpleLang "nl") + , ("nynorsk", \_ -> simpleLang "nn") + , ("norsk", \_ -> simpleLang "no") + , ("nko", \_ -> simpleLang "nqo") + , ("occitan", \_ -> simpleLang "oc") + , ("panjabi", \_ -> simpleLang "pa") + , ("polish", \_ -> simpleLang "pl") + , ("piedmontese", \_ -> simpleLang "pms") + , ("portuguese", \_ -> simpleLang "pt") + , ("romansh", \_ -> simpleLang "rm") + , ("romanian", \_ -> simpleLang "ro") + , ("russian", \_ -> simpleLang "ru") + , ("sanskrit", \_ -> simpleLang "sa") + , ("samin", \_ -> simpleLang "se") + , ("slovak", \_ -> simpleLang "sk") + , ("albanian", \_ -> simpleLang "sq") + , ("serbian", \_ -> simpleLang "sr") + , ("swedish", \_ -> simpleLang "sv") + , ("syriac", \_ -> simpleLang "syr") + , ("tamil", \_ -> simpleLang "ta") + , ("telugu", \_ -> simpleLang "te") + , ("thai", \_ -> simpleLang "th") + , ("turkmen", \_ -> simpleLang "tk") + , ("turkish", \_ -> simpleLang "tr") + , ("ukrainian", \_ -> simpleLang "uk") + , ("urdu", \_ -> simpleLang "ur") + , ("vietnamese", \_ -> simpleLang "vi") ] +simpleLang :: Text -> Lang +simpleLang l = Lang l Nothing Nothing [] [] [] + babelLangToBCP47 :: T.Text -> Maybe Lang babelLangToBCP47 s = case s of - "austrian" -> Just $ Lang "de" "" "AT" ["1901"] - "naustrian" -> Just $ Lang "de" "" "AT" [] - "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"] - "nswissgerman" -> Just $ Lang "de" "" "CH" [] - "german" -> Just $ Lang "de" "" "DE" ["1901"] - "ngerman" -> Just $ Lang "de" "" "DE" [] - "lowersorbian" -> Just $ Lang "dsb" "" "" [] - "uppersorbian" -> Just $ Lang "hsb" "" "" [] - "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"] - "slovene" -> Just $ Lang "sl" "" "" [] - "australian" -> Just $ Lang "en" "" "AU" [] - "canadian" -> Just $ Lang "en" "" "CA" [] - "british" -> Just $ Lang "en" "" "GB" [] - "newzealand" -> Just $ Lang "en" "" "NZ" [] - "american" -> Just $ Lang "en" "" "US" [] - "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"] + "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] [] + "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] [] + "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] [] + "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] [] + "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] [] + "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] [] + "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] [] + "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] [] + "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] [] + "slovene" -> Just $ simpleLang "sl" + "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] [] + "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] [] + "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] [] + "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] [] + "american" -> Just $ Lang "en" Nothing (Just "US") [] [] [] + "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] [] _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47 diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs index b9ae0c13a..95de6b71f 100644 --- a/src/Text/Pandoc/Writers/BibTeX.hs +++ b/src/Text/Pandoc/Writers/BibTeX.hs @@ -43,7 +43,7 @@ writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text writeBibTeX' variant opts (Pandoc meta _) = do let mblang = case lookupMetaString "lang" meta of "" -> Nothing - t -> Just $ parseLang t + t -> either (const Nothing) Just $ parseLang t let refs = case lookupMeta "references" meta of Just (MetaList xs) -> mapMaybe metaValueToReference xs _ -> [] diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 3c9975be8..f352c84bc 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) -import Text.Pandoc.BCP47 +import UnicodeCollation.Lang (Lang(..)) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -555,26 +555,26 @@ fromBCP47 mbs = fromBCP47' <$> toLang mbs -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes fromBCP47' :: Maybe Lang -> Maybe Text -fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" -fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" -fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" -fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb" -fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz" -fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma" -fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo" -fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de" -fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at" -fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch" -fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr" -fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us" -fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb" -fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr" -fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr" -fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba" -fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il" -fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja" -fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua" -fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn" -fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn" -fromBCP47' (Just (Lang l _ _ _) ) = Just l -fromBCP47' Nothing = Nothing +fromBCP47' (Just (Lang "ar" _ (Just "SY") _ _ _)) = Just "ar-sy" +fromBCP47' (Just (Lang "ar" _ (Just "IQ") _ _ _)) = Just "ar-iq" +fromBCP47' (Just (Lang "ar" _ (Just "JO") _ _ _)) = Just "ar-jo" +fromBCP47' (Just (Lang "ar" _ (Just "LB") _ _ _)) = Just "ar-lb" +fromBCP47' (Just (Lang "ar" _ (Just "DZ") _ _ _)) = Just "ar-dz" +fromBCP47' (Just (Lang "ar" _ (Just "MA") _ _ _)) = Just "ar-ma" +fromBCP47' (Just (Lang "de" _ _ ["1901"] _ _)) = Just "deo" +fromBCP47' (Just (Lang "de" _ (Just "DE") _ _ _)) = Just "de-de" +fromBCP47' (Just (Lang "de" _ (Just "AT") _ _ _)) = Just "de-at" +fromBCP47' (Just (Lang "de" _ (Just "CH") _ _ _)) = Just "de-ch" +fromBCP47' (Just (Lang "el" _ _ ["poly"] _ _)) = Just "agr" +fromBCP47' (Just (Lang "en" _ (Just "US") _ _ _)) = Just "en-us" +fromBCP47' (Just (Lang "en" _ (Just "GB") _ _ _)) = Just "en-gb" +fromBCP47' (Just (Lang "grc"_ _ _ _ _)) = Just "agr" +fromBCP47' (Just (Lang "el" _ _ _ _ _)) = Just "gr" +fromBCP47' (Just (Lang "eu" _ _ _ _ _)) = Just "ba" +fromBCP47' (Just (Lang "he" _ _ _ _ _)) = Just "il" +fromBCP47' (Just (Lang "jp" _ _ _ _ _)) = Just "ja" +fromBCP47' (Just (Lang "uk" _ _ _ _ _)) = Just "ua" +fromBCP47' (Just (Lang "vi" _ _ _ _ _)) = Just "vn" +fromBCP47' (Just (Lang "zh" _ _ _ _ _)) = Just "cn" +fromBCP47' (Just (Lang l _ _ _ _ _)) = Just l +fromBCP47' Nothing = Nothing diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs index a10def95e..395335667 100644 --- a/src/Text/Pandoc/Writers/CslJson.hs +++ b/src/Text/Pandoc/Writers/CslJson.hs @@ -34,15 +34,16 @@ import Control.Monad.Identity import Citeproc.Locale (getLocale) import Citeproc.CslJson import Text.Pandoc.Options (WriterOptions) -import Data.Maybe (mapMaybe) +import Data.Maybe (mapMaybe, fromMaybe) import Data.Aeson.Encode.Pretty (Config (..), Indent (Spaces), NumberFormat (Generic), defConfig, encodePretty') writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeCslJson _opts (Pandoc meta _) = do - let lang = maybe (Lang "en" (Just "US")) parseLang - (lookupMeta "lang" meta >>= metaValueToText) + let lang = fromMaybe (Lang "en" Nothing (Just "US") [] [] []) + (lookupMeta "lang" meta >>= metaValueToText >>= + either (const Nothing) Just . parseLang) locale <- case getLocale lang of Left e -> throwError $ PandocCiteprocError e Right l -> return l diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 20bcd0324..7781df8e7 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting -import Text.Pandoc.BCP47 (getLang, renderLang) +import UnicodeCollation.Lang (renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Data.Time diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 1c970e6ad..e99bad738 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -29,7 +29,7 @@ import qualified Data.Text as T import Network.URI (unEscapeString) import Text.DocTemplates (FromContext(lookupContext), renderTemplate, Val(..), Context(..)) -import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import UnicodeCollation.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs index 871b2692a..437b84120 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -46,7 +46,7 @@ toPolyglossia (Lang "de" _ (Just "AT") vars _ _) toPolyglossia (Lang "de" _ (Just "AT") _ _ _) = ("german", "variant=austrian") toPolyglossia (Lang "de" _ (Just "CH") vars _ _) | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") -toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss") toPolyglossia (Lang "de" _ _ _ _ _) = ("german", "") toPolyglossia (Lang "dsb" _ _ _ _ _) = ("lsorbian", "") toPolyglossia (Lang "el" _ _ vars _ _) @@ -61,9 +61,9 @@ toPolyglossia (Lang "grc" _ _ _ _ _) = ("greek", "variant=ancient") toPolyglossia (Lang "hsb" _ _ _ _ _) = ("usorbian", "") toPolyglossia (Lang "la" _ _ vars _ _) | "x-classic" `elem` vars = ("latin", "variant=classic") -toPolyglossia (Lang "pt" _ "BR" _ _ _) = ("portuguese", "variant=brazilian") +toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian") toPolyglossia (Lang "sl" _ _ _ _ _) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") +toPolyglossia x = (commonFromBcp47 x, "") -- Takes a list of the constituents of a BCP47 language code and -- converts it to a Babel language string. @@ -81,7 +81,7 @@ toBabel (Lang "de" _ _ vars _ _) | "1901" `elem` vars = "german" | otherwise = "ngerman" toBabel (Lang "dsb" _ _ _ _ _) = "lowersorbian" -toBabel (Lang "el" _ _ vars) +toBabel (Lang "el" _ _ vars _ _) | "polyton" `elem` vars = "polutonikogreek" toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian" toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian" diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 101b236aa..6fd4cdeb4 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -16,6 +16,7 @@ import Codec.Archive.Zip import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B +import Data.Maybe (fromMaybe) import Data.Generics (everywhere', mkT) import Data.List (isPrefixOf) import qualified Data.Map as Map @@ -23,7 +24,7 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Time import System.FilePath (takeDirectory, takeExtension, (<.>)) -import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import UnicodeCollation.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition @@ -35,7 +36,7 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.DocLayout import Text.Pandoc.Shared (stringify, pandocVersion, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, - fixDisplayMath) + fixDisplayMath, getLang) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) @@ -194,7 +195,7 @@ addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) = Attr n (langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n (langRegion lang) + = Attr n (fromMaybe "" $ langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index cf42f2228..6c265090c 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -25,7 +25,7 @@ import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import UnicodeCollation.Lang (Lang (..), parseLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm, setTranslations, toLang) import Text.Pandoc.Definition @@ -236,7 +236,7 @@ handleSpaces s = case T.uncons s of -- | Convert Pandoc document to string in OpenDocument format. writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeOpenDocument opts (Pandoc meta blocks) = do - let defLang = Lang "en" "US" "" [] + let defLang = Lang "en" (Just "US") Nothing [] [] [] lang <- case lookupMetaString "lang" meta of "" -> pure defLang s -> fromMaybe defLang <$> toLang (Just s) @@ -893,7 +893,7 @@ textStyleAttr m s Map.insert "style:font-name-complex" "Courier New" $ m | Language lang <- s = Map.insert "fo:language" (langLanguage lang) . - Map.insert "fo:country" (langRegion lang) $ m + maybe id (Map.insert "fo:country") (langRegion lang) $ m | otherwise = m withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a @@ -901,7 +901,7 @@ withLangFromAttr (_,_,kvs) action = case lookup "lang" kvs of Nothing -> action Just l -> - case parseBCP47 l of + case parseLang l of Right lang -> withTextStyle (Language lang) action Left _ -> do report $ InvalidLang l diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index fcb47bd5a..a09d18571 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -149,7 +149,7 @@ defField field val (Context m) = f _newval oldval = oldval -- | Get the contents of the `lang` metadata field or variable. -getLang :: WriterOptions -> Meta -> Maybe Text +getLang :: WriterOptions -> Meta -> Maybe T.Text getLang opts meta = case lookupContext "lang" (writerVariables opts) of Just s -> Just s -- cgit v1.2.3 From a478a5c4c8753fd0bf272cd540ca197ae146a196 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 17 Apr 2021 11:47:54 -0700 Subject: Update to released unicode-collation, latest citeproc dev version. Update citeproc test. --- cabal.project | 11 +---------- src/Text/Pandoc/App.hs | 2 +- src/Text/Pandoc/Citeproc/Data.hs | 2 +- src/Text/Pandoc/Class/CommonState.hs | 2 +- src/Text/Pandoc/Class/PandocMonad.hs | 2 +- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- src/Text/Pandoc/Readers/LaTeX/Lang.hs | 2 +- src/Text/Pandoc/Writers/ConTeXt.hs | 2 +- src/Text/Pandoc/Writers/Docbook.hs | 2 +- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- src/Text/Pandoc/Writers/LaTeX/Lang.hs | 2 +- src/Text/Pandoc/Writers/ODT.hs | 2 +- src/Text/Pandoc/Writers/OpenDocument.hs | 2 +- stack.yaml | 5 ++--- test/command/pandoc-citeproc-320a.md | 8 ++++---- 16 files changed, 20 insertions(+), 30 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/cabal.project b/cabal.project index fa17a20a6..77bc8ef88 100644 --- a/cabal.project +++ b/cabal.project @@ -5,14 +5,5 @@ flags: +embed_data_files source-repository-package type: git location: https://github.com/jgm/citeproc - tag: b42857be658b8f2649e989e061978e304986f853 + tag: f9439e07e9271c7c2674a51efcad2fb8c663b2c8 -source-repository-package - type: git - location: https://github.com/jgm/unicode-collation - tag: 9d229a5c6bcbaf53d7022575234eb223cfa90d55 - --- source-repository-package --- type: git --- location: https://github.com/jgm/citeproc --- tag: d44e24696ab444090d0e63e321c3a573f68b2e74 diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 67d3cce7d..4e8c9f2ab 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs, options) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) -import UnicodeCollation.Lang (Lang (..), parseLang) +import Text.Collate.Lang (Lang (..), parseLang) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.SelfContained (makeSelfContained) diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs index 388b9ba62..848a83a1e 100644 --- a/src/Text/Pandoc/Citeproc/Data.hs +++ b/src/Text/Pandoc/Citeproc/Data.hs @@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE import qualified Data.Text as T import Data.Text (Text) import Text.Pandoc.Citeproc.Util (toIETF) -import UnicodeCollation.Lang (Lang(..), parseLang) +import Text.Collate.Lang (Lang(..), parseLang) biblatexLocalizations :: [(FilePath, ByteString)] biblatexLocalizations = $(embedDir "citeproc/biblatex-localization") diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs index 0fd094d99..796a4afd5 100644 --- a/src/Text/Pandoc/Class/CommonState.hs +++ b/src/Text/Pandoc/Class/CommonState.hs @@ -19,7 +19,7 @@ where import Data.Default (Default (def)) import Data.Text (Text) -import UnicodeCollation.Lang (Lang) +import Text.Collate.Lang (Lang) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING)) import Text.Pandoc.Translations (Translations) diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 76f1fa32b..7559cd7cd 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, import System.FilePath ((), (<.>), takeExtension, dropExtension, isRelative, splitDirectories) import System.Random (StdGen) -import UnicodeCollation.Lang (Lang(..), parseLang, renderLang) +import Text.Collate.Lang (Lang(..), parseLang, renderLang) import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 83caf742a..203dab83c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,7 +33,7 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import System.FilePath (addExtension, replaceExtension, takeExtension) -import UnicodeCollation.Lang (renderLang) +import Text.Collate.Lang (renderLang) import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index b92e6ab57..6a8327904 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -23,7 +23,7 @@ import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Shared (extractSpaces) -import UnicodeCollation.Lang (Lang(..), renderLang) +import Text.Collate.Lang (Lang(..), renderLang) import Text.Pandoc.Class (PandocMonad(..), setTranslations) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..), diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index f352c84bc..f14b1d894 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) -import UnicodeCollation.Lang (Lang(..)) +import Text.Collate.Lang (Lang(..)) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.ImageSize diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 1f10c9d04..02b141250 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -493,4 +493,4 @@ isSectionAttr DocBook4 ("os",_) = True isSectionAttr DocBook4 ("revision",_) = True isSectionAttr DocBook4 ("security",_) = True isSectionAttr DocBook4 ("vendor",_) = True -isSectionAttr _ (_,_) = False \ No newline at end of file +isSectionAttr _ (_,_) = False diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 7781df8e7..749ad9a21 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting -import UnicodeCollation.Lang (renderLang) +import Text.Collate.Lang (renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Data.Time diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e99bad738..8b1f3df1d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -29,7 +29,7 @@ import qualified Data.Text as T import Network.URI (unEscapeString) import Text.DocTemplates (FromContext(lookupContext), renderTemplate, Val(..), Context(..)) -import UnicodeCollation.Lang (Lang (..), renderLang) +import Text.Collate.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs index 437b84120..0ba68b74e 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -15,7 +15,7 @@ module Text.Pandoc.Writers.LaTeX.Lang toBabel ) where import Data.Text (Text) -import UnicodeCollation.Lang (Lang(..)) +import Text.Collate.Lang (Lang(..)) -- In environments \Arabic instead of \arabic is used diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 6fd4cdeb4..e4eb4fd25 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -24,7 +24,7 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Time import System.FilePath (takeDirectory, takeExtension, (<.>)) -import UnicodeCollation.Lang (Lang (..), renderLang) +import Text.Collate.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 6c265090c..34a3a4aa5 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -25,7 +25,7 @@ import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import UnicodeCollation.Lang (Lang (..), parseLang) +import Text.Collate.Lang (Lang (..), parseLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm, setTranslations, toLang) import Text.Pandoc.Definition diff --git a/stack.yaml b/stack.yaml index 159bf74b9..7bc33fa43 100644 --- a/stack.yaml +++ b/stack.yaml @@ -16,11 +16,10 @@ extra-deps: - texmath-0.12.2 - random-1.2.0 - xml-conduit-1.9.1.1 +- unicode-collation-0.1 # - citeproc-0.3.0.9 - git: https://github.com/jgm/citeproc - commit: b42857be658b8f2649e989e061978e304986f853 -- git: https://github.com/jgm/unicode-collation - commit: 9d229a5c6bcbaf53d7022575234eb223cfa90d55 + commit: f9439e07e9271c7c2674a51efcad2fb8c663b2c8 ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules resolver: lts-17.5 diff --git a/test/command/pandoc-citeproc-320a.md b/test/command/pandoc-citeproc-320a.md index 1c3b47de0..79dacfa10 100644 --- a/test/command/pandoc-citeproc-320a.md +++ b/test/command/pandoc-citeproc-320a.md @@ -56,6 +56,10 @@ n.d.; al-'Udhrī, n.d.; Uch, n.d.; Uebel, n.d.; Zzz, n.d.). Uch, Ann. n.d. ::: +::: {#ref-item4 .csl-entry} +'Udhrī, Jamīl al-. n.d. +::: + ::: {#ref-item1 .csl-entry} ʾUdhrī, Jamīl al-. n.d. ::: @@ -68,10 +72,6 @@ Uch, Ann. n.d. \'Udhrī, Jamīl al-. n.d. ::: -::: {#ref-item4 .csl-entry} -'Udhrī, Jamīl al-. n.d. -::: - ::: {#ref-item5 .csl-entry} 'Udhrī, Jamīl al-. n.d. ::: -- cgit v1.2.3 From ddbd984a0d8ea7e75f78ad6632fe3568e2390deb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 17 May 2021 09:31:52 -0700 Subject: Text.Pandoc.MediaBag: change type to use a Text key... instead of `[FilePath]`. We normalize the path and use `/` separators for consistency. --- src/Text/Pandoc/Class/PandocMonad.hs | 1 + src/Text/Pandoc/MediaBag.hs | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 7559cd7cd..226194503 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 3249bcdeb..4a9b4efa1 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | @@ -27,26 +28,31 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Typeable (Typeable) import System.FilePath -import qualified System.FilePath.Posix as Posix import Text.Pandoc.MIME (MimeType, getMimeTypeDef) +import Data.Text (Text) +import qualified Data.Text as T -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map [FilePath] (MimeType, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map Text (MimeType, BL.ByteString)) deriving (Semigroup, Monoid, Data, Typeable) instance Show MediaBag where show bag = "MediaBag " ++ show (mediaDirectory bag) +-- | We represent paths with /, in normalized form. +canonicalize :: FilePath -> Text +canonicalize = T.replace "\\" "/" . T.pack . normalise + -- | Delete a media item from a 'MediaBag', or do nothing if no item corresponds -- to the given path. deleteMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag deleteMedia fp (MediaBag mediamap) = - MediaBag $ M.delete (splitDirectories fp) mediamap + MediaBag $ M.delete (canonicalize fp) mediamap -- | Insert a media item into a 'MediaBag', replacing any existing -- value with the same name. @@ -56,7 +62,7 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap) + MediaBag (M.insert (canonicalize fp) (mime, contents) mediamap) where mime = fromMaybe fallback mbMime fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp @@ -66,16 +72,16 @@ insertMedia fp mbMime contents (MediaBag mediamap) = lookupMedia :: FilePath -> MediaBag -> Maybe (MimeType, BL.ByteString) -lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap +lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldrWithKey (\fp (mime,contents) -> - ((Posix.joinPath fp, mime, fromIntegral $ BL.length contents):)) [] mediamap + ((T.unpack fp, mime, fromIntegral (BL.length contents)):)) [] mediamap mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = M.foldrWithKey (\fp (mime,contents) -> - ((Posix.joinPath fp, mime, contents):)) [] mediamap + ((T.unpack fp, mime, contents):)) [] mediamap -- cgit v1.2.3 From 9b5798bd9abe267f783dab7fe5295c9b61e2fdce Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 17 May 2021 21:22:46 -0700 Subject: Use fetchItem instead of downloadOrRead in fetchMediaResource. --- src/Text/Pandoc/Class/PandocMonad.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 226194503..b12850de5 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -635,7 +635,7 @@ withPaths (p:ps) action fp = fetchMediaResource :: PandocMonad m => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) fetchMediaResource src = do - (bs, mt) <- downloadOrRead src + (bs, mt) <- fetchItem src let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) (mt >>= extensionFromMimeType) let bs' = BL.fromChunks [bs] -- cgit v1.2.3 From 640dbf8b8f5e652661df42c631b4343570d7448e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 19 May 2021 09:51:50 -0700 Subject: Remove unused pragma. --- src/Text/Pandoc/Class/PandocMonad.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index b12850de5..dd6499a73 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -1,5 +1,4 @@ {-# LANGUAGE CPP #-} -{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} -- cgit v1.2.3 From 8511f6fdf6c9fbc2cc926538bca4ae9f554b4ed9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 23 May 2021 22:57:02 -0700 Subject: MediaBag improvements. In the current dev version, we will sometimes add a version of an image with a hashed name, keeping the original version with the original name, which would leave to undesirable duplication. This change separates the media's filename from the media's canonical name (which is the path of the link in the document itself). Filenames are based on SHA1 hashes and assigned automatically. In Text.Pandoc.MediaBag: - Export MediaItem type [API change]. - Change MediaBag type to a map from Text to MediaItem [API change]. - `lookupMedia` now returns a `MediaItem` [API change]. - Change `insertMedia` so it sets the `mediaPath` to a filename based on the SHA1 hash of the contents. This will be used when contents are extracted. In Text.Pandoc.Class.PandocMonad: - Remove `fetchMediaResource` [API change]. Lua MediaBag module has been changed minimally. In the future it would be better, probably, to give Lua access to the full MediaItem type. --- src/Text/Pandoc/Class/IO.hs | 9 ++++--- src/Text/Pandoc/Class/PandocMonad.hs | 43 ++++++++++++++-------------------- src/Text/Pandoc/Lua/Module/MediaBag.hs | 6 ++--- src/Text/Pandoc/MediaBag.hs | 35 ++++++++++++++++++++------- test/Tests/Readers/Docx.hs | 10 ++++---- 5 files changed, 55 insertions(+), 48 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index bb4e2b732..f12c0a938 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) import Text.Pandoc.Walk (walk) import qualified Control.Exception as E import qualified Data.ByteString as B @@ -213,14 +213,13 @@ writeMedia :: (PandocMonad m, MonadIO m) writeMedia dir mediabag subpath = do -- we join and split to convert a/b/c to a\b\c on Windows; -- in zip containers all paths use / - let fullpath = dir unEscapeString (normalise subpath) let mbcontents = lookupMedia subpath mediabag case mbcontents of Nothing -> throwError $ PandocResourceNotFound $ pack subpath - Just (_, bs) -> do - report $ Extracting $ pack fullpath + Just item -> do + let fullpath = dir mediaPath item liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath bs + logIOError $ BL.writeFile fullpath $ mediaContents item -- | If the given Inline element is an image with a @src@ path equal to -- one in the list of @paths@, then prepends @dir@ to the image source; diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index dd6499a73..ae6917e06 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -37,7 +37,6 @@ module Text.Pandoc.Class.PandocMonad , setUserDataDir , getUserDataDir , fetchItem - , fetchMediaResource , getInputFiles , setInputFiles , getOutputFile @@ -57,8 +56,6 @@ module Text.Pandoc.Class.PandocMonad import Codec.Archive.Zip import Control.Monad.Except (MonadError (catchError, throwError), MonadTrans, lift, when) -import Data.Digest.Pure.SHA (sha1, showDigest) -import Data.Maybe (fromMaybe) import Data.List (foldl') import Data.Time (UTCTime) import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds, @@ -67,7 +64,7 @@ import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime) import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) -import System.FilePath ((), (<.>), takeExtension, dropExtension, +import System.FilePath ((), takeExtension, dropExtension, isRelative, splitDirectories) import System.Random (StdGen) import Text.Collate.Lang (Lang(..), parseLang, renderLang) @@ -75,8 +72,8 @@ import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Logging -import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia) +import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia, MediaItem(..)) import Text.Pandoc.Shared (uriPathToPath, safeRead) import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, readTranslations) @@ -376,7 +373,8 @@ fetchItem :: PandocMonad m fetchItem s = do mediabag <- getMediaBag case lookupMedia (T.unpack s) mediabag of - Just (mime, bs) -> return (BL.toStrict bs, Just mime) + Just item -> return (BL.toStrict (mediaContents item), + Just (mediaMimeType item)) Nothing -> downloadOrRead s -- | Returns the content and, if available, the MIME type of a resource. @@ -629,19 +627,6 @@ withPaths (p:ps) action fp = catchError (action (p fp)) (\_ -> withPaths ps action fp) --- | Fetch local or remote resource (like an image) and provide data suitable --- for adding it to the MediaBag. -fetchMediaResource :: PandocMonad m - => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) -fetchMediaResource src = do - (bs, mt) <- fetchItem src - let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) - (mt >>= extensionFromMimeType) - let bs' = BL.fromChunks [bs] - let basename = showDigest $ sha1 bs' - let fname = basename <.> T.unpack ext - return (fname, mt, bs') - -- | Traverse tree, filling media bag for any images that -- aren't already in the media bag. fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc @@ -649,12 +634,18 @@ fillMediaBag d = walkM handleImage d where handleImage :: PandocMonad m => Inline -> m Inline handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag - case lookupMedia (T.unpack src) mediabag of - Just (_, _) -> return $ Image attr lab (src, tit) - Nothing -> do - (fname, mt, bs) <- fetchMediaResource src - insertMedia fname mt bs - return $ Image attr lab (T.pack fname, tit)) + let fp = T.unpack src + src' <- T.pack <$> case lookupMedia fp mediabag of + Just item -> return $ mediaPath item + Nothing -> do + (bs, mt) <- fetchItem src + insertMedia fp mt (BL.fromStrict bs) + mediabag' <- getMediaBag + case lookupMedia fp mediabag' of + Just item -> return $ mediaPath item + Nothing -> throwError $ PandocSomeError $ + src <> " not successfully inserted into MediaBag" + return $ Image attr lab (src', tit)) (\e -> case e of PandocResourceNotFound _ -> do diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index 78b699176..3eed50fca 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -73,9 +73,9 @@ lookup fp = do res <- MB.lookupMedia fp <$> getMediaBag liftPandocLua $ case res of Nothing -> 1 <$ Lua.pushnil - Just (mimeType, contents) -> do - Lua.push mimeType - Lua.push contents + Just item -> do + Lua.push $ MB.mediaMimeType item + Lua.push $ MB.mediaContents item return 2 list :: PandocLua NumResults diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 4a9b4efa1..a65f315fc 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -15,6 +15,7 @@ Definition of a MediaBag object to hold binary resources, and an interface for interacting with it. -} module Text.Pandoc.MediaBag ( + MediaItem(..), MediaBag, deleteMedia, lookupMedia, @@ -28,15 +29,23 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Typeable (Typeable) import System.FilePath -import Text.Pandoc.MIME (MimeType, getMimeTypeDef) +import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType) import Data.Text (Text) import qualified Data.Text as T +import Data.Digest.Pure.SHA (sha1, showDigest) + +data MediaItem = + MediaItem + { mediaMimeType :: MimeType + , mediaPath :: FilePath + , mediaContents :: BL.ByteString + } deriving (Eq, Ord, Show, Data, Typeable) -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map Text (MimeType, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map Text MediaItem) deriving (Semigroup, Monoid, Data, Typeable) instance Show MediaBag where @@ -62,26 +71,34 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (canonicalize fp) (mime, contents) mediamap) - where mime = fromMaybe fallback mbMime + MediaBag (M.insert (canonicalize fp) mediaItem mediamap) + where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <> + "." <> ext + , mediaContents = contents + , mediaMimeType = mt } fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp _ -> getMimeTypeDef fp + mt = fromMaybe fallback mbMime + ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt + -- | Lookup a media item in a 'MediaBag', returning mime type and contents. lookupMedia :: FilePath -> MediaBag - -> Maybe (MimeType, BL.ByteString) + -> Maybe MediaItem lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] mediaDirectory (MediaBag mediamap) = - M.foldrWithKey (\fp (mime,contents) -> - ((T.unpack fp, mime, fromIntegral (BL.length contents)):)) [] mediamap + M.foldrWithKey (\fp item -> + ((T.unpack fp, mediaMimeType item, + fromIntegral (BL.length (mediaContents item))):)) [] mediamap mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = - M.foldrWithKey (\fp (mime,contents) -> - ((T.unpack fp, mime, contents):)) [] mediamap + M.foldrWithKey (\fp item -> + ((T.unpack fp, mediaMimeType item, mediaContents item):)) + [] mediamap diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 2cce70cc5..939ff9939 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -24,7 +24,7 @@ import Test.Tasty.HUnit import Tests.Helpers import Text.Pandoc import qualified Text.Pandoc.Class as P -import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import qualified Text.Pandoc.MediaBag as MB import Text.Pandoc.UTF8 as UTF8 -- We define a wrapper around pandoc that doesn't normalize in the @@ -91,11 +91,11 @@ getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString) getMedia archivePath mediaPath = fmap fromEntry . findEntryByPath ("word/" ++ mediaPath) . toArchive <$> B.readFile archivePath -compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool +compareMediaPathIO :: FilePath -> MB.MediaBag -> FilePath -> IO Bool compareMediaPathIO mediaPath mediaBag docxPath = do docxMedia <- getMedia docxPath mediaPath - let mbBS = case lookupMedia mediaPath mediaBag of - Just (_, bs) -> bs + let mbBS = case MB.lookupMedia mediaPath mediaBag of + Just item -> MB.mediaContents item Nothing -> error ("couldn't find " ++ mediaPath ++ " in media bag") @@ -110,7 +110,7 @@ compareMediaBagIO docxFile = do mb <- runIOorExplode $ readDocx defopts df >> P.getMediaBag bools <- mapM (\(fp, _, _) -> compareMediaPathIO fp mb docxFile) - (mediaDirectory mb) + (MB.mediaDirectory mb) return $ and bools testMediaBagIO :: String -> FilePath -> IO TestTree -- cgit v1.2.3 From f2c1b5746912db945be780961b6503e38c3c7e1e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 25 May 2021 10:08:30 -0700 Subject: PandocMonad: add info message in `downloadOrRead`... indicating what path local resources have been loaded from. --- src/Text/Pandoc/Class/PandocMonad.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index ae6917e06..b5f401619 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE TupleSections #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -409,9 +410,10 @@ downloadOrRead s = do _ -> readLocalFile fp -- get from local file system where readLocalFile f = do resourcePath <- getResourcePath - cont <- if isRelative f - then withPaths resourcePath readFileStrict f - else readFileStrict f + (fp', cont) <- if isRelative f + then withPaths resourcePath readFileStrict f + else (f,) <$> readFileStrict f + report $ LoadedResource f fp' return (cont, mime) httpcolon = URI{ uriScheme = "http:", uriAuthority = Nothing, @@ -621,10 +623,11 @@ makeCanonical = Posix.joinPath . transformPathParts . splitDirectories -- that filepath. Returns the result of the first successful execution -- of the action, or throws a @PandocResourceNotFound@ exception if the -- action errors for all filepaths. -withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a +withPaths :: PandocMonad m + => [FilePath] -> (FilePath -> m a) -> FilePath -> m (FilePath, a) withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp withPaths (p:ps) action fp = - catchError (action (p fp)) + catchError ((p fp,) <$> action (p fp)) (\_ -> withPaths ps action fp) -- | Traverse tree, filling media bag for any images that -- cgit v1.2.3 From cc206af392a40dd7b01b714ae7f33b2fbf4925cc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 30 May 2021 10:22:02 -0700 Subject: Have LoadedResource use relative paths. The immediate reason for this is to allow the test output of #3752 to work on both windows and linux. --- src/Text/Pandoc/Class/PandocMonad.hs | 4 ++-- test/command/3752.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index b5f401619..4eb80df29 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -66,7 +66,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) import System.FilePath ((), takeExtension, dropExtension, - isRelative, splitDirectories) + isRelative, splitDirectories, makeRelative) import System.Random (StdGen) import Text.Collate.Lang (Lang(..), parseLang, renderLang) import Text.Pandoc.Class.CommonState (CommonState (..)) @@ -413,7 +413,7 @@ downloadOrRead s = do (fp', cont) <- if isRelative f then withPaths resourcePath readFileStrict f else (f,) <$> readFileStrict f - report $ LoadedResource f fp' + report $ LoadedResource f (makeRelative "." fp') return (cont, mime) httpcolon = URI{ uriScheme = "http:", uriAuthority = Nothing, diff --git a/test/command/3752.md b/test/command/3752.md index 6ac025ebe..863e3f2d4 100644 --- a/test/command/3752.md +++ b/test/command/3752.md @@ -1,9 +1,9 @@ ``` % pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx -o - | pandoc -f docx -t plain ^D -[INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png -[INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png -[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg +[INFO] Loaded command/chap1/spider.png from command/chap1/spider.png +[INFO] Loaded command/chap2/spider.png from command/chap2/spider.png +[INFO] Loaded command/chap1/../../lalune.jpg from command/chap1/../../lalune.jpg Chapter one A spider: [spider] -- cgit v1.2.3 From b6c04383e403b0962db09e6748760d3ec376f2ed Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 3 Jun 2021 18:34:38 -0600 Subject: T.P.Class.IO: normalise path in writeMedia. This ensures that we get `\` separators on Windows. --- src/Text/Pandoc/Class/IO.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index f12c0a938..6df39d4d0 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -211,13 +211,12 @@ writeMedia :: (PandocMonad m, MonadIO m) => FilePath -> MediaBag -> FilePath -> m () writeMedia dir mediabag subpath = do - -- we join and split to convert a/b/c to a\b\c on Windows; - -- in zip containers all paths use / let mbcontents = lookupMedia subpath mediabag case mbcontents of Nothing -> throwError $ PandocResourceNotFound $ pack subpath Just item -> do - let fullpath = dir mediaPath item + -- we normalize to get proper path separators for the platform + let fullpath = dir normalise (mediaPath item) liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) logIOError $ BL.writeFile fullpath $ mediaContents item -- cgit v1.2.3 From 3776e828a83048697e5c64d9fb4bedc0145197dc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 10 Jun 2021 16:47:02 -0700 Subject: Fix MediaBag regressions. With the 2.14 release `--extract-media` stopped working as before; there could be mismatches between the paths in the rendered document and the extracted media. This patch makes several changes (while keeping the same API). The `mediaPath` in 2.14 was always constructed from the SHA1 hash of the media contents. Now, we preserve the original path unless it's an absolute path or contains `..` segments (in that case we use a path based on the SHA1 hash of the contents). When constructing a path from the SHA1 hash, we always use the original extension, if there is one. Otherwise we look up an appropriate extension for the mime type. `mediaDirectory` and `mediaItems` now use the `mediaPath`, rather than the mediabag key, for the first component of the tuple. This makes more sense, I think, and fits with the documentation of these functions; eventually, though, we should rework the API so that `mediaItems` returns both the keys and the MediaItems. Rewriting of source paths in `extractMedia` has been fixed. `fillMediaBag` has been modified so that it doesn't modify image paths (that was part of the problem in #7345). We now do path normalization (e.g. `\` separators on Windows) only in writing the media; the paths are left unchanged in the image links (sensibly, since they might be URLs and not file paths). These changes should restore the original behavior from before 2.14. Closes #7345. --- MANUAL.txt | 12 +++++------ src/Text/Pandoc/Class/IO.hs | 41 ++++++++++++++++++------------------ src/Text/Pandoc/Class/PandocMonad.hs | 17 ++++++--------- src/Text/Pandoc/MediaBag.hs | 25 ++++++++++++---------- 4 files changed, 47 insertions(+), 48 deletions(-) (limited to 'src/Text/Pandoc/Class') diff --git a/MANUAL.txt b/MANUAL.txt index b3a1f95e2..ef569433a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -675,12 +675,12 @@ header when requesting a document from a URL: : Extract images and other media contained in or linked from the source document to the path *DIR*, creating it if necessary, and adjust the images references in the document - so they point to the extracted files. If the source format is - a binary container (docx, epub, or odt), the media is - extracted from the container and the original - filenames are used. Otherwise the media is read from the - file system or downloaded, and new filenames are constructed - based on SHA1 hashes of the contents. + so they point to the extracted files. Media are downloaded, + read from the file system, or extracted from a binary + container (e.g. docx), as needed. The original file paths + are used if they are relative paths not containing `..`. + Otherwise filenames are constructed from the SHA1 hash of + the contents. `--abbreviations=`*FILE* diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 6df39d4d0..169074860 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems) import Text.Pandoc.Walk (walk) import qualified Control.Exception as E import qualified Data.ByteString as B @@ -200,31 +200,32 @@ alertIndent (l:ls) = do extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc extractMedia dir d = do media <- getMediaBag - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - mapM_ (writeMedia dir media) fps - return $ walk (adjustImagePath dir fps) d + let items = mediaItems media + if null items + then return d + else do + mapM_ (writeMedia dir) items + return $ walk (adjustImagePath dir media) d -- | Write the contents of a media bag to a path. writeMedia :: (PandocMonad m, MonadIO m) - => FilePath -> MediaBag -> FilePath + => FilePath + -> (FilePath, MimeType, BL.ByteString) -> m () -writeMedia dir mediabag subpath = do - let mbcontents = lookupMedia subpath mediabag - case mbcontents of - Nothing -> throwError $ PandocResourceNotFound $ pack subpath - Just item -> do - -- we normalize to get proper path separators for the platform - let fullpath = dir normalise (mediaPath item) - liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath $ mediaContents item +writeMedia dir (fp, _mt, bs) = do + -- we normalize to get proper path separators for the platform + let fullpath = normalise $ dir fp + liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) + logIOError $ BL.writeFile fullpath bs -- | If the given Inline element is an image with a @src@ path equal to -- one in the list of @paths@, then prepends @dir@ to the image source; -- returns the element unchanged otherwise. -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | unpack src `elem` paths - = Image attr lab (pack (normalise $ dir unpack src), tit) +adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline +adjustImagePath dir mediabag (Image attr lab (src, tit)) = + case lookupMedia (T.unpack src) mediabag of + Nothing -> Image attr lab (src, tit) + Just item -> + let fullpath = dir mediaPath item + in Image attr lab (T.pack fullpath, tit) adjustImagePath _ _ x = x diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 4eb80df29..439aec071 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -638,17 +638,12 @@ fillMediaBag d = walkM handleImage d handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag let fp = T.unpack src - src' <- T.pack <$> case lookupMedia fp mediabag of - Just item -> return $ mediaPath item - Nothing -> do - (bs, mt) <- fetchItem src - insertMedia fp mt (BL.fromStrict bs) - mediabag' <- getMediaBag - case lookupMedia fp mediabag' of - Just item -> return $ mediaPath item - Nothing -> throwError $ PandocSomeError $ - src <> " not successfully inserted into MediaBag" - return $ Image attr lab (src', tit)) + case lookupMedia fp mediabag of + Just _ -> return () + Nothing -> do + (bs, mt) <- fetchItem src + insertMedia fp mt (BL.fromStrict bs) + return $ Image attr lab (src, tit)) (\e -> case e of PandocResourceNotFound _ -> do diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index a65f315fc..06fba5632 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -71,16 +71,21 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (canonicalize fp) mediaItem mediamap) - where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <> - "." <> ext + MediaBag (M.insert fp' mediaItem mediamap) + where mediaItem = MediaItem{ mediaPath = newpath , mediaContents = contents , mediaMimeType = mt } + fp' = canonicalize fp + newpath = if isRelative fp && ".." `notElem` splitPath fp + then T.unpack fp' + else showDigest (sha1 contents) <> "." <> ext fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp _ -> getMimeTypeDef fp mt = fromMaybe fallback mbMime - ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt + ext = case takeExtension fp of + '.':e -> e + _ -> maybe "" T.unpack $ extensionFromMimeType mt -- | Lookup a media item in a 'MediaBag', returning mime type and contents. @@ -92,13 +97,11 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] -mediaDirectory (MediaBag mediamap) = - M.foldrWithKey (\fp item -> - ((T.unpack fp, mediaMimeType item, - fromIntegral (BL.length (mediaContents item))):)) [] mediamap +mediaDirectory mediabag = + map (\(fp, mt, bs) -> (fp, mt, fromIntegral (BL.length bs))) + (mediaItems mediabag) mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = - M.foldrWithKey (\fp item -> - ((T.unpack fp, mediaMimeType item, mediaContents item):)) - [] mediamap + map (\item -> (mediaPath item, mediaMimeType item, mediaContents item)) + (M.elems mediamap) -- cgit v1.2.3 From 477a67061f06827b7e807319404cc277a417e9d0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 9 Jul 2021 14:14:19 -0700 Subject: Always use / when adding directory to image path with extractMedia. Even on Windows. May help with #7431. --- src/Text/Pandoc/Class/IO.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Class') diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 169074860..f4cfc8682 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -226,6 +226,6 @@ adjustImagePath dir mediabag (Image attr lab (src, tit)) = case lookupMedia (T.unpack src) mediabag of Nothing -> Image attr lab (src, tit) Just item -> - let fullpath = dir mediaPath item + let fullpath = dir <> "/" <> mediaPath item in Image attr lab (T.pack fullpath, tit) adjustImagePath _ _ x = x -- cgit v1.2.3