diff options
author | despresc <christian.j.j.despres@gmail.com> | 2019-11-04 16:12:37 -0500 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2019-11-12 16:03:45 -0800 |
commit | 90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch) | |
tree | 4e7f0692f989643189f1fc6786050d95e239a0ea | |
parent | d3966372f5049eea56213b069fc4d70d8af9144c (diff) | |
download | pandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz |
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884.
+ Use pandoc-types 1.20 and texmath 0.12.
+ Text is now used instead of String, with a few exceptions.
+ In the MediaBag module, some of the types using Strings
were switched to use FilePath instead (not Text).
+ In the Parsing module, new parsers `manyChar`, `many1Char`,
`manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`,
`mantyUntilChar` have been added: these are like their
unsuffixed counterparts but pack some or all of their output.
+ `glob` in Text.Pandoc.Class still takes String since it seems
to be intended as an interface to Glob, which uses strings.
It seems to be used only once in the package, in the EPUB writer,
so that is not hard to change.
143 files changed, 6368 insertions, 5981 deletions
diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 8968cad3e..967728f5d 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} {- Copyright (C) 2012-2019 John MacFarlane <jgm@berkeley.edu> @@ -24,6 +25,7 @@ import Text.Pandoc.Error (PandocError(..)) import Control.Monad.Except (throwError) import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.ByteString as B +import qualified Data.Text as T import Criterion.Main import Criterion.Types (Config(..)) import Data.List (intersect) @@ -32,12 +34,12 @@ import System.Environment (getArgs) import qualified Data.ByteString.Lazy as BL readerBench :: Pandoc - -> String + -> T.Text -> Maybe Benchmark readerBench doc name = case res of Right (readerFun, inp) -> - Just $ bench (name ++ " reader") + Just $ bench (T.unpack $ name <> " reader") $ nf (\i -> either (error . show) id $ runPure (readerFun i)) inp Left _ -> Nothing @@ -51,7 +53,7 @@ readerBench doc name = , writerExtensions = wexts } doc return $ (r def{ readerExtensions = rexts }, inp) _ -> throwError $ PandocSomeError $ "not a text format: " - ++ name + <> name getImages :: IO [(FilePath, MimeType, BL.ByteString)] getImages = do @@ -61,13 +63,13 @@ getImages = do ,("movie.jpg", "image/jpg", mv)] writerBench :: Pandoc - -> String + -> T.Text -> Maybe Benchmark writerBench doc name = case res of Right writerFun -> Just $ env getImages $ \imgs -> - bench (name ++ " writer") + bench (T.unpack $ name <> " writer") $ nf (\d -> either (error . show) id $ runPure (do mapM_ (\(fp, mt, bs) -> @@ -81,11 +83,11 @@ writerBench doc name = TextWriter w -> return $ w def{ writerExtensions = wexts } _ -> throwError $ PandocSomeError - $ "could not get text writer for " ++ name + $ "could not get text writer for " <> name main :: IO () main = do - args <- filter (\x -> take 1 x /= "-") <$> getArgs + args <- filter (\x -> T.take 1 x /= "-") . fmap T.pack <$> getArgs print args let matchReader (n, TextReader _) = null args || ("reader" `elem` args && n `elem` args) @@ -94,9 +96,9 @@ main = do null args || ("writer" `elem` args && n `elem` args) matchWriter _ = False let matchedReaders = map fst $ (filter matchReader readers - :: [(String, Reader PandocPure)]) + :: [(T.Text, Reader PandocPure)]) let matchedWriters = map fst $ (filter matchWriter writers - :: [(String, Writer PandocPure)]) + :: [(T.Text, Writer PandocPure)]) inp <- UTF8.toText <$> B.readFile "test/testsuite.txt" let opts = def let doc = either (error . show) id $ runPure $ readMarkdown opts inp diff --git a/benchmark/weigh-pandoc.hs b/benchmark/weigh-pandoc.hs index f633255df..3b8a414aa 100644 --- a/benchmark/weigh-pandoc.hs +++ b/benchmark/weigh-pandoc.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Main Copyright : © 2016-2019 John MacFarlane <jgm@berkeley.edu> @@ -13,7 +14,7 @@ Benchmarks to determine resource use of readers and writers. import Prelude import Weigh import Text.Pandoc -import Data.Text (Text) +import Data.Text (Text, unpack) main :: IO () main = do @@ -40,12 +41,12 @@ main = do weighWriter :: Pandoc -> String -> (Pandoc -> Text) -> Weigh () weighWriter doc name writer = func (name ++ " writer") writer doc -weighReader :: Pandoc -> String -> (Text -> Pandoc) -> Weigh () +weighReader :: Pandoc -> Text -> (Text -> Pandoc) -> Weigh () weighReader doc name reader = do case lookup name writers of Just (TextWriter writer) -> let inp = either (error . show) id $ runPure $ writer def{ writerWrapText = WrapAuto} doc - in func (name ++ " reader") reader inp + in func (unpack $ name <> " reader") reader inp _ -> return () -- no writer for reader diff --git a/cabal.project b/cabal.project index f022522ce..d2a7df9c6 100644 --- a/cabal.project +++ b/cabal.project @@ -11,9 +11,4 @@ package pandoc-citeproc source-repository-package type: git location: https://github.com/jgm/pandoc-citeproc - tag: 0.16.3.1 - -source-repository-package - type: git - location: https://github.com/jgm/pandoc-types - tag: 00f7bb79e79d7cfd3523880dbc64ba3ea46c3da2 + tag: dc09b028d6876df81cd76b731e58886f77f269b1 diff --git a/pandoc.cabal b/pandoc.cabal index 67357eae6..d25446779 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -381,11 +381,11 @@ library safe >= 0.3 && < 0.4, zip-archive >= 0.2.3.4 && < 0.5, HTTP >= 4000.0.5 && < 4000.4, - texmath >= 0.11.3 && < 0.12, + texmath >= 0.12 && < 0.13, xml >= 1.3.12 && < 1.4, split >= 0.2 && < 0.3, random >= 1 && < 1.2, - pandoc-types >= 1.17.6 && < 1.18, + pandoc-types >= 1.20 && < 1.21, aeson >= 0.7 && < 1.5, scientific >= 0.3 && < 0.4, aeson-pretty >= 0.8.5 && < 0.9, @@ -705,7 +705,7 @@ test-suite test-pandoc hs-source-dirs: test build-depends: base >= 4.8 && < 5, pandoc, - pandoc-types >= 1.17.6 && < 1.18, + pandoc-types >= 1.20 && < 1.21, mtl >= 2.2 && < 2.3, bytestring >= 0.9 && < 0.11, base64-bytestring >= 0.1 && < 1.1, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 0d34eca11..ecbdeecd8 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} {- | @@ -58,7 +59,7 @@ import Text.Pandoc.Readers.Markdown (yamlToMeta) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput, - defaultUserDataDirs) + defaultUserDataDirs, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString) import qualified Text.Pandoc.UTF8 as UTF8 #ifndef _WINDOWS @@ -66,7 +67,6 @@ import System.Posix.IO (stdOutput) import System.Posix.Terminal (queryTerminal) #endif - convertWithOpts :: Opt -> IO () convertWithOpts opts = do let outputFile = fromMaybe "-" (optOutputFile opts) @@ -141,17 +141,17 @@ convertWithOpts opts = do Nothing -> case formatFromFilePaths sources of Just f' -> return f' Nothing | sources == ["-"] -> return "markdown" - | any isURI sources -> return "html" + | any (isURI . T.pack) sources -> return "html" | otherwise -> do report $ CouldNotDeduceFormat - (map takeExtension sources) "markdown" + (map (T.pack . takeExtension) sources) "markdown" return "markdown" let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" when (pdfOutput && readerName == "latex") $ case (optInputFiles opts) of - (inputFile:_) -> report $ UnusualConversion $ + (inputFile:_) -> report $ UnusualConversion $ T.pack $ "to convert a .tex file to PDF, you get better results by using pdflatex " <> "(or lualatex or xelatex) directly, try `pdflatex " <> inputFile <> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`." @@ -187,15 +187,15 @@ convertWithOpts opts = do when ((pdfOutput || not (isTextFormat format)) && istty && isNothing ( optOutputFile opts)) $ throwError $ PandocAppError $ - "Cannot write " ++ format ++ " output to terminal.\n" ++ - "Specify an output file using the -o option, or " ++ + "Cannot write " <> format <> " output to terminal.\n" <> + "Specify an output file using the -o option, or " <> "use '-o -' to force output to stdout." - abbrevs <- Set.fromList . filter (not . null) . lines <$> + abbrevs <- Set.fromList . filter (not . T.null) . T.lines <$> case optAbbreviations opts of - Nothing -> UTF8.toString <$> readDataFile "abbreviations" - Just f -> UTF8.toString <$> readFileStrict f + Nothing -> UTF8.toText <$> readDataFile "abbreviations" + Just f -> UTF8.toText <$> readFileStrict f metadata <- if format == "jats" && isNothing (lookupMeta "csl" (optMetadata opts)) && @@ -285,7 +285,7 @@ convertWithOpts opts = do >=> return . adjustMetadata (metadataFromFile <>) >=> return . adjustMetadata (<> metadata) >=> applyTransforms transforms - >=> applyFilters readerOpts filters' [format] + >=> applyFilters readerOpts filters' [T.unpack format] >=> maybe return extractMedia (optExtractMedia opts) ) @@ -298,7 +298,7 @@ convertWithOpts opts = do case res of Right pdf -> writeFnBinary outputFile pdf Left err' -> throwError $ PandocPDFError $ - TL.unpack (TE.decodeUtf8With TE.lenientDecode err') + TL.toStrict (TE.decodeUtf8With TE.lenientDecode err') Nothing -> do let ensureNl t @@ -308,18 +308,16 @@ convertWithOpts opts = do output <- ensureNl <$> f writerOptions doc writerFn eol outputFile =<< if optSelfContained opts && htmlFormat format - -- TODO not maximally efficient; change type - -- of makeSelfContained so it works w/ Text - then T.pack <$> makeSelfContained (T.unpack output) + then makeSelfContained output else return output type Transform = Pandoc -> Pandoc -htmlFormat :: String -> Bool +htmlFormat :: Text -> Bool htmlFormat = (`elem` ["html","html4","html5","s5","slidy", "slideous","dzslides","revealjs"]) -isTextFormat :: String -> Bool +isTextFormat :: Text -> Bool isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx"] adjustMetadata :: (Meta -> Meta) -> Pandoc -> Pandoc @@ -335,7 +333,7 @@ readSource src = case parseURI src of Just u | uriScheme u `elem` ["http:","https:"] -> readURI src | uriScheme u == "file:" -> liftIO $ - readTextFile (uriPathToPath $ uriPath u) + readTextFile (uriPathToPath $ T.pack $ uriPath u) _ -> liftIO $ readTextFile src where readTextFile :: FilePath -> IO Text readTextFile fp = do @@ -347,12 +345,12 @@ readSource src = case parseURI src of TSE.DecodeError _ (Just w) -> do case BS.elemIndex w bs of Just offset -> E.throwIO $ - PandocUTF8DecodingError fp offset w - _ -> E.throwIO $ PandocUTF8DecodingError fp 0 w - _ -> E.throwIO $ PandocAppError (show e)) + PandocUTF8DecodingError (T.pack fp) offset w + _ -> E.throwIO $ PandocUTF8DecodingError (T.pack fp) 0 w + _ -> E.throwIO $ PandocAppError (tshow e)) readURI :: FilePath -> PandocIO Text -readURI src = UTF8.toText . fst <$> openURL src +readURI src = UTF8.toText . fst <$> openURL (T.pack src) readFile' :: MonadIO m => FilePath -> m BL.ByteString readFile' "-" = liftIO BL.getContents diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index c6f88af24..56b1f780a 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -2,6 +2,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.App.CommandLineOptions Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -45,7 +46,7 @@ import Text.Pandoc.Builder (setMeta) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..)) import Text.Pandoc.Filter (Filter (..)) import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.Shared (ordNub, safeRead, defaultUserDataDirs) +import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDirs) import Text.Printf #ifdef EMBED_DATA_FILES @@ -78,7 +79,7 @@ parseOptions options' defaults = do unrecognizedOpts unless (null errors && null unknownOptionErrors) $ - E.throwIO $ PandocOptionError $ + E.throwIO $ PandocOptionError $ T.pack $ concat errors ++ unlines unknownOptionErrors ++ ("Try " ++ prg ++ " --help for more information.") @@ -92,7 +93,7 @@ latexEngines = ["pdflatex", "lualatex", "xelatex", "latexmk", "tectonic"] htmlEngines :: [String] htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"] -engines :: [(String, String)] +engines :: [(Text, String)] engines = map ("html",) htmlEngines ++ map ("html5",) htmlEngines ++ map ("latex",) latexEngines ++ @@ -119,13 +120,13 @@ options = [ Option "fr" ["from","read"] (ReqArg (\arg opt -> return opt { optFrom = - Just (map toLower arg) }) + Just (T.toLower $ T.pack arg) }) "FORMAT") "" , Option "tw" ["to","write"] (ReqArg - (\arg opt -> return opt { optTo = Just arg }) + (\arg opt -> return opt { optTo = Just $ T.pack arg }) "FORMAT") "" @@ -218,7 +219,7 @@ options = , Option "" ["toc-depth"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t >= 1 && t <= 6 -> return opt { optTOCDepth = t } _ -> E.throwIO $ PandocOptionError @@ -234,7 +235,7 @@ options = , Option "" ["number-offset"] (ReqArg (\arg opt -> - case safeRead ('[':arg ++ "]") of + case safeStrRead ("[" <> arg <> "]") of Just ns -> return opt { optNumberOffset = ns, optNumberSections = True } _ -> E.throwIO $ PandocOptionError @@ -255,7 +256,7 @@ options = "default" -> return opt{ optTopLevelDivision = TopLevelDefault } _ -> E.throwIO $ PandocOptionError $ - "Top-level division must be " ++ + "Top-level division must be " <> "section, chapter, part, or default" ) "section|chapter|part") "" -- "Use top-level division type in LaTeX, ConTeXt, DocBook" @@ -307,7 +308,7 @@ options = , Option "" ["highlight-style"] (ReqArg (\arg opt -> - return opt{ optHighlightStyle = Just arg }) + return opt{ optHighlightStyle = Just $ T.pack arg }) "STYLE|FILE") "" -- "Style for highlighted code" @@ -328,7 +329,7 @@ options = , Option "" ["dpi"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t > 0 -> return opt { optDpi = t } _ -> E.throwIO $ PandocOptionError "dpi must be a number greater than 0") @@ -351,7 +352,7 @@ options = , Option "" ["columns"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t > 0 -> return opt { optColumns = t } _ -> E.throwIO $ PandocOptionError "columns must be a number greater than 0") @@ -366,7 +367,7 @@ options = , Option "" ["tab-stop"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t > 0 -> return opt { optTabStop = t } _ -> E.throwIO $ PandocOptionError "tab-stop must be a number greater than 0") @@ -379,7 +380,7 @@ options = let b = takeBaseName arg if b `elem` pdfEngines then return opt { optPdfEngine = Just arg } - else E.throwIO $ PandocOptionError $ "pdf-engine must be one of " + else E.throwIO $ PandocOptionError $ T.pack $ "pdf-engine must be one of " ++ intercalate ", " pdfEngines) "PROGRAM") "" -- "Name of program to use in generating PDF" @@ -410,7 +411,7 @@ options = (\arg opt -> do let (key, val) = splitField arg return opt{ optRequestHeaders = - (key, val) : optRequestHeaders opt }) + (T.pack key, T.pack val) : optRequestHeaders opt }) "NAME:VALUE") "" @@ -422,14 +423,15 @@ options = , Option "" ["indented-code-classes"] (ReqArg - (\arg opt -> return opt { optIndentedCodeClasses = words $ - map (\c -> if c == ',' then ' ' else c) arg }) + (\arg opt -> return opt { optIndentedCodeClasses = T.words $ + T.map (\c -> if c == ',' then ' ' else c) $ + T.pack arg }) "STRING") "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" , Option "" ["default-image-extension"] (ReqArg - (\arg opt -> return opt { optDefaultImageExtension = arg }) + (\arg opt -> return opt { optDefaultImageExtension = T.pack arg }) "extension") "" -- "Default extension for extensionless images" @@ -450,7 +452,7 @@ options = , Option "" ["shift-heading-level-by"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t -> return opt{ optShiftHeadingLevelBy = t } _ -> E.throwIO $ PandocOptionError @@ -463,7 +465,7 @@ options = (\arg opt -> do deprecatedOption "--base-header-level" "Use --shift-heading-level-by instead." - case safeRead arg of + case safeStrRead arg of Just t | t > 0 && t < 6 -> return opt{ optShiftHeadingLevelBy = t - 1 } _ -> E.throwIO $ PandocOptionError @@ -486,7 +488,7 @@ options = "accept" -> return AcceptChanges "reject" -> return RejectChanges "all" -> return AllChanges - _ -> E.throwIO $ PandocOptionError + _ -> E.throwIO $ PandocOptionError $ T.pack ("Unknown option for track-changes: " ++ arg) return opt { optTrackChanges = action }) "accept|reject|all") @@ -509,7 +511,7 @@ options = "block" -> return EndOfBlock "section" -> return EndOfSection "document" -> return EndOfDocument - _ -> E.throwIO $ PandocOptionError + _ -> E.throwIO $ PandocOptionError $ T.pack ("Unknown option for reference-location: " ++ arg) return opt { optReferenceLocation = action }) "block|section|document") @@ -533,7 +535,7 @@ options = , Option "" ["slide-level"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t >= 1 && t <= 6 -> return opt { optSlideLevel = Just t } _ -> E.throwIO $ PandocOptionError @@ -559,7 +561,7 @@ options = "references" -> return ReferenceObfuscation "javascript" -> return JavascriptObfuscation "none" -> return NoObfuscation - _ -> E.throwIO $ PandocOptionError + _ -> E.throwIO $ PandocOptionError $ T.pack ("Unknown obfuscation method: " ++ arg) return opt { optEmailObfuscation = method }) "none|javascript|references") @@ -567,7 +569,7 @@ options = , Option "" ["id-prefix"] (ReqArg - (\arg opt -> return opt { optIdentifierPrefix = arg }) + (\arg opt -> return opt { optIdentifierPrefix = T.pack arg }) "STRING") "" -- "Prefix to add to automatically generated HTML identifiers" @@ -620,7 +622,7 @@ options = , Option "" ["epub-chapter-level"] (ReqArg (\arg opt -> - case safeRead arg of + case safeStrRead arg of Just t | t >= 1 && t <= 6 -> return opt { optEpubChapterLevel = t } _ -> E.throwIO $ PandocOptionError @@ -685,15 +687,15 @@ options = (OptArg (\arg opt -> do let url' = fromMaybe "https://latex.codecogs.com/png.latex?" arg - return opt { optHTMLMathMethod = WebTeX url' }) + return opt { optHTMLMathMethod = WebTeX $ T.pack url' }) "URL") "" -- "Use web service for HTML math" , Option "" ["mathjax"] (OptArg (\arg opt -> do - let url' = fromMaybe (defaultMathJaxURL ++ - "tex-mml-chtml.js") arg + let url' = maybe (defaultMathJaxURL <> + "tex-mml-chtml.js") T.pack arg return opt { optHTMLMathMethod = MathJax url'}) "URL") "" -- "Use MathJax for HTML math" @@ -703,7 +705,7 @@ options = (\arg opt -> return opt { optHTMLMathMethod = KaTeX $ - fromMaybe defaultKaTeXURL arg }) + maybe defaultKaTeXURL T.pack arg }) "URL") "" -- Use KaTeX for HTML Math @@ -763,7 +765,7 @@ options = UTF8.hPutStrLn stdout $ printf tpl allopts (unwords readersNames) (unwords writersNames) - (unwords $ map fst highlightingStyles) + (unwords $ map (T.unpack . fst) highlightingStyles) (unwords datafiles) exitSuccess )) "" -- "Print bash completion script" @@ -790,12 +792,12 @@ options = let allExts = case arg of Nothing -> extensionsFromList extList - Just fmt -> getAllExtensions fmt + Just fmt -> getAllExtensions $ T.pack fmt let defExts = case arg of Nothing -> getDefaultExtensions "markdown" - Just fmt -> getDefaultExtensions fmt + Just fmt -> getDefaultExtensions $ T.pack fmt let showExt x = (if extensionEnabled x defExts then '+' @@ -823,7 +825,7 @@ options = , Option "" ["list-highlight-styles"] (NoArg (\_ -> do - mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles + mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles exitSuccess )) "" @@ -835,11 +837,11 @@ options = Nothing -> UTF8.hPutStr stdout templ <- runIO $ do setUserDataDir Nothing - getDefaultTemplate arg + getDefaultTemplate (T.pack arg) case templ of Right t | T.null t -> -- e.g. for docx, odt, json: - E.throwIO $ PandocCouldNotFindDataFileError + E.throwIO $ PandocCouldNotFindDataFileError $ T.pack ("templates/default." ++ arg) | otherwise -> write . T.unpack $ t Left e -> E.throwIO e @@ -890,7 +892,7 @@ options = (\_ -> do prg <- getProgName defaultDatadirs <- defaultUserDataDirs - UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ + UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++ compileInfo ++ "\nDefault user data directory: " ++ intercalate " or " defaultDatadirs ++ ('\n':copyrightMessage)) @@ -963,14 +965,14 @@ handleUnrecognizedOption x = (("Unknown option " ++ x ++ ".") :) readersNames :: [String] -readersNames = sort (map fst (readers :: [(String, Reader PandocIO)])) +readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)])) writersNames :: [String] -writersNames = sort (map fst (writers :: [(String, Writer PandocIO)])) +writersNames = sort (map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)])) splitField :: String -> (String, String) splitField s = - case break (`elem` ":=") s of + case break (`elemText` ":=") s of (k,_:v) -> (k,v) (k,[]) -> (k,"true") @@ -991,7 +993,7 @@ applyDefaults opt file = runIOorExplode $ do case Y.decode1 inp of Right (f :: Opt -> Opt) -> return $ f opt Left (errpos, errmsg) -> throwError $ - PandocParseError $ + PandocParseError $ T.pack $ "Error parsing " ++ fp' ++ " line " ++ show (Y.posLine errpos) ++ " column " ++ show (Y.posColumn errpos) ++ ":\n" ++ errmsg @@ -1001,18 +1003,18 @@ lookupHighlightStyle s | takeExtension s == ".theme" = -- attempt to load KDE theme do contents <- readFileLazy s case parseTheme contents of - Left _ -> throwError $ PandocOptionError $ + Left _ -> throwError $ PandocOptionError $ T.pack $ "Could not read highlighting theme " ++ s Right sty -> return sty | otherwise = - case lookup (map toLower s) highlightingStyles of + case lookup (T.toLower $ T.pack s) highlightingStyles of Just sty -> return sty - Nothing -> throwError $ PandocOptionError $ + Nothing -> throwError $ PandocOptionError $ T.pack $ "Unknown highlight-style " ++ s deprecatedOption :: String -> String -> IO () deprecatedOption o msg = - runIO (report $ Deprecated o msg) >>= + runIO (report $ Deprecated (T.pack o) (T.pack msg)) >>= \r -> case r of Right () -> return () Left e -> E.throwIO e @@ -1024,13 +1026,14 @@ setVariable key val (Context ctx) = addMeta :: String -> String -> Meta -> Meta addMeta k v meta = - case lookupMeta k meta of - Nothing -> setMeta k v' meta + case lookupMeta k' meta of + Nothing -> setMeta k' v' meta Just (MetaList xs) -> - setMeta k (MetaList (xs ++ [v'])) meta - Just x -> setMeta k (MetaList [x, v']) meta + setMeta k' (MetaList (xs ++ [v'])) meta + Just x -> setMeta k' (MetaList [x, v']) meta where v' = readMetaValue v + k' = T.pack k readMetaValue :: String -> MetaValue readMetaValue s @@ -1040,7 +1043,7 @@ readMetaValue s | s == "false" = MetaBool False | s == "False" = MetaBool False | s == "FALSE" = MetaBool False - | otherwise = MetaString s + | otherwise = MetaString $ T.pack s -- On Windows with ghc 8.6+, we need to rewrite paths -- beginning with \\ to \\?\UNC\. -- See #5127. diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs index a02d8d15e..25e0a303e 100644 --- a/src/Text/Pandoc/App/FormatHeuristics.hs +++ b/src/Text/Pandoc/App/FormatHeuristics.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.App.FormatHeuristics Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -16,10 +17,11 @@ module Text.Pandoc.App.FormatHeuristics import Prelude import Data.Char (toLower) +import Data.Text (Text) import System.FilePath (takeExtension) -- Determine default format based on file extensions. -formatFromFilePaths :: [FilePath] -> Maybe String +formatFromFilePaths :: [FilePath] -> Maybe Text formatFromFilePaths [] = Nothing formatFromFilePaths (x:xs) = case formatFromFilePath x of @@ -27,7 +29,7 @@ formatFromFilePaths (x:xs) = Nothing -> formatFromFilePaths xs -- Determine format based on file extension -formatFromFilePath :: FilePath -> Maybe String +formatFromFilePath :: FilePath -> Maybe Text formatFromFilePath x = case takeExtension (map toLower x) of ".adoc" -> Just "asciidoc" diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index 6db397147..c0d06e0f4 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -33,7 +33,7 @@ import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault), ReferenceLocation (EndOfDocument), ObfuscationMethod (NoObfuscation), CiteMethod (Citeproc)) -import Text.Pandoc.Shared (camelCaseToHyphenated) +import Text.Pandoc.Shared (camelCaseStrToHyphenated) import Text.DocLayout (render) import Text.DocTemplates (Context(..), Val(..)) import Data.Text (Text, unpack) @@ -76,8 +76,8 @@ data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces , optStandalone :: Bool -- ^ Include header, footer - , optFrom :: Maybe String -- ^ Reader format - , optTo :: Maybe String -- ^ Writer format + , optFrom :: Maybe Text -- ^ Reader format + , optTo :: Maybe Text -- ^ Writer format , optTableOfContents :: Bool -- ^ Include table of contents , optShiftHeadingLevelBy :: Int -- ^ Shift heading level by , optTemplate :: Maybe FilePath -- ^ Custom template @@ -92,7 +92,7 @@ data Opt = Opt , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 , optSelfContained :: Bool -- ^ Make HTML accessible offline , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML - , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code + , optHighlightStyle :: Maybe Text -- ^ Style to use for highlighted code , optSyntaxDefinitions :: [FilePath] -- ^ xml syntax defs to load , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math @@ -117,9 +117,9 @@ data Opt = Opt , optColumns :: Int -- ^ Line length in characters , optFilters :: [Filter] -- ^ Filters to apply , optEmailObfuscation :: ObfuscationMethod - , optIdentifierPrefix :: String + , optIdentifierPrefix :: Text , optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs - , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks + , optIndentedCodeClasses :: [Text] -- ^ Default classes for indented code blocks , optDataDir :: Maybe FilePath , optCiteMethod :: CiteMethod -- ^ Method to output cites , optListings :: Bool -- ^ Use listings package for code blocks @@ -128,18 +128,18 @@ data Opt = Opt , optSlideLevel :: Maybe Int -- ^ Header level that creates slides , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 , optAscii :: Bool -- ^ Prefer ascii output - , optDefaultImageExtension :: String -- ^ Default image extension + , optDefaultImageExtension :: Text -- ^ Default image extension , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. , optFileScope :: Bool -- ^ Parse input files before combining - , optTitlePrefix :: Maybe String -- ^ Prefix for title + , optTitlePrefix :: Maybe Text -- ^ Prefix for title , optCss :: [FilePath] -- ^ CSS files to link to , optIpynbOutput :: IpynbOutput -- ^ How to treat ipynb output blocks , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body , optIncludeInHeader :: [FilePath] -- ^ Files to include in header , optResourcePath :: [FilePath] -- ^ Path to search for images etc - , optRequestHeaders :: [(String, String)] -- ^ Headers for HTTP requests + , optRequestHeaders :: [(Text, Text)] -- ^ Headers for HTTP requests , optEol :: LineEnding -- ^ Style of line-endings to use , optStripComments :: Bool -- ^ Skip HTML comments } deriving (Generic, Show) @@ -167,13 +167,13 @@ doOpt (k',v) = do "toc" -> parseYAML v >>= \x -> return (\o -> o{ optTableOfContents = x }) "from" -> - parseYAML v >>= \x -> return (\o -> o{ optFrom = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optFrom = x }) "reader" -> - parseYAML v >>= \x -> return (\o -> o{ optFrom = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optFrom = x }) "to" -> - parseYAML v >>= \x -> return (\o -> o{ optTo = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optTo = x }) "writer" -> - parseYAML v >>= \x -> return (\o -> o{ optTo = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optTo = x }) "shift-heading-level-by" -> parseYAML v >>= \x -> return (\o -> o{ optShiftHeadingLevelBy = x }) "template" -> @@ -211,7 +211,7 @@ doOpt (k',v) = do "html-q-tags" -> parseYAML v >>= \x -> return (\o -> o{ optHtmlQTags = x }) "highlight-style" -> - parseYAML v >>= \x -> return (\o -> o{ optHighlightStyle = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optHighlightStyle = x }) "syntax-definition" -> (parseYAML v >>= \x -> return (\o -> o{ optSyntaxDefinitions = map unpack x })) @@ -274,12 +274,12 @@ doOpt (k',v) = do parseYAML v >>= \x -> return (\o -> o{ optEmailObfuscation = x }) "identifier-prefix" -> parseYAML v >>= \x -> - return (\o -> o{ optIdentifierPrefix = unpack x }) + return (\o -> o{ optIdentifierPrefix = x }) "strip-empty-paragraphs" -> parseYAML v >>= \x -> return (\o -> o{ optStripEmptyParagraphs = x }) "indented-code-classes" -> parseYAML v >>= \x -> - return (\o -> o{ optIndentedCodeClasses = map unpack x }) + return (\o -> o{ optIndentedCodeClasses = x }) "data-dir" -> parseYAML v >>= \x -> return (\o -> o{ optDataDir = unpack <$> x }) "cite-method" -> @@ -305,7 +305,7 @@ doOpt (k',v) = do parseYAML v >>= \x -> return (\o -> o{ optAscii = x }) "default-image-extension" -> parseYAML v >>= \x -> - return (\o -> o{ optDefaultImageExtension = unpack x }) + return (\o -> o{ optDefaultImageExtension = x }) "extract-media" -> parseYAML v >>= \x -> return (\o -> o{ optExtractMedia = unpack <$> x }) @@ -314,7 +314,7 @@ doOpt (k',v) = do "file-scope" -> parseYAML v >>= \x -> return (\o -> o{ optFileScope = x }) "title-prefix" -> - parseYAML v >>= \x -> return (\o -> o{ optTitlePrefix = unpack <$> x }) + parseYAML v >>= \x -> return (\o -> o{ optTitlePrefix = x }) "css" -> (parseYAML v >>= \x -> return (\o -> o{ optCss = map unpack x })) <|> @@ -344,9 +344,7 @@ doOpt (k',v) = do return (\o -> o{ optResourcePath = map unpack x }) "request-headers" -> parseYAML v >>= \x -> - return (\o -> o{ optRequestHeaders = - map (\(key,val) -> - (unpack key, unpack val)) x }) + return (\o -> o{ optRequestHeaders = x }) "eol" -> parseYAML v >>= \x -> return (\o -> o{ optEol = x }) "strip-comments" -> @@ -429,13 +427,13 @@ defaultOpts = Opt contextToMeta :: Context Text -> Meta contextToMeta (Context m) = - Meta . M.mapKeys unpack . M.map valToMetaVal $ m + Meta . M.map valToMetaVal $ m valToMetaVal :: Val Text -> MetaValue valToMetaVal (MapVal (Context m)) = - MetaMap . M.mapKeys unpack . M.map valToMetaVal $ m + MetaMap . M.map valToMetaVal $ m valToMetaVal (ListVal xs) = MetaList $ map valToMetaVal xs -valToMetaVal (SimpleVal d) = MetaString (unpack $ render Nothing d) +valToMetaVal (SimpleVal d) = MetaString $ render Nothing d valToMetaVal NullVal = MetaString "" -- see https://github.com/jgm/pandoc/pull/4083 @@ -446,5 +444,5 @@ $(deriveJSON defaultOptions{ fieldLabelModifier = map toLower } ''LineEnding) $(deriveJSON defaultOptions{ fieldLabelModifier = - camelCaseToHyphenated . dropWhile isLower + camelCaseStrToHyphenated . dropWhile isLower } ''Opt) diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index b29860c03..d328a9b6a 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -1,6 +1,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {- | @@ -27,7 +28,7 @@ import Control.Monad import Control.Monad.Except (catchError, throwError) import Control.Monad.Trans import Data.Char (toLower) -import Data.List (find, isPrefixOf, isSuffixOf) +import Data.List (find, isPrefixOf) import Data.Maybe (fromMaybe) import Skylighting (defaultSyntaxMap) import Skylighting.Parser (addSyntaxDefinition, parseSyntaxDefinition) @@ -42,18 +43,18 @@ import Text.Pandoc.App.CommandLineOptions (engines, lookupHighlightStyle, setVariable) import qualified Text.Pandoc.UTF8 as UTF8 +readUtf8File :: PandocMonad m => FilePath -> m T.Text +readUtf8File = fmap UTF8.toText . readFileStrict + -- | Settings specifying how document output should be produced. data OutputSettings = OutputSettings - { outputFormat :: String + { outputFormat :: T.Text , outputWriter :: Writer PandocIO - , outputWriterName :: String + , outputWriterName :: T.Text , outputWriterOptions :: WriterOptions , outputPdfProgram :: Maybe String } -readUtf8File :: PandocMonad m => FilePath -> m String -readUtf8File = fmap UTF8.toString . readFileStrict - -- | Get output settings from command line options. optToOutputSettings :: Opt -> PandocIO OutputSettings optToOutputSettings opts = do @@ -85,33 +86,33 @@ optToOutputSettings opts = do case formatFromFilePaths [outputFile] of Nothing -> do report $ CouldNotDeduceFormat - [takeExtension outputFile] "html" + [T.pack $ takeExtension outputFile] "html" return ("html", Nothing) Just f -> return (f, Nothing) - let format = if ".lua" `isSuffixOf` writerName + let format = if ".lua" `T.isSuffixOf` writerName then writerName - else map toLower $ baseWriterName writerName + else T.toLower $ baseWriterName writerName (writer :: Writer PandocIO, writerExts) <- - if ".lua" `isSuffixOf` format + if ".lua" `T.isSuffixOf` format then return (TextWriter - (\o d -> writeCustom writerName o d) + (\o d -> writeCustom (T.unpack writerName) o d) :: Writer PandocIO, mempty) - else getWriter (map toLower writerName) + else getWriter (T.toLower writerName) let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput let addSyntaxMap existingmap f = do res <- liftIO (parseSyntaxDefinition f) case res of - Left errstr -> throwError $ PandocSyntaxMapError errstr + Left errstr -> throwError $ PandocSyntaxMapError $ T.pack errstr Right syn -> return $ addSyntaxDefinition syn existingmap syntaxMap <- foldM addSyntaxMap defaultSyntaxMap (optSyntaxDefinitions opts) - hlStyle <- maybe (return Nothing) (fmap Just . lookupHighlightStyle) + hlStyle <- maybe (return Nothing) (fmap Just . lookupHighlightStyle . T.unpack) (optHighlightStyle opts) let setVariableM k v = return . setVariable k v @@ -135,15 +136,15 @@ optToOutputSettings opts = do >>= setVariableM "outputfile" outputFile >>= - setFilesVariableM "include-before" (optIncludeBeforeBody opts) + setFilesVariableM "include-before" (T.pack <$> optIncludeBeforeBody opts) >>= - setFilesVariableM "include-after" (optIncludeAfterBody opts) + setFilesVariableM "include-after" (T.pack <$> optIncludeAfterBody opts) >>= - setFilesVariableM "header-includes" (optIncludeInHeader opts) + setFilesVariableM "header-includes" (T.pack <$> optIncludeInHeader opts) >>= setListVariableM "css" (optCss opts) >>= - maybe return (setVariableM "title-prefix") + maybe return (setVariableM "title-prefix" . T.unpack) (optTitlePrefix opts) >>= maybe return (setVariableM "epub-cover-image") @@ -168,7 +169,7 @@ optToOutputSettings opts = do Just tp -> do -- strip off extensions let tp' = case takeExtension tp of - "" -> tp <.> format + "" -> tp <.> T.unpack format _ -> tp Just . UTF8.toText <$> ((do surl <- stSourceURL <$> getCommonState @@ -176,7 +177,7 @@ optToOutputSettings opts = do -- unless the full URL is specified: modifyCommonState $ \st -> st{ stSourceURL = Nothing } - (bs, _) <- fetchItem tp' + (bs, _) <- fetchItem $ T.pack tp' modifyCommonState $ \st -> st{ stSourceURL = surl } return bs) @@ -194,7 +195,7 @@ optToOutputSettings opts = do Just ts -> do res <- compileTemplate templatePath ts case res of - Left e -> throwError $ PandocTemplateError e + Left e -> throwError $ PandocTemplateError $ T.pack e Right t -> return $ Just t let writerOpts = def { @@ -222,7 +223,7 @@ optToOutputSettings opts = do , writerSlideLevel = optSlideLevel opts , writerHighlightStyle = hlStyle , writerSetextHeaders = optSetextHeaders opts - , writerEpubSubdirectory = optEpubSubdirectory opts + , writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts , writerEpubMetadata = epubMetadata , writerEpubFonts = optEpubFonts opts , writerEpubChapterLevel = optEpubChapterLevel opts @@ -239,12 +240,12 @@ optToOutputSettings opts = do , outputPdfProgram = maybePdfProg } -baseWriterName :: String -> String -baseWriterName = takeWhile (\c -> c /= '+' && c /= '-') +baseWriterName :: T.Text -> T.Text +baseWriterName = T.takeWhile (\c -> c /= '+' && c /= '-') -pdfWriterAndProg :: Maybe String -- ^ user-specified writer name +pdfWriterAndProg :: Maybe T.Text -- ^ user-specified writer name -> Maybe String -- ^ user-specified pdf-engine - -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) + -> IO (T.Text, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) pdfWriterAndProg mWriter mEngine = case go mWriter mEngine of Right (writ, prog) -> return (writ, Just prog) @@ -256,20 +257,20 @@ pdfWriterAndProg mWriter mEngine = go (Just writer) (Just engine) = case find (== (baseWriterName writer, takeBaseName engine)) engines of Just _ -> Right (writer, engine) - Nothing -> Left $ "pdf-engine " ++ engine ++ - " is not compatible with output format " ++ writer + Nothing -> Left $ "pdf-engine " <> T.pack engine <> + " is not compatible with output format " <> writer writerForEngine eng = case [f | (f,e) <- engines, e == eng] of fmt : _ -> Right fmt [] -> Left $ - "pdf-engine " ++ eng ++ " not known" + "pdf-engine " <> T.pack eng <> " not known" engineForWriter "pdf" = Left "pdf writer" engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of eng : _ -> Right eng [] -> Left $ - "cannot produce pdf output from " ++ w + "cannot produce pdf output from " <> w -isTextFormat :: String -> Bool +isTextFormat :: T.Text -> Bool isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx","pdf"] diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index ce8aa99ca..f4afec90c 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -20,9 +20,7 @@ module Text.Pandoc.BCP47 ( where import Prelude import Control.Monad (guard) -import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper, toLower, - toUpper) -import Data.List (intercalate) +import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.DocTemplates (FromContext(..)) @@ -30,22 +28,22 @@ import qualified Data.Text as T import qualified Text.Parsec as P -- | Represents BCP 47 language/country code. -data Lang = Lang{ langLanguage :: String - , langScript :: String - , langRegion :: String - , langVariants :: [String] } +data Lang = Lang{ langLanguage :: T.Text + , langScript :: T.Text + , langRegion :: T.Text + , langVariants :: [T.Text] } deriving (Eq, Ord, Show) -- | Render a Lang as BCP 47. -renderLang :: Lang -> String -renderLang lang = intercalate "-" (langLanguage lang : filter (not . null) +renderLang :: Lang -> T.Text +renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null) ([langScript lang, langRegion lang] ++ langVariants lang)) -- | Get the contents of the `lang` metadata field or variable. -getLang :: WriterOptions -> Meta -> Maybe String +getLang :: WriterOptions -> Meta -> Maybe T.Text getLang opts meta = case lookupContext "lang" (writerVariables opts) of - Just s -> Just $ T.unpack s + Just s -> Just s _ -> case lookupMeta "lang" meta of Just (MetaInlines [Str s]) -> Just s @@ -55,11 +53,11 @@ getLang opts meta = -- | Parse a BCP 47 string as a Lang. Currently we parse -- extensions and private-use fields as "variants," even -- though officially they aren't. -parseBCP47 :: String -> Either String Lang +parseBCP47 :: T.Text -> Either T.Text Lang parseBCP47 lang = case P.parse bcp47 "lang" lang of Right r -> Right r - Left e -> Left $ show e + Left e -> Left $ T.pack $ show e where bcp47 = do language <- pLanguage script <- P.option "" pScript @@ -75,19 +73,19 @@ parseBCP47 lang = cs <- P.many1 asciiLetter let lcs = length cs guard $ lcs == 2 || lcs == 3 - return $ map toLower cs + return $ T.toLower $ T.pack $ cs pScript = P.try $ do P.char '-' x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) xs <- P.count 3 (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) - return $ map toLower (x:xs) + return $ T.toLower $ T.pack (x:xs) pRegion = P.try $ do P.char '-' cs <- P.many1 asciiLetter let lcs = length cs guard $ lcs == 2 || lcs == 3 - return $ map toUpper cs + return $ T.toUpper $ T.pack cs pVariant = P.try $ do P.char '-' ds <- P.option "" (P.count 1 P.digit) @@ -96,12 +94,12 @@ parseBCP47 lang = guard $ if null ds then length var >= 5 && length var <= 8 else length var == 4 - return $ map toLower var + return $ T.toLower $ T.pack var pExtension = P.try $ do P.char '-' cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) guard $ length cs >= 2 && length cs <= 8 - return $ map toLower cs + return $ T.toLower $ T.pack cs pPrivateUse = P.try $ do P.char '-' P.char 'x' @@ -109,4 +107,4 @@ parseBCP47 lang = cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) guard $ not (null cs) && length cs <= 8 let var = "x-" ++ cs - return $ map toLower var + return $ T.toLower $ T.pack var diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs index 660ec1b12..47a96b468 100644 --- a/src/Text/Pandoc/CSS.hs +++ b/src/Text/Pandoc/CSS.hs @@ -19,17 +19,18 @@ module Text.Pandoc.CSS ( foldOrElse where import Prelude +import qualified Data.Text as T import Text.Pandoc.Shared (trim) import Text.Parsec -import Text.Parsec.String +import Text.Parsec.Text -ruleParser :: Parser (String, String) +ruleParser :: Parser (T.Text, T.Text) ruleParser = do p <- many1 (noneOf ":") <* char ':' v <- many1 (noneOf ":;") <* optional (char ';') <* spaces - return (trim p, trim v) + return (trim $ T.pack p, trim $ T.pack v) -styleAttrParser :: Parser [(String, String)] +styleAttrParser :: Parser [(T.Text, T.Text)] styleAttrParser = many1 ruleParser orElse :: Eq a => a -> a -> a -> a @@ -44,7 +45,7 @@ eitherToMaybe _ = Nothing -- | takes a list of keys/properties and a CSS string and -- returns the corresponding key-value-pairs. -pickStylesToKVs :: [String] -> String -> [(String, String)] +pickStylesToKVs :: [T.Text] -> T.Text -> [(T.Text, T.Text)] pickStylesToKVs props styleAttr = case parse styleAttrParser "" styleAttr of Left _ -> [] @@ -52,7 +53,7 @@ pickStylesToKVs props styleAttr = -- | takes a list of key/property synonyms and a CSS string and maybe -- returns the value of the first match (in order of the supplied list) -pickStyleAttrProps :: [String] -> String -> Maybe String +pickStyleAttrProps :: [T.Text] -> T.Text -> Maybe T.Text pickStyleAttrProps lookupProps styleAttr = do styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr foldOrElse Nothing $ map (`lookup` styles) lookupProps diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 6d4e8d895..8449e4a0e 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -9,6 +9,8 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Class Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane @@ -79,7 +81,6 @@ import qualified System.Random as IO (newStdGen) import Codec.Archive.Zip import qualified Data.CaseInsensitive as CI import Data.Unique (hashUnique) -import Data.List (stripPrefix) import qualified Data.Unique as IO (newUnique) import qualified Text.Pandoc.UTF8 as UTF8 import qualified System.Directory as Directory @@ -134,6 +135,7 @@ import Data.Default import System.IO.Error import System.IO (stderr) import qualified Data.Map as M +import qualified Data.Text as T import Text.Pandoc.Error import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, @@ -153,7 +155,7 @@ import qualified Paths_pandoc as Paths class (Functor m, Applicative m, Monad m, MonadError PandocError m) => PandocMonad m where -- | Lookup an environment variable. - lookupEnv :: String -> m (Maybe String) + lookupEnv :: T.Text -> m (Maybe T.Text) -- | Get the current (UTC) time. getCurrentTime :: m UTCTime -- | Get the locale's time zone. @@ -164,7 +166,7 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m) newUniqueHash :: m Int -- | Retrieve contents and mime type from a URL, raising -- an error on failure. - openURL :: String -> m (B.ByteString, Maybe MimeType) + openURL :: T.Text -> m (B.ByteString, Maybe MimeType) -- | Read the lazy ByteString contents from a file path, -- raising an error on failure. readFileLazy :: FilePath -> m BL.ByteString @@ -199,10 +201,10 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m) -- Output a debug message to sterr, using 'Debug.Trace.trace', -- if tracing is enabled. Note: this writes to stderr even in -- pure instances. - trace :: String -> m () + trace :: T.Text -> m () trace msg = do tracing <- getsCommonState stTrace - when tracing $ Debug.Trace.trace ("[trace] " ++ msg) (return ()) + when tracing $ Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ()) -- * Functions defined for all PandocMonad instances @@ -238,8 +240,8 @@ setTrace useTracing = modifyCommonState $ \st -> st{stTrace = useTracing} -- | Set request header to use in HTTP requests. setRequestHeader :: PandocMonad m - => String -- ^ Header name - -> String -- ^ Value + => T.Text -- ^ Header name + -> T.Text -- ^ Value -> m () setRequestHeader name val = modifyCommonState $ \st -> st{ stRequestHeaders = @@ -277,7 +279,7 @@ setInputFiles fs = do _ -> Nothing modifyCommonState $ \st -> st{ stInputFiles = fs - , stSourceURL = sourceURL } + , stSourceURL = T.pack <$> sourceURL } -- Retrieve the output filename. getOutputFile :: PandocMonad m => m (Maybe FilePath) @@ -307,10 +309,10 @@ getZonedTime = do return $ utcToZonedTime tz t -- | Read file, checking in any number of directories. -readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe String) +readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text) readFileFromDirs [] _ = return Nothing readFileFromDirs (d:ds) f = catchError - ((Just . UTF8.toStringLazy) <$> readFileLazy (d </> f)) + ((Just . T.pack . UTF8.toStringLazy) <$> readFileLazy (d </> f)) (\_ -> readFileFromDirs ds f) instance TemplateMonad PandocIO where @@ -331,9 +333,9 @@ data CommonState = CommonState { stLog :: [LogMessage] -- ^ A list of log messages in reverse order , stUserDataDir :: Maybe FilePath -- ^ Directory to search for data files - , stSourceURL :: Maybe String + , stSourceURL :: Maybe T.Text -- ^ Absolute URL + dir of 1st source file - , stRequestHeaders :: [(String, String)] + , stRequestHeaders :: [(T.Text, T.Text)] -- ^ Headers to add for HTTP requests , stMediaBag :: MediaBag -- ^ Media parsed from binary containers @@ -370,7 +372,7 @@ instance Default CommonState where -- | Convert BCP47 string to a Lang, issuing warning -- if there are problems. -toLang :: PandocMonad m => Maybe String -> m (Maybe Lang) +toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang) toLang Nothing = return Nothing toLang (Just s) = case parseBCP47 s of @@ -395,14 +397,14 @@ getTranslations = do Nothing -> return mempty -- no language defined Just (_, Just t) -> return t Just (lang, Nothing) -> do -- read from file - let translationFile = "translations/" ++ renderLang lang ++ ".yaml" - let fallbackFile = "translations/" ++ langLanguage lang ++ ".yaml" + let translationFile = "translations/" <> renderLang lang <> ".yaml" + let fallbackFile = "translations/" <> langLanguage lang <> ".yaml" let getTrans fp = do bs <- readDataFile fp - case readTranslations (UTF8.toString bs) of + case readTranslations (UTF8.toText bs) of Left e -> do report $ CouldNotLoadTranslations (renderLang lang) - (fp ++ ": " ++ e) + (T.pack fp <> ": " <> e) -- make sure we don't try again... modifyCommonState $ \st -> st{ stTranslations = Nothing } @@ -411,14 +413,14 @@ getTranslations = do modifyCommonState $ \st -> st{ stTranslations = Just (lang, Just t) } return t - catchError (getTrans translationFile) + catchError (getTrans $ T.unpack translationFile) (\_ -> - catchError (getTrans fallbackFile) + catchError (getTrans $ T.unpack fallbackFile) (\e -> do report $ CouldNotLoadTranslations (renderLang lang) $ case e of PandocCouldNotFindDataFileError _ -> - "data file " ++ fallbackFile ++ " not found" + "data file " <> fallbackFile <> " not found" _ -> "" -- make sure we don't try again... modifyCommonState $ \st -> st{ stTranslations = Nothing } @@ -426,13 +428,13 @@ getTranslations = do -- | Get a translation from the current term map. -- Issue a warning if the term is not defined. -translateTerm :: PandocMonad m => Term -> m String +translateTerm :: PandocMonad m => Term -> m T.Text translateTerm term = do translations <- getTranslations case lookupTerm term translations of Just s -> return s Nothing -> do - report $ NoTranslation (show term) + report $ NoTranslation $ T.pack $ show term return "" -- | Evaluate a 'PandocIO' operation. @@ -458,7 +460,7 @@ liftIOError :: (String -> IO a) -> String -> PandocIO a liftIOError f u = do res <- liftIO $ tryIOError $ f u case res of - Left e -> throwError $ PandocIOError u e + Left e -> throwError $ PandocIOError (T.pack u) e Right r -> return r -- | Show potential IO errors to the user continuing execution anyway @@ -466,24 +468,24 @@ logIOError :: IO () -> PandocIO () logIOError f = do res <- liftIO $ tryIOError f case res of - Left e -> report $ IgnoredIOError (E.displayException e) + Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e Right _ -> pure () instance PandocMonad PandocIO where - lookupEnv = liftIO . IO.lookupEnv + lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack getCurrentTime = liftIO IO.getCurrentTime getCurrentTimeZone = liftIO IO.getCurrentTimeZone newStdGen = liftIO IO.newStdGen newUniqueHash = hashUnique <$> liftIO IO.newUnique openURL u - | Just u'' <- stripPrefix "data:" u = do - let mime = takeWhile (/=',') u'' + | Just u'' <- T.stripPrefix "data:" u = do + let mime = T.takeWhile (/=',') u'' let contents = UTF8.fromString $ - unEscapeString $ drop 1 $ dropWhile (/=',') u'' + unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u'' return (decodeLenient contents, Just mime) | otherwise = do - let toReqHeader (n, v) = (CI.mk (UTF8.fromString n), UTF8.fromString v) + let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v) customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders report $ Fetching u res <- liftIO $ E.try $ withSocketsDo $ do @@ -493,11 +495,11 @@ instance PandocMonad PandocIO where Left _ -> return x Right pr -> parseReq pr >>= \r -> return (addProxy (host r) (port r) x) - req <- parseReq u >>= addProxy' + req <- parseReq (T.unpack u) >>= addProxy' let req' = req{requestHeaders = customHeaders ++ requestHeaders req} resp <- newManager tlsManagerSettings >>= httpLbs req' return (B.concat $ toChunks $ responseBody resp, - UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) + UTF8.toText `fmap` lookup hContentType (responseHeaders resp)) case res of Right r -> return r @@ -519,22 +521,22 @@ instance PandocMonad PandocIO where logOutput msg = liftIO $ do UTF8.hPutStr stderr $ "[" ++ show (messageVerbosity msg) ++ "] " - alertIndent $ lines $ showLogMessage msg + alertIndent $ T.lines $ showLogMessage msg -alertIndent :: [String] -> IO () +alertIndent :: [T.Text] -> IO () alertIndent [] = return () alertIndent (l:ls) = do - UTF8.hPutStrLn stderr l + UTF8.hPutStrLn stderr $ T.unpack l mapM_ go ls where go l' = do UTF8.hPutStr stderr " " - UTF8.hPutStrLn stderr l' + UTF8.hPutStrLn stderr $ T.unpack l' -- | Specialized version of parseURIReference that disallows -- single-letter schemes. Reason: these are usually windows absolute -- paths. -parseURIReference' :: String -> Maybe URI +parseURIReference' :: T.Text -> Maybe URI parseURIReference' s = - case parseURIReference s of + case parseURIReference (T.unpack s) of Just u | length (uriScheme u) > 2 -> Just u | null (uriScheme u) -> Just u -- protocol-relative @@ -554,16 +556,16 @@ getUserDataDir = getsCommonState stUserDataDir -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. fetchItem :: PandocMonad m - => String + => T.Text -> m (B.ByteString, Maybe MimeType) fetchItem s = do mediabag <- getMediaBag - case lookupMedia s mediabag of + case lookupMedia (T.unpack s) mediabag of Just (mime, bs) -> return (BL.toStrict bs, Just mime) Nothing -> downloadOrRead s downloadOrRead :: PandocMonad m - => String + => T.Text -> m (B.ByteString, Maybe MimeType) downloadOrRead s = do sourceURL <- getsCommonState stSourceURL @@ -571,19 +573,19 @@ downloadOrRead s = do ensureEscaped, ensureEscaped s) of (Just u, s') -> -- try fetching from relative path at source case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u + Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` u Nothing -> openURL s' -- will throw error - (Nothing, s'@('/':'/':c:_)) | c /= '?' -> -- protocol-relative URI + (Nothing, s'@(T.unpack -> ('/':'/':c:_))) | c /= '?' -> -- protocol-relative URI -- we exclude //? because of //?UNC/ on Windows case parseURIReference' s' of - Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon + Just u' -> openURL $ T.pack $ show $ u' `nonStrictRelativeTo` httpcolon Nothing -> openURL s' -- will throw error (Nothing, s') -> - case parseURI s' of -- requires absolute URI + case parseURI (T.unpack s') of -- requires absolute URI Just u' | uriScheme u' == "file:" -> - readLocalFile $ uriPathToPath (uriPath u') + readLocalFile $ uriPathToPath (T.pack $ uriPath u') -- We don't want to treat C:/ as a scheme: - Just u' | length (uriScheme u') > 2 -> openURL (show u') + Just u' | length (uriScheme u') > 2 -> openURL (T.pack $ show u') _ -> readLocalFile fp -- get from local file system where readLocalFile f = do resourcePath <- getResourcePath @@ -596,13 +598,13 @@ downloadOrRead s = do uriPath = "", uriQuery = "", uriFragment = "" } - dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#') - fp = unEscapeString $ dropFragmentAndQuery s + dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#') + fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s mime = case takeExtension fp of ".gz" -> getMimeType $ dropExtension fp ".svgz" -> getMimeType $ dropExtension fp ++ ".svg" x -> getMimeType x - ensureEscaped = escapeURIString isAllowedInURI . map convertSlash + ensureEscaped = T.pack . escapeURIString isAllowedInURI . T.unpack . T.map convertSlash convertSlash '\\' = '/' convertSlash x = x @@ -770,7 +772,7 @@ readDefaultDataFile "reference.odt" = readDefaultDataFile fname = #ifdef EMBED_DATA_FILES case lookup (makeCanonical fname) dataFiles of - Nothing -> throwError $ PandocCouldNotFindDataFileError fname + Nothing -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname Just contents -> return contents #else getDataFileName fname' >>= checkExistence >>= readFileStrict @@ -781,7 +783,7 @@ checkExistence fn = do exists <- fileExists fn if exists then return fn - else throwError $ PandocCouldNotFindDataFileError fn + else throwError $ PandocCouldNotFindDataFileError $ T.pack fn #endif makeCanonical :: FilePath -> FilePath @@ -792,7 +794,7 @@ makeCanonical = Posix.joinPath . transformPathParts . splitDirectories go as x = x : as withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a -withPaths [] _ fp = throwError $ PandocResourceNotFound fp +withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp withPaths (p:ps) action fp = catchError (action (p </> fp)) (\_ -> withPaths ps action fp) @@ -800,14 +802,14 @@ withPaths (p:ps) action fp = -- | Fetch local or remote resource (like an image) and provide data suitable -- for adding it to the MediaBag. fetchMediaResource :: PandocMonad m - => String -> m (FilePath, Maybe MimeType, BL.ByteString) + => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) fetchMediaResource src = do (bs, mt) <- downloadOrRead src - let ext = fromMaybe (takeExtension src) + let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) (mt >>= extensionFromMimeType) let bs' = BL.fromChunks [bs] let basename = showDigest $ sha1 bs' - let fname = basename <.> ext + let fname = basename <.> T.unpack ext return (fname, mt, bs') -- | Traverse tree, filling media bag for any images that @@ -817,12 +819,12 @@ fillMediaBag d = walkM handleImage d where handleImage :: PandocMonad m => Inline -> m Inline handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag - case lookupMedia src mediabag of + case lookupMedia (T.unpack src) mediabag of Just (_, _) -> return $ Image attr lab (src, tit) Nothing -> do (fname, mt, bs) <- fetchMediaResource src insertMedia fname mt bs - return $ Image attr lab (fname, tit)) + return $ Image attr lab (T.pack fname, tit)) (\e -> case e of PandocResourceNotFound _ -> do @@ -832,7 +834,7 @@ fillMediaBag d = walkM handleImage d return $ Span ("",["image"],[]) lab PandocHttpError u er -> do report $ CouldNotFetchResource u - (show er ++ "\rReplacing image with description.") + (T.pack $ show er ++ "\rReplacing image with description.") -- emit alt text return $ Span ("",["image"],[]) lab _ -> throwError e) @@ -856,15 +858,15 @@ writeMedia dir mediabag subpath = do let fullpath = dir </> unEscapeString (normalise subpath) let mbcontents = lookupMedia subpath mediabag case mbcontents of - Nothing -> throwError $ PandocResourceNotFound subpath + Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath Just (_, bs) -> do - report $ Extracting fullpath + report $ Extracting $ T.pack fullpath liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) logIOError $ BL.writeFile fullpath bs adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline adjustImagePath dir paths (Image attr lab (src, tit)) - | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) + | T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit) adjustImagePath _ _ x = x -- | The 'PureState' contains ersatz representations @@ -878,7 +880,7 @@ data PureState = PureState { stStdGen :: StdGen -- contain every -- element at most -- once, e.g. [1..] - , stEnv :: [(String, String)] + , stEnv :: [(T.Text, T.Text)] , stTime :: UTCTime , stTimeZone :: TimeZone , stReferenceDocx :: Archive @@ -996,12 +998,12 @@ instance PandocMonad PandocPure where fps <- getsPureState stFiles case infoFileContents <$> getFileInfo fp fps of Just bs -> return (BL.fromStrict bs) - Nothing -> throwError $ PandocResourceNotFound fp + Nothing -> throwError $ PandocResourceNotFound $ T.pack fp readFileStrict fp = do fps <- getsPureState stFiles case infoFileContents <$> getFileInfo fp fps of Just bs -> return bs - Nothing -> throwError $ PandocResourceNotFound fp + Nothing -> throwError $ PandocResourceNotFound $ T.pack fp glob s = do FileTree ftmap <- getsPureState stFiles @@ -1019,7 +1021,7 @@ instance PandocMonad PandocPure where fps <- getsPureState stFiles case infoFileMTime <$> getFileInfo fp fps of Just tm -> return tm - Nothing -> throwError $ PandocIOError fp + Nothing -> throwError $ PandocIOError (T.pack fp) (userError "Can't get modification time") getCommonState = PandocPure $ lift get @@ -1070,7 +1072,7 @@ instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where when tracing $ do pos <- getPosition Debug.Trace.trace - ("[trace] Parsed " ++ msg ++ " at line " ++ + ("[trace] Parsed " ++ T.unpack msg ++ " at line " ++ show (sourceLine pos) ++ if sourceName pos == "chunk" then " of chunk" diff --git a/src/Text/Pandoc/Emoji.hs b/src/Text/Pandoc/Emoji.hs index 92a07b4c2..ec544e15d 100644 --- a/src/Text/Pandoc/Emoji.hs +++ b/src/Text/Pandoc/Emoji.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Emoji Copyright : Copyright (C) 2015 John MacFarlane @@ -13,9 +14,10 @@ Emoji symbol lookup from canonical string identifier. module Text.Pandoc.Emoji ( emojis, emojiToInline ) where import Prelude import qualified Data.Map as M +import qualified Data.Text as T import Text.Pandoc.Definition (Inline (Span, Str)) -emojis :: M.Map String String +emojis :: M.Map T.Text T.Text emojis = M.fromList [("+1","\128077") ,("-1","\128078") @@ -1810,6 +1812,6 @@ emojis = M.fromList ,("zzz","\128164") ] -emojiToInline :: String -> Maybe Inline +emojiToInline :: T.Text -> Maybe Inline emojiToInline emojikey = makeSpan <$> M.lookup emojikey emojis where makeSpan = Span ("", ["emoji"], [("data-emoji", emojikey)]) . (:[]) . Str diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 113ab9d6e..38db4fda9 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Error Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -22,6 +23,8 @@ import Prelude import Control.Exception (Exception) import Data.Typeable (Typeable) import Data.Word (Word8) +import Data.Text (Text) +import qualified Data.Text as T import GHC.Generics (Generic) import Network.HTTP.Client (HttpException) import System.Exit (ExitCode (..), exitWith) @@ -31,32 +34,32 @@ import Text.Printf (printf) import Text.Parsec.Error import Text.Parsec.Pos hiding (Line) -type Input = String +type Input = Text -data PandocError = PandocIOError String IOError - | PandocHttpError String HttpException - | PandocShouldNeverHappenError String - | PandocSomeError String - | PandocParseError String +data PandocError = PandocIOError Text IOError + | PandocHttpError Text HttpException + | PandocShouldNeverHappenError Text + | PandocSomeError Text + | PandocParseError Text | PandocParsecError Input ParseError - | PandocMakePDFError String - | PandocOptionError String - | PandocSyntaxMapError String + | PandocMakePDFError Text + | PandocOptionError Text + | PandocSyntaxMapError Text | PandocFailOnWarningError - | PandocPDFProgramNotFoundError String - | PandocPDFError String - | PandocFilterError String String - | PandocCouldNotFindDataFileError String - | PandocResourceNotFound String - | PandocTemplateError String - | PandocAppError String - | PandocEpubSubdirectoryError String - | PandocMacroLoop String - | PandocUTF8DecodingError String Int Word8 - | PandocIpynbDecodingError String - | PandocUnknownReaderError String - | PandocUnknownWriterError String - | PandocUnsupportedExtensionError String String + | PandocPDFProgramNotFoundError Text + | PandocPDFError Text + | PandocFilterError Text Text + | PandocCouldNotFindDataFileError Text + | PandocResourceNotFound Text + | PandocTemplateError Text + | PandocAppError Text + | PandocEpubSubdirectoryError Text + | PandocMacroLoop Text + | PandocUTF8DecodingError Text Int Word8 + | PandocIpynbDecodingError Text + | PandocUnknownReaderError Text + | PandocUnknownWriterError Text + | PandocUnsupportedExtensionError Text Text deriving (Show, Typeable, Generic) instance Exception PandocError @@ -68,23 +71,23 @@ handleError (Left e) = case e of PandocIOError _ err' -> ioError err' PandocHttpError u err' -> err 61 $ - "Could not fetch " ++ u ++ "\n" ++ show err' + "Could not fetch " <> u <> "\n" <> tshow err' PandocShouldNeverHappenError s -> err 62 $ - "Something we thought was impossible happened!\n" ++ - "Please report this to pandoc's developers: " ++ s + "Something we thought was impossible happened!\n" <> + "Please report this to pandoc's developers: " <> s PandocSomeError s -> err 63 s PandocParseError s -> err 64 s PandocParsecError input err' -> let errPos = errorPos err' errLine = sourceLine errPos errColumn = sourceColumn errPos - ls = lines input ++ [""] + ls = T.lines input <> [""] errorInFile = if length ls > errLine - 1 - then concat ["\n", ls !! (errLine - 1) - ,"\n", replicate (errColumn - 1) ' ' - ,"^"] + then T.concat ["\n", ls !! (errLine - 1) + ,"\n", T.replicate (errColumn - 1) " " + ,"^"] else "" - in err 65 $ "\nError at " ++ show err' ++ + in err 65 $ "\nError at " <> tshow err' <> -- if error comes from a chunk or included file, -- then we won't get the right text this way: if sourceName errPos == "source" @@ -95,49 +98,52 @@ handleError (Left e) = PandocSyntaxMapError s -> err 67 s PandocFailOnWarningError -> err 3 "Failing because there were warnings." PandocPDFProgramNotFoundError pdfprog -> err 47 $ - pdfprog ++ " not found. Please select a different --pdf-engine or install " ++ pdfprog - PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" ++ logmsg - PandocFilterError filtername msg -> err 83 $ "Error running filter " ++ - filtername ++ ":\n" ++ msg + pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog + PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg + PandocFilterError filtername msg -> err 83 $ "Error running filter " <> + filtername <> ":\n" <> msg PandocCouldNotFindDataFileError fn -> err 97 $ - "Could not find data file " ++ fn + "Could not find data file " <> fn PandocResourceNotFound fn -> err 99 $ - "File " ++ fn ++ " not found in resource path" - PandocTemplateError s -> err 5 $ "Error compiling template " ++ s + "File " <> fn <> " not found in resource path" + PandocTemplateError s -> err 5 $ "Error compiling template " <> s PandocAppError s -> err 4 s PandocEpubSubdirectoryError s -> err 31 $ - "EPUB subdirectory name '" ++ s ++ "' contains illegal characters" + "EPUB subdirectory name '" <> s <> "' contains illegal characters" PandocMacroLoop s -> err 91 $ - "Loop encountered in expanding macro " ++ s + "Loop encountered in expanding macro " <> s PandocUTF8DecodingError f offset w -> err 92 $ - "UTF-8 decoding error in " ++ f ++ " at byte offset " ++ show offset ++ - " (" ++ printf "%2x" w ++ ").\n" ++ + "UTF-8 decoding error in " <> f <> " at byte offset " <> tshow offset <> + " (" <> T.pack (printf "%2x" w) <> ").\n" <> "The input must be a UTF-8 encoded text." PandocIpynbDecodingError w -> err 93 $ - "ipynb decoding error: " ++ w + "ipynb decoding error: " <> w PandocUnknownReaderError r -> err 21 $ - "Unknown input format " ++ r ++ + "Unknown input format " <> r <> case r of - "doc" -> "\nPandoc can convert from DOCX, but not from DOC." ++ - "\nTry using Word to save your DOC file as DOCX," ++ + "doc" -> "\nPandoc can convert from DOCX, but not from DOC." <> + "\nTry using Word to save your DOC file as DOCX," <> " and convert that with pandoc." "pdf" -> "\nPandoc can convert to PDF, but not from PDF." _ -> "" PandocUnknownWriterError w -> err 22 $ - "Unknown output format " ++ w ++ + "Unknown output format " <> w <> case w of - "pdf" -> "To create a pdf using pandoc, use" ++ - " -t latex|beamer|context|ms|html5" ++ - "\nand specify an output file with " ++ + "pdf" -> "To create a pdf using pandoc, use" <> + " -t latex|beamer|context|ms|html5" <> + "\nand specify an output file with " <> ".pdf extension (-o filename.pdf)." "doc" -> "\nPandoc can convert to DOCX, but not from DOC." _ -> "" PandocUnsupportedExtensionError ext f -> err 23 $ - "The extension " ++ ext ++ " is not supported " ++ - "for " ++ f + "The extension " <> ext <> " is not supported " <> + "for " <> f -err :: Int -> String -> IO a +err :: Int -> Text -> IO a err exitCode msg = do - UTF8.hPutStrLn stderr msg + UTF8.hPutStrLn stderr (T.unpack msg) exitWith $ ExitFailure exitCode return undefined + +tshow :: Show a => a -> Text +tshow = T.pack . show diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index cdf4f159d..f079a9432 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -4,6 +4,7 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Extensions Copyright : Copyright (C) 2012-2019 John MacFarlane @@ -35,6 +36,7 @@ where import Prelude import Data.Bits (clearBit, setBit, testBit, (.|.)) import Data.Data (Data) +import qualified Data.Text as T import Data.Typeable (Typeable) import GHC.Generics (Generic) import Safe (readMay) @@ -304,7 +306,7 @@ strictExtensions = extensionsFromList ] -- | Default extensions from format-describing string. -getDefaultExtensions :: String -> Extensions +getDefaultExtensions :: T.Text -> Extensions getDefaultExtensions "markdown_strict" = strictExtensions getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions getDefaultExtensions "markdown_mmd" = multimarkdownExtensions @@ -402,7 +404,7 @@ allMarkdownExtensions = -- | Get all valid extensions for a format. This is used -- mainly in checking format specifications for validity. -getAllExtensions :: String -> Extensions +getAllExtensions :: T.Text -> Extensions getAllExtensions f = universalExtensions <> getAll f where autoIdExtensions = extensionsFromList @@ -507,14 +509,14 @@ getAllExtensions f = universalExtensions <> getAll f -- | Parse a format-specifying string into a markup format, -- a set of extensions to enable, and a set of extensions to disable. -parseFormatSpec :: String - -> Either ParseError (String, [Extension], [Extension]) +parseFormatSpec :: T.Text + -> Either ParseError (T.Text, [Extension], [Extension]) parseFormatSpec = parse formatSpec "" where formatSpec = do name <- formatName (extsToEnable, extsToDisable) <- foldl (flip ($)) ([],[]) <$> many extMod - return (name, reverse extsToEnable, reverse extsToDisable) + return (T.pack name, reverse extsToEnable, reverse extsToDisable) formatName = many1 $ noneOf "-+" extMod = do polarity <- oneOf "-+" diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs index c1cbf91a9..e8e737499 100644 --- a/src/Text/Pandoc/Filter/JSON.hs +++ b/src/Text/Pandoc/Filter/JSON.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Filter Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -18,6 +19,7 @@ import Control.Monad.Trans (MonadIO (liftIO)) import Data.Aeson (eitherDecode', encode) import Data.Char (toLower) import Data.Maybe (isNothing) +import qualified Data.Text as T import System.Directory (executable, doesFileExist, findExecutable, getPermissions) import System.Environment (getEnvironment) @@ -28,7 +30,7 @@ import Text.Pandoc.Error (PandocError (PandocFilterError)) import Text.Pandoc.Definition (Pandoc) import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Process (pipeProcess) -import Text.Pandoc.Shared (pandocVersion) +import Text.Pandoc.Shared (pandocVersion, tshow) import qualified Control.Exception as E import qualified Text.Pandoc.UTF8 as UTF8 @@ -61,18 +63,20 @@ externalFilter ropts f args' d = liftIO $ do unless (exists && isExecutable) $ do mbExe <- findExecutable f' when (isNothing mbExe) $ - E.throwIO $ PandocFilterError f ("Could not find executable " ++ f') + E.throwIO $ PandocFilterError fText (T.pack $ "Could not find executable " <> f') env <- getEnvironment let env' = Just - ( ("PANDOC_VERSION", pandocVersion) + ( ("PANDOC_VERSION", T.unpack pandocVersion) : ("PANDOC_READER_OPTIONS", UTF8.toStringLazy (encode ropts)) : env ) (exitcode, outbs) <- E.handle filterException $ pipeProcess env' f' args'' $ encode d case exitcode of - ExitSuccess -> either (E.throwIO . PandocFilterError f) + ExitSuccess -> either (E.throwIO . PandocFilterError fText . T.pack) return $ eitherDecode' outbs - ExitFailure ec -> E.throwIO $ PandocFilterError f - ("Filter returned error status " ++ show ec) - where filterException :: E.SomeException -> IO a - filterException e = E.throwIO $ PandocFilterError f (show e) + ExitFailure ec -> E.throwIO $ PandocFilterError fText + ("Filter returned error status " <> tshow ec) + where fText = T.pack f + + filterException :: E.SomeException -> IO a + filterException e = E.throwIO $ PandocFilterError fText $ tshow e diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs index afe525ab1..87c51ac42 100644 --- a/src/Text/Pandoc/Filter/Lua.hs +++ b/src/Text/Pandoc/Filter/Lua.hs @@ -15,6 +15,7 @@ module Text.Pandoc.Filter.Lua (apply) where import Prelude import Control.Exception (throw) import Control.Monad ((>=>)) +import qualified Data.Text as T import Text.Pandoc.Class (PandocIO) import Text.Pandoc.Definition (Pandoc) import Text.Pandoc.Error (PandocError (PandocFilterError)) @@ -35,7 +36,7 @@ apply ropts args fp doc = do (x:_) -> x _ -> error "Format not supplied for Lua filter" runLua >=> forceResult fp $ do - setGlobals [ FORMAT format + setGlobals [ FORMAT $ T.pack format , PANDOC_READER_OPTIONS ropts , PANDOC_SCRIPT_FILE fp ] @@ -44,4 +45,4 @@ apply ropts args fp doc = do forceResult :: FilePath -> Either LuaException Pandoc -> PandocIO Pandoc forceResult fp eitherResult = case eitherResult of Right x -> return x - Left (LuaException s) -> throw (PandocFilterError fp s) + Left (LuaException s) -> throw (PandocFilterError (T.pack fp) s) diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 62aa5afc4..3e02355f7 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Highlighting Copyright : Copyright (C) 2008-2019 John MacFarlane @@ -35,7 +36,6 @@ module Text.Pandoc.Highlighting ( highlightingStyles ) where import Prelude import Control.Monad -import Data.Char (toLower) import qualified Data.Map as M import Data.Maybe (fromMaybe) import qualified Data.Text as T @@ -43,7 +43,7 @@ import Skylighting import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) -highlightingStyles :: [(String, Style)] +highlightingStyles :: [(T.Text, Style)] highlightingStyles = [("pygments", pygments), ("tango", tango), @@ -54,18 +54,18 @@ highlightingStyles = ("breezedark", breezeDark), ("haddock", haddock)] -languages :: [String] -languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap] +languages :: [T.Text] +languages = [T.toLower (sName s) | s <- M.elems defaultSyntaxMap] -languagesByExtension :: String -> [String] +languagesByExtension :: T.Text -> [T.Text] languagesByExtension ext = - [T.unpack (T.toLower (sName s)) | s <- syntaxesByExtension defaultSyntaxMap ext] + [T.toLower (sName s) | s <- syntaxesByExtension defaultSyntaxMap (T.unpack ext)] highlight :: SyntaxMap -> (FormatOptions -> [SourceLine] -> a) -- ^ Formatter -> Attr -- ^ Attributes of the CodeBlock - -> String -- ^ Raw contents of the CodeBlock - -> Either String a + -> T.Text -- ^ Raw contents of the CodeBlock + -> Either T.Text a highlight syntaxmap formatter (ident, classes, keyvals) rawCode = let firstNum = fromMaybe 1 (safeRead (fromMaybe "1" $ lookup "startFrom" keyvals)) fmtOpts = defaultFormatOpts{ @@ -74,38 +74,36 @@ highlight syntaxmap formatter (ident, classes, keyvals) rawCode = ["line-anchors", "lineAnchors"]) classes, numberLines = any (`elem` ["number","numberLines", "number-lines"]) classes, - lineIdPrefix = if null ident + lineIdPrefix = if T.null ident then mempty - else T.pack (ident ++ "-") } + else ident <> "-" } tokenizeOpts = TokenizerConfig{ syntaxMap = syntaxmap , traceOutput = False } - classes' = map T.pack classes - rawCode' = T.pack rawCode - in case msum (map (`lookupSyntax` syntaxmap) classes') of + in case msum (map (`lookupSyntax` syntaxmap) classes) of Nothing | numberLines fmtOpts -> Right $ formatter fmtOpts{ codeClasses = [], - containerClasses = classes' } + containerClasses = classes } $ map (\ln -> [(NormalTok, ln)]) - $ T.lines rawCode' + $ T.lines rawCode | otherwise -> Left "" - Just syntax -> + Just syntax -> either (Left . T.pack) Right $ formatter fmtOpts{ codeClasses = [T.toLower (sShortname syntax)], - containerClasses = classes' } <$> - tokenize tokenizeOpts syntax rawCode' + containerClasses = classes } <$> + tokenize tokenizeOpts syntax rawCode -- Functions for correlating latex listings package's language names -- with skylighting language names: -langToListingsMap :: M.Map String String +langToListingsMap :: M.Map T.Text T.Text langToListingsMap = M.fromList langsList -listingsToLangMap :: M.Map String String +listingsToLangMap :: M.Map T.Text T.Text listingsToLangMap = M.fromList $ map switch langsList where switch (a,b) = (b,a) -langsList :: [(String, String)] +langsList :: [(T.Text, T.Text)] langsList = [("abap","ABAP"), ("acm","ACM"), @@ -212,9 +210,9 @@ langsList = ("xslt","XSLT")] -- | Determine listings language name from skylighting language name. -toListingsLanguage :: String -> Maybe String -toListingsLanguage lang = M.lookup (map toLower lang) langToListingsMap +toListingsLanguage :: T.Text -> Maybe T.Text +toListingsLanguage lang = M.lookup (T.toLower lang) langToListingsMap -- | Determine skylighting language name from listings language name. -fromListingsLanguage :: String -> Maybe String +fromListingsLanguage :: T.Text -> Maybe T.Text fromListingsLanguage lang = M.lookup lang listingsToLangMap diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index afbba9b8b..d9ded22be 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-} +{-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-type-defaults #-} {- | Module : Text.Pandoc.ImageSize @@ -49,6 +50,8 @@ import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.XML.Light as Xml import qualified Data.Map as M +import qualified Data.Text as T +import qualified Data.Text.Encoding as TE import Control.Monad.Except import Control.Applicative import Data.Maybe (fromMaybe) @@ -72,12 +75,12 @@ data Dimension = Pixel Integer deriving Eq instance Show Dimension where - show (Pixel a) = show a ++ "px" - show (Centimeter a) = showFl a ++ "cm" - show (Millimeter a) = showFl a ++ "mm" - show (Inch a) = showFl a ++ "in" - show (Percent a) = show a ++ "%" - show (Em a) = showFl a ++ "em" + show (Pixel a) = show a ++ "px" + show (Centimeter a) = T.unpack (showFl a) ++ "cm" + show (Millimeter a) = T.unpack (showFl a) ++ "mm" + show (Inch a) = T.unpack (showFl a) ++ "in" + show (Percent a) = show a ++ "%" + show (Em a) = T.unpack (showFl a) ++ "em" data ImageSize = ImageSize{ pxX :: Integer @@ -88,14 +91,13 @@ data ImageSize = ImageSize{ instance Default ImageSize where def = ImageSize 300 200 72 72 -showFl :: (RealFloat a) => a -> String -showFl a = removeExtra0s $ showFFloat (Just 5) a "" +showFl :: (RealFloat a) => a -> T.Text +showFl a = removeExtra0s $ T.pack $ showFFloat (Just 5) a "" -removeExtra0s :: String -> String -removeExtra0s s = - case dropWhile (=='0') $ reverse s of - '.':xs -> reverse xs - xs -> reverse xs +removeExtra0s :: T.Text -> T.Text +removeExtra0s s = case T.dropWhileEnd (=='0') s of + (T.unsnoc -> Just (xs, '.')) -> xs + xs -> xs imageType :: ByteString -> Maybe ImageType imageType img = case B.take 4 img of @@ -119,7 +121,7 @@ imageType img = case B.take 4 img of findSvgTag :: ByteString -> Bool findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img -imageSize :: WriterOptions -> ByteString -> Either String ImageSize +imageSize :: WriterOptions -> ByteString -> Either T.Text ImageSize imageSize opts img = case imageType img of Just Png -> mbToEither "could not determine PNG size" $ pngSize img @@ -194,22 +196,22 @@ inPixel opts dim = where dpi = fromIntegral $ writerDpi opts --- | Convert a Dimension to a String denoting its equivalent in inches, for example "2.00000". +-- | Convert a Dimension to Text denoting its equivalent in inches, for example "2.00000". -- Note: Dimensions in percentages are converted to the empty string. -showInInch :: WriterOptions -> Dimension -> String +showInInch :: WriterOptions -> Dimension -> T.Text showInInch _ (Percent _) = "" showInInch opts dim = showFl $ inInch opts dim --- | Convert a Dimension to a String denoting its equivalent in pixels, for example "600". +-- | Convert a Dimension to Text denoting its equivalent in pixels, for example "600". -- Note: Dimensions in percentages are converted to the empty string. -showInPixel :: WriterOptions -> Dimension -> String +showInPixel :: WriterOptions -> Dimension -> T.Text showInPixel _ (Percent _) = "" -showInPixel opts dim = show $ inPixel opts dim +showInPixel opts dim = T.pack $ show $ inPixel opts dim -- | Maybe split a string into a leading number and trailing unit, e.g. "3cm" to Just (3.0, "cm") -numUnit :: String -> Maybe (Double, String) +numUnit :: T.Text -> Maybe (Double, T.Text) numUnit s = - let (nums, unit) = span (\c -> isDigit c || ('.'==c)) s + let (nums, unit) = T.span (\c -> isDigit c || ('.'==c)) s in case safeRead nums of Just n -> Just (n, unit) Nothing -> Nothing @@ -235,7 +237,7 @@ dimension dir (_, _, kvs) = where extractDim key = lookup key kvs >>= lengthToDim -lengthToDim :: String -> Maybe Dimension +lengthToDim :: T.Text -> Maybe Dimension lengthToDim s = numUnit s >>= uncurry toDim where toDim a "cm" = Just $ Centimeter a @@ -258,8 +260,8 @@ epsSize img = do [] -> mzero (x:_) -> case B.words x of [_, _, _, ux, uy] -> do - ux' <- safeRead $ B.unpack ux - uy' <- safeRead $ B.unpack uy + ux' <- safeRead $ TE.decodeUtf8 ux + uy' <- safeRead $ TE.decodeUtf8 uy return ImageSize{ pxX = ux' , pxY = uy' @@ -284,7 +286,7 @@ pPdfSize = do [x1,y1,x2,y2] <- A.count 4 $ do A.skipSpace raw <- A.many1 $ A.satisfy (\c -> isDigit c || c == '.') - case safeRead raw of + case safeRead $ T.pack raw of Just (r :: Double) -> return $ floor r Nothing -> mzero A.skipSpace @@ -345,7 +347,7 @@ svgSize opts img = do doc <- Xml.parseXMLDoc $ UTF8.toString img let dpi = fromIntegral $ writerDpi opts let dirToInt dir = do - dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim + dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack return $ inPixel opts dim w <- dirToInt "width" h <- dirToInt "height" @@ -388,7 +390,7 @@ emfSize img = Right (_, _, size) -> Just size -jpegSize :: ByteString -> Either String ImageSize +jpegSize :: ByteString -> Either T.Text ImageSize jpegSize img = let (hdr, rest) = B.splitAt 4 img in if B.length rest < 14 @@ -398,7 +400,7 @@ jpegSize img = "\xff\xd8\xff\xe1" -> exifSize rest _ -> Left "unable to determine JPEG size" -jfifSize :: ByteString -> Either String ImageSize +jfifSize :: ByteString -> Either T.Text ImageSize jfifSize rest = case map fromIntegral $ unpack $ B.take 5 $ B.drop 9 rest of [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] -> @@ -416,7 +418,7 @@ jfifSize rest = , dpiY = dpiy } _ -> Left "unable to determine JFIF size" -findJfifSize :: ByteString -> Either String (Integer,Integer) +findJfifSize :: ByteString -> Either T.Text (Integer,Integer) findJfifSize bs = let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs in case B.uncons bs' of @@ -433,19 +435,18 @@ findJfifSize bs = _ -> Left "JFIF parse error" Nothing -> Left "Did not find JFIF length record" -runGet' :: Get (Either String a) -> BL.ByteString -> Either String a +runGet' :: Get (Either T.Text a) -> BL.ByteString -> Either T.Text a runGet' p bl = #if MIN_VERSION_binary(0,7,0) case runGetOrFail p bl of - Left (_,_,msg) -> Left msg + Left (_,_,msg) -> Left $ T.pack msg Right (_,_,x) -> x #else runGet p bl #endif - -exifSize :: ByteString -> Either String ImageSize -exifSize bs =runGet' header bl +exifSize :: ByteString -> Either T.Text ImageSize +exifSize bs = runGet' header bl where bl = BL.fromChunks [bs] header = runExceptT $ exifHeader bl -- NOTE: It would be nicer to do @@ -454,7 +455,7 @@ exifSize bs =runGet' header bl -- be parsed. But we only get an Alternative instance for Get in binary 0.6, -- and binary 0.5 ships with ghc 7.6. -exifHeader :: BL.ByteString -> ExceptT String Get ImageSize +exifHeader :: BL.ByteString -> ExceptT T.Text Get ImageSize exifHeader hdr = do _app1DataSize <- lift getWord16be exifHdr <- lift getWord32be @@ -479,7 +480,7 @@ exifHeader hdr = do ifdOffset <- lift getWord32 lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF numentries <- lift getWord16 - let ifdEntry :: ExceptT String Get (TagType, DataFormat) + let ifdEntry :: ExceptT T.Text Get (TagType, DataFormat) ifdEntry = do tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable <$> lift getWord16 @@ -502,7 +503,7 @@ exifHeader hdr = do 10 -> return (SignedRational <$> getRational, 8) 11 -> return (SingleFloat <$> getWord32 {- TODO -}, 4) 12 -> return (DoubleFloat <$> getWord64 {- TODO -}, 8) - _ -> throwError $ "Unknown data format " ++ show dataFormat + _ -> throwError $ "Unknown data format " <> T.pack (show dataFormat) let totalBytes = fromIntegral $ numComponents * bytesPerComponent payload <- if totalBytes <= 4 -- data is right here then lift $ fmt <* skip (4 - totalBytes) diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 74b8e1bb2..f13139fa2 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -31,7 +31,6 @@ import Data.Aeson.Encode.Pretty (Config (..), defConfig, encodePretty', keyOrder) import qualified Data.ByteString.Lazy as BL import Data.Data (Data, toConstr) -import Data.List (isSuffixOf, intercalate) import qualified Data.Text as Text import Data.Typeable (Typeable) import GHC.Generics (Generic) @@ -62,43 +61,43 @@ instance FromYAML Verbosity where _ -> mzero data LogMessage = - SkippedContent String SourcePos - | IgnoredElement String - | CouldNotParseYamlMetadata String SourcePos - | DuplicateLinkReference String SourcePos - | DuplicateNoteReference String SourcePos - | NoteDefinedButNotUsed String SourcePos - | DuplicateIdentifier String SourcePos - | ReferenceNotFound String SourcePos - | CircularReference String SourcePos - | UndefinedToggle String SourcePos - | ParsingUnescaped String SourcePos - | CouldNotLoadIncludeFile String SourcePos - | MacroAlreadyDefined String SourcePos + SkippedContent Text.Text SourcePos + | IgnoredElement Text.Text + | CouldNotParseYamlMetadata Text.Text SourcePos + | DuplicateLinkReference Text.Text SourcePos + | DuplicateNoteReference Text.Text SourcePos + | NoteDefinedButNotUsed Text.Text SourcePos + | DuplicateIdentifier Text.Text SourcePos + | ReferenceNotFound Text.Text SourcePos + | CircularReference Text.Text SourcePos + | UndefinedToggle Text.Text SourcePos + | ParsingUnescaped Text.Text SourcePos + | CouldNotLoadIncludeFile Text.Text SourcePos + | MacroAlreadyDefined Text.Text SourcePos | InlineNotRendered Inline | BlockNotRendered Block - | DocxParserWarning String - | IgnoredIOError String - | CouldNotFetchResource String String - | CouldNotDetermineImageSize String String - | CouldNotConvertImage String String - | CouldNotDetermineMimeType String - | CouldNotConvertTeXMath String String - | CouldNotParseCSS String - | Fetching String - | Extracting String - | NoTitleElement String + | DocxParserWarning Text.Text + | IgnoredIOError Text.Text + | CouldNotFetchResource Text.Text Text.Text + | CouldNotDetermineImageSize Text.Text Text.Text + | CouldNotConvertImage Text.Text Text.Text + | CouldNotDetermineMimeType Text.Text + | CouldNotConvertTeXMath Text.Text Text.Text + | CouldNotParseCSS Text.Text + | Fetching Text.Text + | Extracting Text.Text + | NoTitleElement Text.Text | NoLangSpecified - | InvalidLang String - | CouldNotHighlight String - | MissingCharacter String - | Deprecated String String - | NoTranslation String - | CouldNotLoadTranslations String String - | UnusualConversion String - | UnexpectedXmlElement String String - | UnknownOrgExportOption String - | CouldNotDeduceFormat [String] String + | InvalidLang Text.Text + | CouldNotHighlight Text.Text + | MissingCharacter Text.Text + | Deprecated Text.Text Text.Text + | NoTranslation Text.Text + | CouldNotLoadTranslations Text.Text Text.Text + | UnusualConversion Text.Text + | UnexpectedXmlElement Text.Text Text.Text + | UnknownOrgExportOption Text.Text + | CouldNotDeduceFormat [Text.Text] Text.Text deriving (Show, Eq, Data, Ord, Typeable, Generic) instance ToJSON LogMessage where @@ -107,65 +106,65 @@ instance ToJSON LogMessage where "type" .= toJSON (show $ toConstr x) : case x of SkippedContent s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= sourceLine pos, "column" .= sourceColumn pos] IgnoredElement s -> - ["contents" .= Text.pack s] + ["contents" .= s] CouldNotParseYamlMetadata s pos -> - ["message" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["message" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] DuplicateLinkReference s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] NoteDefinedButNotUsed s pos -> - ["key" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["key" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] DuplicateNoteReference s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] DuplicateIdentifier s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] ReferenceNotFound s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] CircularReference s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] UndefinedToggle s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] ParsingUnescaped s pos -> - ["contents" .= Text.pack s, - "source" .= Text.pack (sourceName pos), + ["contents" .= s, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] CouldNotLoadIncludeFile fp pos -> - ["path" .= Text.pack fp, - "source" .= Text.pack (sourceName pos), + ["path" .= fp, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] MacroAlreadyDefined name pos -> - ["name" .= Text.pack name, - "source" .= Text.pack (sourceName pos), + ["name" .= name, + "source" .= sourceName pos, "line" .= toJSON (sourceLine pos), "column" .= toJSON (sourceColumn pos)] InlineNotRendered il -> @@ -173,60 +172,60 @@ instance ToJSON LogMessage where BlockNotRendered bl -> ["contents" .= toJSON bl] DocxParserWarning s -> - ["contents" .= Text.pack s] + ["contents" .= s] IgnoredIOError s -> - ["contents" .= Text.pack s] + ["contents" .= s] CouldNotFetchResource fp s -> - ["path" .= Text.pack fp, - "message" .= Text.pack s] + ["path" .= fp, + "message" .= s] CouldNotDetermineImageSize fp s -> - ["path" .= Text.pack fp, - "message" .= Text.pack s] + ["path" .= fp, + "message" .= s] CouldNotConvertImage fp s -> - ["path" .= Text.pack fp, - "message" .= Text.pack s] + ["path" .= fp, + "message" .= s] CouldNotDetermineMimeType fp -> - ["path" .= Text.pack fp] + ["path" .= fp] CouldNotConvertTeXMath s msg -> - ["contents" .= Text.pack s, - "message" .= Text.pack msg] + ["contents" .= s, + "message" .= msg] CouldNotParseCSS msg -> - ["message" .= Text.pack msg] + ["message" .= msg] Fetching fp -> - ["path" .= Text.pack fp] + ["path" .= fp] Extracting fp -> - ["path" .= Text.pack fp] + ["path" .= fp] NoTitleElement fallback -> - ["fallback" .= Text.pack fallback] + ["fallback" .= fallback] NoLangSpecified -> [] InvalidLang s -> - ["lang" .= Text.pack s] + ["lang" .= s] CouldNotHighlight msg -> - ["message" .= Text.pack msg] + ["message" .= msg] MissingCharacter msg -> - ["message" .= Text.pack msg] + ["message" .= msg] Deprecated thing msg -> - ["thing" .= Text.pack thing, - "message" .= Text.pack msg] + ["thing" .= thing, + "message" .= msg] NoTranslation term -> - ["term" .= Text.pack term] + ["term" .= term] CouldNotLoadTranslations lang msg -> - ["lang" .= Text.pack lang, - "message" .= Text.pack msg] + ["lang" .= lang, + "message" .= msg] UnusualConversion msg -> - ["message" .= Text.pack msg] + ["message" .= msg] UnexpectedXmlElement element parent -> - ["element" .= Text.pack element, - "parent" .= Text.pack parent] + ["element" .= element, + "parent" .= parent] UnknownOrgExportOption option -> - ["option" .= Text.pack option] + ["option" .= option] CouldNotDeduceFormat exts format -> - ["extensions" .= map Text.pack exts - ,"format" .= Text.pack format] + ["extensions" .= exts + ,"format" .= format] -showPos :: SourcePos -> String -showPos pos = sn ++ "line " ++ +showPos :: SourcePos -> Text.Text +showPos pos = Text.pack $ sn ++ "line " ++ show (sourceLine pos) ++ " column " ++ show (sourceColumn pos) where sn = if sourceName pos == "source" || sourceName pos == "" then "" @@ -238,140 +237,140 @@ encodeLogMessages ms = keyOrder [ "type", "verbosity", "contents", "message", "path", "source", "line", "column" ] } ms -showLogMessage :: LogMessage -> String +showLogMessage :: LogMessage -> Text.Text showLogMessage msg = case msg of SkippedContent s pos -> - "Skipped '" ++ s ++ "' at " ++ showPos pos + "Skipped '" <> s <> "' at " <> showPos pos IgnoredElement s -> - "Ignored element " ++ s + "Ignored element " <> s CouldNotParseYamlMetadata s pos -> - "Could not parse YAML metadata at " ++ showPos pos ++ - if null s then "" else ": " ++ s + "Could not parse YAML metadata at " <> showPos pos <> + if Text.null s then "" else ": " <> s DuplicateLinkReference s pos -> - "Duplicate link reference '" ++ s ++ "' at " ++ showPos pos + "Duplicate link reference '" <> s <> "' at " <> showPos pos DuplicateNoteReference s pos -> - "Duplicate note reference '" ++ s ++ "' at " ++ showPos pos + "Duplicate note reference '" <> s <> "' at " <> showPos pos NoteDefinedButNotUsed s pos -> - "Note with key '" ++ s ++ "' defined at " ++ showPos pos ++ + "Note with key '" <> s <> "' defined at " <> showPos pos <> " but not used." DuplicateIdentifier s pos -> - "Duplicate identifier '" ++ s ++ "' at " ++ showPos pos + "Duplicate identifier '" <> s <> "' at " <> showPos pos ReferenceNotFound s pos -> - "Reference not found for '" ++ s ++ "' at " ++ showPos pos + "Reference not found for '" <> s <> "' at " <> showPos pos CircularReference s pos -> - "Circular reference '" ++ s ++ "' at " ++ showPos pos + "Circular reference '" <> s <> "' at " <> showPos pos UndefinedToggle s pos -> - "Undefined toggle '" ++ s ++ "' at " ++ showPos pos + "Undefined toggle '" <> s <> "' at " <> showPos pos ParsingUnescaped s pos -> - "Parsing unescaped '" ++ s ++ "' at " ++ showPos pos + "Parsing unescaped '" <> s <> "' at " <> showPos pos CouldNotLoadIncludeFile fp pos -> - "Could not load include file '" ++ fp ++ "' at " ++ showPos pos + "Could not load include file '" <> fp <> "' at " <> showPos pos MacroAlreadyDefined name pos -> - "Macro '" ++ name ++ "' already defined, ignoring at " ++ showPos pos + "Macro '" <> name <> "' already defined, ignoring at " <> showPos pos InlineNotRendered il -> - "Not rendering " ++ show il + "Not rendering " <> Text.pack (show il) BlockNotRendered bl -> - "Not rendering " ++ show bl + "Not rendering " <> Text.pack (show bl) DocxParserWarning s -> - "Docx parser warning: " ++ s + "Docx parser warning: " <> s IgnoredIOError s -> - "IO Error (ignored): " ++ s + "IO Error (ignored): " <> s CouldNotFetchResource fp s -> - "Could not fetch resource '" ++ fp ++ "'" ++ - if null s then "" else ": " ++ s + "Could not fetch resource '" <> fp <> "'" <> + if Text.null s then "" else ": " <> s CouldNotDetermineImageSize fp s -> - "Could not determine image size for '" ++ fp ++ "'" ++ - if null s then "" else ": " ++ s + "Could not determine image size for '" <> fp <> "'" <> + if Text.null s then "" else ": " <> s CouldNotConvertImage fp s -> - "Could not convert image '" ++ fp ++ "'" ++ - if null s then "" else ": " ++ s + "Could not convert image '" <> fp <> "'" <> + if Text.null s then "" else ": " <> s CouldNotDetermineMimeType fp -> - "Could not determine mime type for '" ++ fp ++ "'" + "Could not determine mime type for '" <> fp <> "'" CouldNotConvertTeXMath s m -> - "Could not convert TeX math '" ++ s ++ "', rendering as TeX" ++ - if null m then "" else ':' : '\n' : m + "Could not convert TeX math '" <> s <> "', rendering as TeX" <> + if Text.null m then "" else ":\n" <> m CouldNotParseCSS m -> - "Could not parse CSS" ++ if null m then "" else ':' : '\n' : m + "Could not parse CSS" <> if Text.null m then "" else ":\n" <> m Fetching fp -> - "Fetching " ++ fp ++ "..." + "Fetching " <> fp <> "..." Extracting fp -> - "Extracting " ++ fp ++ "..." + "Extracting " <> fp <> "..." NoTitleElement fallback -> - "This document format requires a nonempty <title> element.\n" ++ - "Defaulting to '" ++ fallback ++ "' as the title.\n" ++ - "To specify a title, use 'title' in metadata or " ++ + "This document format requires a nonempty <title> element.\n" <> + "Defaulting to '" <> fallback <> "' as the title.\n" <> + "To specify a title, use 'title' in metadata or " <> "--metadata title=\"...\"." NoLangSpecified -> - "No value for 'lang' was specified in the metadata.\n" ++ + "No value for 'lang' was specified in the metadata.\n" <> "It is recommended that lang be specified for this format." InvalidLang s -> - "Invalid 'lang' value '" ++ s ++ "'.\n" ++ + "Invalid 'lang' value '" <> s <> "'.\n" <> "Use an IETF language tag like 'en-US'." CouldNotHighlight m -> - "Could not highlight code block:\n" ++ m + "Could not highlight code block:\n" <> m MissingCharacter m -> - "Missing character: " ++ m + "Missing character: " <> m Deprecated t m -> - "Deprecated: " ++ t ++ - if null m + "Deprecated: " <> t <> + if Text.null m then "" - else ". " ++ m + else ". " <> m NoTranslation t -> - "The term " ++ t ++ " has no translation defined." + "The term " <> t <> " has no translation defined." CouldNotLoadTranslations lang m -> - "Could not load translations for " ++ lang ++ - if null m then "" else '\n' : m + "Could not load translations for " <> lang <> + if Text.null m then "" else "\n" <> m UnusualConversion m -> - "Unusual conversion: " ++ m + "Unusual conversion: " <> m UnexpectedXmlElement element parent -> - "Unexpected XML element " ++ element ++ " in " ++ parent + "Unexpected XML element " <> element <> " in " <> parent UnknownOrgExportOption option -> - "Ignoring unknown Org export option: " ++ option + "Ignoring unknown Org export option: " <> option CouldNotDeduceFormat exts format -> - "Could not deduce format from file extension " ++ - intercalate " or " exts ++ "\n" ++ - "Defaulting to " ++ format + "Could not deduce format from file extension " <> + Text.intercalate " or " exts <> "\n" <> + "Defaulting to " <> format -messageVerbosity:: LogMessage -> Verbosity +messageVerbosity :: LogMessage -> Verbosity messageVerbosity msg = case msg of - SkippedContent{} -> INFO - IgnoredElement{} -> INFO - CouldNotParseYamlMetadata{} -> WARNING - DuplicateLinkReference{} -> WARNING - DuplicateNoteReference{} -> WARNING - NoteDefinedButNotUsed{} -> WARNING - DuplicateIdentifier{} -> WARNING - ReferenceNotFound{} -> WARNING - CircularReference{} -> WARNING - UndefinedToggle{} -> WARNING + SkippedContent{} -> INFO + IgnoredElement{} -> INFO + CouldNotParseYamlMetadata{} -> WARNING + DuplicateLinkReference{} -> WARNING + DuplicateNoteReference{} -> WARNING + NoteDefinedButNotUsed{} -> WARNING + DuplicateIdentifier{} -> WARNING + ReferenceNotFound{} -> WARNING + CircularReference{} -> WARNING + UndefinedToggle{} -> WARNING CouldNotLoadIncludeFile f _ - | ".sty" `isSuffixOf` f -> INFO - | otherwise -> WARNING - MacroAlreadyDefined{} -> WARNING - ParsingUnescaped{} -> INFO - InlineNotRendered{} -> INFO - BlockNotRendered{} -> INFO - DocxParserWarning{} -> INFO - IgnoredIOError{} -> WARNING - CouldNotFetchResource{} -> WARNING - CouldNotDetermineImageSize{} -> WARNING - CouldNotConvertImage{} -> WARNING - CouldNotDetermineMimeType{} -> WARNING - CouldNotConvertTeXMath{} -> WARNING - CouldNotParseCSS{} -> WARNING - Fetching{} -> INFO - Extracting{} -> INFO - NoTitleElement{} -> WARNING - NoLangSpecified -> INFO - InvalidLang{} -> WARNING - CouldNotHighlight{} -> WARNING - MissingCharacter{} -> WARNING - Deprecated{} -> WARNING - NoTranslation{} -> WARNING - CouldNotLoadTranslations{} -> WARNING - UnusualConversion {} -> WARNING - UnexpectedXmlElement {} -> WARNING - UnknownOrgExportOption {} -> WARNING - CouldNotDeduceFormat{} -> WARNING + | ".sty" `Text.isSuffixOf` f -> INFO + | otherwise -> WARNING + MacroAlreadyDefined{} -> WARNING + ParsingUnescaped{} -> INFO + InlineNotRendered{} -> INFO + BlockNotRendered{} -> INFO + DocxParserWarning{} -> INFO + IgnoredIOError{} -> WARNING + CouldNotFetchResource{} -> WARNING + CouldNotDetermineImageSize{} -> WARNING + CouldNotConvertImage{} -> WARNING + CouldNotDetermineMimeType{} -> WARNING + CouldNotConvertTeXMath{} -> WARNING + CouldNotParseCSS{} -> WARNING + Fetching{} -> INFO + Extracting{} -> INFO + NoTitleElement{} -> WARNING + NoLangSpecified -> INFO + InvalidLang{} -> WARNING + CouldNotHighlight{} -> WARNING + MissingCharacter{} -> WARNING + Deprecated{} -> WARNING + NoTranslation{} -> WARNING + CouldNotLoadTranslations{} -> WARNING + UnusualConversion {} -> WARNING + UnexpectedXmlElement {} -> WARNING + UnknownOrgExportOption {} -> WARNING + CouldNotDeduceFormat{} -> WARNING diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index 9416bf41f..74c7058f3 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} {- | Module : Text.Pandoc.Lua.Filter @@ -180,7 +180,7 @@ constructorsFor :: DataType -> [String] constructorsFor x = map show (dataTypeConstrs x) inlineElementNames :: [String] -inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str [])) +inlineElementNames = "Inline" : constructorsFor (dataTypeOf (Str mempty)) blockElementNames :: [String] blockElementNames = "Block" : constructorsFor (dataTypeOf (Para [])) diff --git a/src/Text/Pandoc/Lua/Global.hs b/src/Text/Pandoc/Lua/Global.hs index b9b6c9cd9..20963f831 100644 --- a/src/Text/Pandoc/Lua/Global.hs +++ b/src/Text/Pandoc/Lua/Global.hs @@ -27,11 +27,12 @@ import Text.Pandoc.Lua.Marshaling () import Text.Pandoc.Lua.Util (addFunction) import Text.Pandoc.Options (ReaderOptions) +import qualified Data.Text as Text import qualified Foreign.Lua as Lua -- | Permissible global Lua variables. data Global = - FORMAT String + FORMAT Text.Text | PANDOC_API_VERSION | PANDOC_DOCUMENT Pandoc | PANDOC_READER_OPTIONS ReaderOptions diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs index f1cab7e82..cf6c71231 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/src/Text/Pandoc/Lua/Init.hs @@ -28,13 +28,14 @@ import Text.Pandoc.Lua.Packages (LuaPackageParams (..), installPandocPackageSearcher) import Text.Pandoc.Lua.Util (loadScriptFromDataDir) +import qualified Data.Text as Text import qualified Foreign.Lua as Lua import qualified Foreign.Lua.Module.Text as Lua import qualified Text.Pandoc.Definition as Pandoc import qualified Text.Pandoc.Lua.Module.Pandoc as ModulePandoc -- | Lua error message -newtype LuaException = LuaException String deriving (Show) +newtype LuaException = LuaException Text.Text deriving (Show) -- | Run the lua interpreter, using pandoc's default way of environment -- initialization. @@ -56,7 +57,7 @@ runLua luaOp = do return (opResult, st) liftIO $ setForeignEncoding enc case res of - Left (Lua.Exception msg) -> return $ Left (LuaException msg) + Left (Lua.Exception msg) -> return $ Left (LuaException $ Text.pack msg) Right (x, newState) -> do putCommonState newState return $ Right x diff --git a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs index eed1500ec..b65396f68 100644 --- a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs +++ b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs @@ -1,6 +1,7 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Marshaling.CommonState Copyright : © 2012-2019 John MacFarlane @@ -23,6 +24,7 @@ import Text.Pandoc.Logging (LogMessage, showLogMessage) import Text.Pandoc.Lua.Marshaling.AnyValue (AnyValue (..)) import qualified Data.Map as Map +import qualified Data.Text as Text import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil @@ -46,7 +48,7 @@ indexCommonState st (AnyValue idx) = Lua.ltype idx >>= \case Lua.TypeString -> 1 <$ (Lua.peek idx >>= pushField) _ -> 1 <$ Lua.pushnil where - pushField :: String -> Lua () + pushField :: Text.Text -> Lua () pushField name = case lookup name commonStateFields of Just pushValue -> pushValue st Nothing -> Lua.pushnil @@ -71,7 +73,7 @@ pairsCommonState st = do (nextKey, pushValue):_ -> 2 <$ (Lua.push nextKey *> pushValue st) _ -> 2 <$ (Lua.pushnil *> Lua.pushnil) -commonStateFields :: [(String, CommonState -> Lua ())] +commonStateFields :: [(Text.Text, CommonState -> Lua ())] commonStateFields = [ ("input_files", Lua.push . stInputFiles) , ("output_file", Lua.push . Lua.Optional . stOutputFile) @@ -98,5 +100,5 @@ instance Pushable LogMessage where pushLogMessageMetatable = ensureUserdataMetatable logMessageTypeName $ LuaUtil.addFunction "__tostring" tostringLogMessage -tostringLogMessage :: LogMessage -> Lua String +tostringLogMessage :: LogMessage -> Lua Text.Text tostringLogMessage = return . showLogMessage diff --git a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs index 5395f6fc8..226fe2e71 100644 --- a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs +++ b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs @@ -25,6 +25,7 @@ import Text.Pandoc.Lua.Marshaling.CommonState () import Text.Pandoc.Options (ReaderOptions (..), TrackChanges) import qualified Data.Set as Set +import qualified Data.Text as Text import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil @@ -44,9 +45,9 @@ instance Pushable ReaderOptions where (standalone :: Bool) (columns :: Int) (tabStop :: Int) - (indentedCodeClasses :: [String]) - (abbreviations :: Set.Set String) - (defaultImageExtension :: String) + (indentedCodeClasses :: [Text.Text]) + (abbreviations :: Set.Set Text.Text) + (defaultImageExtension :: Text.Text) (trackChanges :: TrackChanges) (stripComments :: Bool) = ro diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index 261785665..951571ddd 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -15,7 +15,6 @@ module Text.Pandoc.Lua.Module.MediaBag import Prelude import Control.Monad (zipWithM_) -import Data.Maybe (fromMaybe) import Foreign.Lua (Lua, NumResults, Optional, liftIO) import Text.Pandoc.Class (CommonState (..), fetchItem, putCommonState, runIOorExplode, setMediaBag) @@ -25,6 +24,7 @@ import Text.Pandoc.Lua.Util (addFunction) import Text.Pandoc.MIME (MimeType) import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T import qualified Foreign.Lua as Lua import qualified Text.Pandoc.MediaBag as MB @@ -113,7 +113,7 @@ mediaDirectoryFn = do Lua.push "length" *> Lua.push contentLength *> Lua.rawset (-3) Lua.rawseti (-2) idx -fetch :: String +fetch :: T.Text -> Lua NumResults fetch src = do commonState <- getCommonState @@ -122,6 +122,6 @@ fetch src = do putCommonState commonState setMediaBag mediaBag fetchItem src - Lua.push $ fromMaybe "" mimeType + Lua.push $ maybe "" T.unpack mimeType Lua.push bs return 2 -- returns 2 values: contents, mimetype diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index 182008da7..36d6f4009 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Module.Pandoc Copyright : Copyright © 2017-2019 Albert Krewinkel @@ -19,7 +20,6 @@ import Control.Monad (when) import Control.Monad.Except (throwError) import Data.Default (Default (..)) import Data.Maybe (fromMaybe) -import Data.Text (pack) import Foreign.Lua (Lua, NumResults, Optional, Peekable, Pushable) import System.Exit (ExitCode (..)) import Text.Pandoc.Class (runIO) @@ -33,6 +33,7 @@ import Text.Pandoc.Readers (Reader (..), getReader) import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BSL +import qualified Data.Text as T import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil import Text.Pandoc.Error @@ -59,22 +60,22 @@ walkInline = walkElement walkBlock :: Block -> LuaFilter -> Lua Block walkBlock = walkElement -readDoc :: String -> Optional String -> Lua NumResults +readDoc :: T.Text -> Optional T.Text -> Lua NumResults readDoc content formatSpecOrNil = do let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil) res <- Lua.liftIO . runIO $ getReader formatSpec >>= \(rdr,es) -> case rdr of TextReader r -> - r def{ readerExtensions = es } (pack content) + r def{ readerExtensions = es } content _ -> throwError $ PandocSomeError $ "Only textual formats are supported" case res of Right pd -> (1 :: NumResults) <$ Lua.push pd -- success, push Pandoc Left (PandocUnknownReaderError f) -> Lua.raiseError $ - "Unknown reader: " ++ f + "Unknown reader: " <> f Left (PandocUnsupportedExtensionError e f) -> Lua.raiseError $ - "Extension " ++ e ++ " not supported for " ++ f + "Extension " <> e <> " not supported for " <> f Left e -> Lua.raiseError $ show e -- | Pipes input through a command. @@ -86,10 +87,10 @@ pipeFn command args input = do (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input case ec of ExitSuccess -> 1 <$ Lua.push output - ExitFailure n -> Lua.raiseError (PipeError command n output) + ExitFailure n -> Lua.raiseError (PipeError (T.pack command) n output) data PipeError = PipeError - { pipeErrorCommand :: String + { pipeErrorCommand :: T.Text , pipeErrorCode :: Int , pipeErrorOutput :: BL.ByteString } @@ -118,7 +119,7 @@ instance Pushable PipeError where pipeErrorMessage :: PipeError -> Lua BL.ByteString pipeErrorMessage (PipeError cmd errorCode output) = return $ mconcat [ BSL.pack "Error running " - , BSL.pack cmd + , BSL.pack $ T.unpack cmd , BSL.pack " (error code " , BSL.pack $ show errorCode , BSL.pack "): " diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs index 057e6580b..7d6dd0fab 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -15,7 +15,6 @@ module Text.Pandoc.Lua.Module.Utils import Prelude import Control.Applicative ((<|>)) -import Data.Char (toLower) import Data.Default (def) import Data.Version (Version) import Foreign.Lua (Peekable, Lua, NumResults) @@ -27,6 +26,7 @@ import Text.Pandoc.Lua.Util (addFunction) import qualified Data.Digest.Pure.SHA as SHA import qualified Data.ByteString.Lazy as BSL +import qualified Data.Text as T import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Filter.JSON as JSONFilter @@ -64,7 +64,7 @@ makeSections number baselevel = -- limit years to the range 1601-9999 (ISO 8601 accepts greater than -- or equal to 1583, but MS Word only accepts dates starting 1601). -- Returns nil instead of a string if the conversion failed. -normalizeDate :: String -> Lua (Lua.Optional String) +normalizeDate :: T.Text -> Lua (Lua.Optional T.Text) normalizeDate = return . Lua.Optional . Shared.normalizeDate -- | Run a JSON filter on the given document. @@ -88,13 +88,13 @@ runJSONFilter mbDatadir doc filterFile optArgs = do -- | Calculate the hash of the given contents. sha1 :: BSL.ByteString - -> Lua String -sha1 = return . SHA.showDigest . SHA.sha1 + -> Lua T.Text +sha1 = return . T.pack . SHA.showDigest . SHA.sha1 -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). -stringify :: AstElement -> Lua String +stringify :: AstElement -> Lua T.Text stringify el = return $ case el of PandocElement pd -> Shared.stringify pd InlineElement i -> Shared.stringify i @@ -102,11 +102,11 @@ stringify el = return $ case el of MetaElement m -> Shared.stringify m CitationElement c -> Shared.stringify c MetaValueElement m -> stringifyMetaValue m - _ -> "" + _ -> mempty -stringifyMetaValue :: MetaValue -> String +stringifyMetaValue :: MetaValue -> T.Text stringifyMetaValue mv = case mv of - MetaBool b -> map toLower (show b) + MetaBool b -> T.toLower $ T.pack (show b) MetaString s -> s _ -> Shared.stringify mv @@ -139,5 +139,5 @@ instance Peekable AstElement where "Expected an AST element, but could not parse value as such." -- | Convert a number < 4000 to uppercase roman numeral. -toRomanNumeral :: Lua.Integer -> Lua String +toRomanNumeral :: Lua.Integer -> Lua T.Text toRomanNumeral = return . Shared.toRomanNumeral . fromIntegral diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index ee0fe3efb..77f4c4b96 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.MIME Copyright : Copyright (C) 2011-2019 John MacFarlane @@ -13,14 +14,13 @@ Mime type lookup for ODT writer. module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef, extensionFromMimeType, mediaCategory ) where import Prelude -import Data.Char (toLower) import Data.List (isPrefixOf, isSuffixOf) -import Data.List.Split (splitOn) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe (fromMaybe, listToMaybe) import System.FilePath -type MimeType = String +type MimeType = T.Text -- | Determine mime type appropriate for file path. getMimeType :: FilePath -> Maybe MimeType @@ -31,34 +31,34 @@ getMimeType fp | "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp = Just "application/vnd.oasis.opendocument.formula" -- generic - | otherwise = M.lookup (map toLower $ drop 1 $ takeExtension fp) mimeTypes + | otherwise = M.lookup (T.toLower $ T.drop 1 $ T.pack $ takeExtension fp) mimeTypes -- | Determime mime type appropriate for file path, defaulting to -- “application/octet-stream” if nothing else fits. getMimeTypeDef :: FilePath -> MimeType getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType -extensionFromMimeType :: MimeType -> Maybe String +extensionFromMimeType :: MimeType -> Maybe T.Text extensionFromMimeType mimetype = - M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes + M.lookup (T.takeWhile (/=';') mimetype) reverseMimeTypes -- note: we just look up the basic mime type, dropping the content-encoding etc. -- | Determine general media category for file path, e.g. -- -- prop> mediaCategory "foo.jpg" = Just "image" -mediaCategory :: FilePath -> Maybe String -mediaCategory fp = getMimeType fp >>= listToMaybe . splitOn "/" +mediaCategory :: FilePath -> Maybe T.Text +mediaCategory fp = getMimeType fp >>= listToMaybe . T.splitOn "/" -reverseMimeTypes :: M.Map MimeType String +reverseMimeTypes :: M.Map MimeType T.Text reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList -mimeTypes :: M.Map String MimeType +mimeTypes :: M.Map T.Text MimeType mimeTypes = M.fromList mimeTypesList -- | Collection of common mime types. -- Except for first entry, list borrowed from -- <https://github.com/Happstack/happstack-server/blob/master/src/Happstack/Server/FileServe/BuildingBlocks.hs happstack-server> -mimeTypesList :: [(String, MimeType)] +mimeTypesList :: [(T.Text, MimeType)] mimeTypesList = [("cpt","image/x-corelphotopaint") ,("gz","application/x-gzip") diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index bb6fc88ac..87af5c7f8 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -36,7 +36,7 @@ import Text.Pandoc.MIME (MimeType, getMimeTypeDef) -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map [FilePath] (MimeType, BL.ByteString)) deriving (Semigroup, Monoid, Data, Typeable) instance Show MediaBag where @@ -72,12 +72,12 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. -mediaDirectory :: MediaBag -> [(String, MimeType, Int)] +mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldrWithKey (\fp (mime,contents) -> ((Posix.joinPath fp, mime, fromIntegral $ BL.length contents):)) [] mediamap -mediaItems :: MediaBag -> [(String, MimeType, BL.ByteString)] +mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = M.foldrWithKey (\fp (mime,contents) -> ((Posix.joinPath fp, mime, contents):)) [] mediamap diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 66193ef60..0fe80be4e 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -36,9 +36,10 @@ module Text.Pandoc.Options ( module Text.Pandoc.Extensions import Prelude import Control.Applicative ((<|>)) import Data.Char (toLower) +import Data.Maybe (fromMaybe) import Data.Data (Data) import Data.Default -import Data.Text (Text, unpack) +import Data.Text (Text) import Text.DocTemplates (Context(..)) import qualified Data.Set as Set import Data.Typeable (Typeable) @@ -46,7 +47,7 @@ import GHC.Generics (Generic) import Skylighting (SyntaxMap, defaultSyntaxMap) import Text.Pandoc.Extensions import Text.Pandoc.Highlighting (Style, pygments) -import Text.Pandoc.Shared (camelCaseToHyphenated) +import Text.Pandoc.Shared (camelCaseStrToHyphenated) import Text.DocTemplates (Template) import Data.Aeson.TH (deriveJSON, defaultOptions, Options(..), SumEncoding(..)) @@ -60,10 +61,10 @@ data ReaderOptions = ReaderOptions{ , readerStandalone :: Bool -- ^ Standalone document with header , readerColumns :: Int -- ^ Number of columns in terminal , readerTabStop :: Int -- ^ Tab stop - , readerIndentedCodeClasses :: [String] -- ^ Default classes for + , readerIndentedCodeClasses :: [Text] -- ^ Default classes for -- indented code blocks - , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations - , readerDefaultImageExtension :: String -- ^ Default extension for images + , readerAbbreviations :: Set.Set Text -- ^ Strings to treat as abbreviations + , readerDefaultImageExtension :: Text -- ^ Default extension for images , readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx , readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML } deriving (Show, Read, Data, Typeable, Generic) @@ -84,7 +85,7 @@ instance Default ReaderOptions , readerStripComments = False } -defaultAbbrevs :: Set.Set String +defaultAbbrevs :: Set.Set Text defaultAbbrevs = Set.fromList [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.", @@ -99,11 +100,11 @@ defaultAbbrevs = Set.fromList data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable, Generic) data HTMLMathMethod = PlainMath - | WebTeX String -- url of TeX->image script. + | WebTeX Text -- url of TeX->image script. | GladTeX | MathML - | MathJax String -- url of MathJax.js - | KaTeX String -- url of KaTeX files + | MathJax Text -- url of MathJax.js + | KaTeX Text -- url of KaTeX files deriving (Show, Read, Eq, Data, Typeable, Generic) instance FromYAML HTMLMathMethod where @@ -111,18 +112,18 @@ instance FromYAML HTMLMathMethod where (withMap "HTMLMathMethod" $ \m -> do method <- m .: "method" mburl <- m .:? "url" - case unpack method of + case method :: Text of "plain" -> return PlainMath - "webtex" -> return $ WebTeX $ maybe "" unpack mburl + "webtex" -> return $ WebTeX $ fromMaybe "" mburl "gladtex" -> return GladTeX "mathml" -> return MathML "mathjax" -> return $ MathJax $ - maybe defaultMathJaxURL unpack mburl + fromMaybe defaultMathJaxURL mburl "katex" -> return $ KaTeX $ - maybe defaultKaTeXURL unpack mburl + fromMaybe defaultKaTeXURL mburl _ -> fail $ "Unknown HTML math method " ++ show method) node <|> (withStr "HTMLMathMethod" $ \method -> - case unpack method of + case method of "plain" -> return PlainMath "webtex" -> return $ WebTeX "" "gladtex" -> return GladTeX @@ -246,7 +247,7 @@ data WriterOptions = WriterOptions , writerWrapText :: WrapOption -- ^ Option for wrapping text , writerColumns :: Int -- ^ Characters in a line (for text wrapping) , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails - , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML + , writerIdentifierPrefix :: Text -- ^ Prefix for section & note ids in HTML -- and for footnote marks in markdown , writerCiteMethod :: CiteMethod -- ^ How to print cites , writerHtmlQTags :: Bool -- ^ Use @<q>@ tags for quotes in HTML @@ -256,8 +257,8 @@ data WriterOptions = WriterOptions , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting -- (Nothing = no highlighting) , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown - , writerEpubSubdirectory :: String -- ^ Subdir for epub in OCF - , writerEpubMetadata :: Maybe String -- ^ Metadata to include in EPUB + , writerEpubSubdirectory :: Text -- ^ Subdir for epub in OCF + , writerEpubMetadata :: Maybe Text -- ^ Metadata to include in EPUB , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) , writerTOCDepth :: Int -- ^ Number of levels to include in TOC @@ -309,10 +310,10 @@ instance HasSyntaxExtensions WriterOptions where isEnabled :: HasSyntaxExtensions a => Extension -> a -> Bool isEnabled ext opts = ext `extensionEnabled` getExtensions opts -defaultMathJaxURL :: String +defaultMathJaxURL :: Text defaultMathJaxURL = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/" -defaultKaTeXURL :: String +defaultKaTeXURL :: Text defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/" $(deriveJSON defaultOptions ''ReaderOptions) @@ -325,7 +326,7 @@ $(deriveJSON defaultOptions{ } ''HTMLMathMethod) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated + camelCaseStrToHyphenated } ''CiteMethod) $(deriveJSON defaultOptions{ constructorTagModifier = @@ -339,17 +340,17 @@ $(deriveJSON defaultOptions{ constructorTagModifier = $(deriveJSON defaultOptions ''HTMLSlideVariant) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated + camelCaseStrToHyphenated } ''TrackChanges) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated + camelCaseStrToHyphenated } ''WrapOption) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated . drop 8 + camelCaseStrToHyphenated . drop 8 } ''TopLevelDivision) $(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseToHyphenated + camelCaseStrToHyphenated } ''ReferenceLocation) diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index d7e61109f..1d307cdd4 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -46,7 +46,7 @@ import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..)) import Text.Pandoc.Process (pipeProcess) import System.Process (readProcessWithExitCode) -import Text.Pandoc.Shared (inDirectory, stringify) +import Text.Pandoc.Shared (inDirectory, stringify, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk (walkM) import Text.Pandoc.Writers.Shared (getField, metaToContext) @@ -141,7 +141,7 @@ makeWithWkhtmltopdf program pdfargs writer opts doc@(Pandoc meta _) = do (return . literal . stringify) (return . literal . stringify) meta - let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd + let toArgs (f, mbd) = maybe [] (\d -> ["--" <> f, T.unpack d]) mbd let args = pdfargs ++ mathArgs ++ concatMap toArgs [("page-size", getField "papersize" meta') ,("title", getField "title" meta') @@ -173,19 +173,19 @@ handleImages opts tmpdir doc = convertImages :: WriterOptions -> FilePath -> Inline -> PandocIO Inline convertImages opts tmpdir (Image attr ils (src, tit)) = do - img <- liftIO $ convertImage opts tmpdir src + img <- liftIO $ convertImage opts tmpdir $ T.unpack src newPath <- case img of Left e -> do report $ CouldNotConvertImage src e return src - Right fp -> return fp + Right fp -> return $ T.pack fp return (Image attr ils (newPath, tit)) convertImages _ _ x = return x -- Convert formats which do not work well in pdf to png convertImage :: WriterOptions -> FilePath -> FilePath - -> IO (Either String FilePath) + -> IO (Either Text FilePath) convertImage opts tmpdir fname = do let dpi = show $ writerDpi opts case mime of @@ -202,14 +202,14 @@ convertImage opts tmpdir fname = do then return $ Right pdfOut else return $ Left "conversion from SVG failed") (\(e :: E.SomeException) -> return $ Left $ - "check that rsvg-convert is in path.\n" ++ - show e) + "check that rsvg-convert is in path.\n" <> + tshow e) _ -> JP.readImage fname >>= \res -> case res of - Left e -> return $ Left e + Left e -> return $ Left $ T.pack e Right img -> E.catch (Right pngOut <$ JP.savePngImage pngOut img) $ - \(e :: E.SomeException) -> return (Left (show e)) + \(e :: E.SomeException) -> return (Left (tshow e)) where pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir @@ -262,12 +262,11 @@ missingCharacterWarnings :: Verbosity -> ByteString -> PandocIO () missingCharacterWarnings verbosity log' = do let ls = BC.lines log' let isMissingCharacterWarning = BC.isPrefixOf "Missing character: " - let addCodePoint [] = [] - addCodePoint (c:cs) - | isAscii c = c : addCodePoint cs - | otherwise = c : " (U+" ++ printf "%04X" (ord c) ++ ")" ++ - addCodePoint cs - let warnings = [ addCodePoint (utf8ToString (BC.drop 19 l)) + let toCodePoint c + | isAscii c = T.singleton c + | otherwise = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")" + let addCodePoint = T.concatMap toCodePoint + let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l)) | l <- ls , isMissingCharacterWarning l ] @@ -513,7 +512,7 @@ showVerboseInfo mbTmpDir program programArgs env source = do handlePDFProgramNotFound :: String -> IE.IOError -> IO a handlePDFProgramNotFound program e | IE.isDoesNotExistError e = - E.throwIO $ PandocPDFProgramNotFoundError program + E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program | otherwise = E.throwIO e utf8ToString :: ByteString -> String diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 68e900004..f56b13b66 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE CPP #-} {-# LANGUAGE ExplicitForAll #-} {-# LANGUAGE FlexibleContexts #-} @@ -7,6 +7,8 @@ {-# LANGUAGE IncoherentInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Parsing Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -18,13 +20,21 @@ A utility library with parsers used in pandoc readers. -} + module Text.Pandoc.Parsing ( takeWhileP, takeP, + countChar, + textStr, anyLine, anyLineNewline, indentWith, + manyChar, + many1Char, + manyTillChar, + many1TillChar, many1Till, manyUntil, + manyUntilChar, sepBy1', notFollowedBy', oneOfStrings, @@ -183,12 +193,14 @@ import Control.Monad.Reader import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isPunctuation, isSpace, ord, toLower, toUpper) import Data.Default -import Data.List (intercalate, isSuffixOf, transpose) +import Data.Functor (($>)) +import Data.List (intercalate, transpose) import qualified Data.Map as M import Data.Maybe (mapMaybe, fromMaybe) import qualified Data.Set as Set import Data.String import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines) @@ -240,45 +252,56 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where mempty = return mempty mappend = (<>) +-- | Like @count@, but packs its result +countChar :: (Stream s m Char, Monad m) + => Int + -> ParsecT s st m Char + -> ParsecT s st m Text +countChar n = fmap T.pack . count n + +-- | Like @string@, but uses @Text@. +textStr :: Stream s m Char => Text -> ParsecT s u m Text +textStr t = string (T.unpack t) $> t + -- | Parse characters while a predicate is true. takeWhileP :: Monad m => (Char -> Bool) - -> ParserT [Char] st m [Char] + -> ParserT Text st m Text takeWhileP f = do -- faster than 'many (satisfy f)' inp <- getInput pos <- getPosition - let (xs, rest) = span f inp + let (xs, rest) = T.span f inp -- needed to persuade parsec that this won't match an empty string: anyChar setInput rest - setPosition $ updatePosString pos xs + setPosition $ updatePosString pos $ T.unpack xs return xs -- Parse n characters of input (or the rest of the input if -- there aren't n characters). -takeP :: Monad m => Int -> ParserT [Char] st m [Char] +takeP :: Monad m => Int -> ParserT Text st m Text takeP n = do guard (n > 0) -- faster than 'count n anyChar' inp <- getInput pos <- getPosition - let (xs, rest) = splitAt n inp + let (xs, rest) = T.splitAt n inp -- needed to persuade parsec that this won't match an empty string: anyChar setInput rest - setPosition $ updatePosString pos xs + setPosition $ updatePosString pos $ T.unpack xs return xs -- | Parse any line of text -anyLine :: Monad m => ParserT [Char] st m [Char] +anyLine :: Monad m => ParserT Text st m Text anyLine = do -- This is much faster than: -- manyTill anyChar newline inp <- getInput pos <- getPosition - case break (=='\n') inp of - (this, '\n':rest) -> do + case T.break (=='\n') inp of + (this, T.uncons -> Just ('\n', rest)) -> do -- needed to persuade parsec that this won't match an empty string: anyChar setInput rest @@ -287,20 +310,39 @@ anyLine = do _ -> mzero -- | Parse any line, include the final newline in the output -anyLineNewline :: Monad m => ParserT [Char] st m [Char] -anyLineNewline = (++ "\n") <$> anyLine +anyLineNewline :: Monad m => ParserT Text st m Text +anyLineNewline = (<> "\n") <$> anyLine -- | Parse indent by specified number of spaces (or equiv. tabs) indentWith :: Stream s m Char => HasReaderOptions st - => Int -> ParserT s st m [Char] + => Int -> ParserT s st m Text indentWith num = do tabStop <- getOption readerTabStop if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) + then countChar num (char ' ') + else choice [ try (countChar num (char ' ')) , try (char '\t' >> indentWith (num - tabStop)) ] +-- | Like @many@, but packs its result. +manyChar :: Stream s m t + => ParserT s st m Char + -> ParserT s st m Text +manyChar = fmap T.pack . many + +-- | Like @many1@, but packs its result. +many1Char :: Stream s m t + => ParserT s st m Char + -> ParserT s st m Text +many1Char = fmap T.pack . many1 + +-- | Like @manyTill@, but packs its result. +manyTillChar :: Stream s m t + => ParserT s st m Char + -> ParserT s st m a + -> ParserT s st m Text +manyTillChar p = fmap T.pack . manyTill p + -- | Like @manyTill@, but reads at least one item. many1Till :: (Show end, Stream s m t) => ParserT s st m a @@ -312,6 +354,13 @@ many1Till p end = do rest <- manyTill p end return (first:rest) +-- | Like @many1Till@, but packs its result +many1TillChar :: (Show end, Stream s m t) + => ParserT s st m Char + -> ParserT s st m end + -> ParserT s st m Text +many1TillChar p = fmap T.pack . many1Till p + -- | Like @manyTill@, but also returns the result of end parser. manyUntil :: ParserT s u m a -> ParserT s u m b @@ -325,6 +374,14 @@ manyUntil p end = scan (xs, e) <- scan return (x:xs, e)) +-- | Like @manyUntil@, but also packs its result. +manyUntilChar :: ParserT s u m Char + -> ParserT s u m b + -> ParserT s u m (Text, b) +manyUntilChar p = fmap go . manyUntil p + where + go (x, y) = (T.pack x, y) + -- | Like @sepBy1@ from Parsec, -- but does not fail if it @sep@ succeeds and @p@ fails. sepBy1' :: ParsecT s u m a @@ -342,14 +399,18 @@ notFollowedBy' p = try $ join $ do a <- try p return (return ()) -- (This version due to Andrew Pimlott on the Haskell mailing list.) -oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String -oneOfStrings' _ [] = Prelude.fail "no strings" -oneOfStrings' matches strs = try $ do +oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text +oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack + +-- TODO: This should be re-implemented in a Text-aware way +oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String +oneOfStrings'' _ [] = Prelude.fail "no strings" +oneOfStrings'' matches strs = try $ do c <- anyChar let strs' = [xs | (x:xs) <- strs, x `matches` c] case strs' of [] -> Prelude.fail "not found" - _ -> (c:) <$> oneOfStrings' matches strs' + _ -> (c:) <$> oneOfStrings'' matches strs' <|> if "" `elem` strs' then return [c] else Prelude.fail "not found" @@ -357,11 +418,14 @@ oneOfStrings' matches strs = try $ do -- | Parses one of a list of strings. If the list contains -- two strings one of which is a prefix of the other, the longer -- string will be matched if possible. -oneOfStrings :: Stream s m Char => [String] -> ParserT s st m String +oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text oneOfStrings = oneOfStrings' (==) -- | Parses one of a list of strings (tried in order), case insensitive. -oneOfStringsCI :: Stream s m Char => [String] -> ParserT s st m String + +-- TODO: This will not be accurate with general Unicode (neither +-- Text.toLower nor Text.toCaseFold can be implemented with a map) +oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text oneOfStringsCI = oneOfStrings' ciMatch where ciMatch x y = toLower' x == toLower' y -- this optimizes toLower by checking common ASCII case @@ -388,13 +452,13 @@ blankline :: Stream s m Char => ParserT s st m Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. -blanklines :: Stream s m Char => ParserT s st m [Char] -blanklines = many1 blankline +blanklines :: Stream s m Char => ParserT s st m Text +blanklines = T.pack <$> many1 blankline -- | Gobble n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. gobbleSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT [Char] st m () + => Int -> ParserT Text st m () gobbleSpaces 0 = return () gobbleSpaces n | n < 0 = error "gobbleSpaces called with negative number" @@ -402,18 +466,18 @@ gobbleSpaces n char ' ' <|> eatOneSpaceOfTab gobbleSpaces (n - 1) -eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Char +eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char eatOneSpaceOfTab = do char '\t' tabstop <- getOption readerTabStop inp <- getInput - setInput $ replicate (tabstop - 1) ' ' ++ inp + setInput $ T.replicate (tabstop - 1) " " <> inp return ' ' -- | Gobble up to n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. gobbleAtMostSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT [Char] st m Int + => Int -> ParserT Text st m Int gobbleAtMostSpaces 0 = return 0 gobbleAtMostSpaces n | n < 0 = error "gobbleAtMostSpaces called with negative number" @@ -430,23 +494,26 @@ enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. -stringAnyCase :: Stream s m Char => [Char] -> ParserT s st m String -stringAnyCase [] = string "" -stringAnyCase (x:xs) = do +stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text +stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack + +stringAnyCase' :: Stream s m Char => String -> ParserT s st m String +stringAnyCase' [] = string "" +stringAnyCase' (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) - rest <- stringAnyCase xs + rest <- stringAnyCase' xs return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. parseFromString :: (Stream s m Char, IsString s) => ParserT s st m r - -> String + -> Text -> ParserT s st m r parseFromString parser str = do oldPos <- getPosition setPosition $ initialPos "chunk" oldInput <- getInput - setInput $ fromString str + setInput $ fromString $ T.unpack str result <- parser spaces eof @@ -458,7 +525,7 @@ parseFromString parser str = do -- This resets 'stateLastStrPos', which is almost always what we want. parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u) => ParserT s u m a - -> String + -> Text -> ParserT s u m a parseFromString' parser str = do oldLastStrPos <- getLastStrPos <$> getState @@ -468,9 +535,9 @@ parseFromString' parser str = do return res -- | Parse raw line block up to and including blank lines. -lineClump :: Monad m => ParserT [Char] st m String +lineClump :: Monad m => ParserT Text st m Text lineClump = blanklines - <|> (unlines <$> many1 (notFollowedBy blankline >> anyLine)) + <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine)) -- | Parse a string of characters between an open character -- and a close character, including text between balanced @@ -478,15 +545,15 @@ lineClump = blanklines -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char - -> ParserT s st m String + -> ParserT s st m Text charsInBalanced open close parser = try $ do char open let isDelim c = c == open || c == close - raw <- many $ many1 (notFollowedBy (satisfy isDelim) >> parser) + raw <- many $ T.pack <$> many1 (notFollowedBy (satisfy isDelim) >> parser) <|> (do res <- charsInBalanced open close parser - return $ [open] ++ res ++ [close]) + return $ T.singleton open <> res <> T.singleton close) char close - return $ concat raw + return $ T.concat raw -- old charsInBalanced would be: -- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline) @@ -532,10 +599,10 @@ romanNumeral upperCase = do -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. -emailAddress :: Stream s m Char => ParserT s st m (String, String) +emailAddress :: Stream s m Char => ParserT s st m (Text, Text) emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) - where toResult mbox dom = let full = fromEntities $ mbox ++ '@':dom - in (full, escapeURI $ "mailto:" ++ full) + where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom + in (full, escapeURI $ "mailto:" <> full) mailbox = intercalate "." <$> (emailWord `sepBy1'` dot) domain = intercalate "." <$> (subdomain `sepBy1'` dot) dot = char '.' @@ -553,14 +620,14 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) xs <- many (satisfy isEmailChar) return (x:xs) isEmailChar c = isAlphaNum c || isEmailPunct c - isEmailPunct c = c `elem` "!\"#$%&'*+-/=?^_{|}~;" + isEmailPunct c = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;" -uriScheme :: Stream s m Char => ParserT s st m String +uriScheme :: Stream s m Char => ParserT s st m Text uriScheme = oneOfStringsCI (Set.toList schemes) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: Stream s m Char => ParserT s st m (String, String) +uri :: Stream s m Char => ParserT s st m (Text, Text) uri = try $ do scheme <- uriScheme char ':' @@ -571,12 +638,12 @@ uri = try $ do -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) -- as a URL, while NOT picking up the closing paren in -- (http://wikipedia.org). So we include balanced parens in the URL. - str <- concat <$> many1 (uriChunkBetween '(' ')' - <|> uriChunkBetween '{' '}' - <|> uriChunkBetween '[' ']' - <|> uriChunk) - str' <- option str $ char '/' >> return (str ++ "/") - let uri' = scheme ++ ":" ++ fromEntities str' + str <- T.concat <$> many1 (uriChunkBetween '(' ')' + <|> uriChunkBetween '{' '}' + <|> uriChunkBetween '[' ']' + <|> T.pack <$> uriChunk) + str' <- option str $ char '/' >> return (str <> "/") + let uri' = scheme <> ":" <> fromEntities str' return (uri', escapeURI uri') where wordChar = alphaNum <|> oneOf "#$%+/@\\_-&=" @@ -588,51 +655,54 @@ uri = try $ do <|> entity <|> try (punct <* lookAhead (void wordChar <|> void percentEscaped)) uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk - return ([l] ++ chunk ++ [r]) + return (T.pack $ [l] ++ chunk ++ [r]) -mathInlineWith :: Stream s m Char => String -> String -> ParserT s st m String +mathInlineWith :: Stream s m Char => Text -> Text -> ParserT s st m Text mathInlineWith op cl = try $ do - string op + textStr op when (op == "$") $ notFollowedBy space - words' <- many1Till (count 1 (noneOf " \t\n\\") + words' <- many1Till (countChar 1 (noneOf " \t\n\\") <|> (char '\\' >> -- This next clause is needed because \text{..} can -- contain $, \(\), etc. (try (string "text" >> - (("\\text" ++) <$> inBalancedBraces 0 "")) - <|> (\c -> ['\\',c]) <$> anyChar)) + (("\\text" <>) <$> inBalancedBraces 0 "")) + <|> (\c -> T.pack ['\\',c]) <$> anyChar)) <|> do (blankline <* notFollowedBy' blankline) <|> (oneOf " \t" <* skipMany (oneOf " \t")) notFollowedBy (char '$') return " " - ) (try $ string cl) + ) (try $ textStr cl) notFollowedBy digit -- to prevent capture of $5 - return $ trimMath $ concat words' + return $ trimMath $ T.concat words' where - inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String - inBalancedBraces 0 "" = do + inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text + inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack + + inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String + inBalancedBraces' 0 "" = do c <- anyChar if c == '{' - then inBalancedBraces 1 "{" + then inBalancedBraces' 1 "{" else mzero - inBalancedBraces 0 s = return $ reverse s - inBalancedBraces numOpen ('\\':xs) = do + inBalancedBraces' 0 s = return $ reverse s + inBalancedBraces' numOpen ('\\':xs) = do c <- anyChar - inBalancedBraces numOpen (c:'\\':xs) - inBalancedBraces numOpen xs = do + inBalancedBraces' numOpen (c:'\\':xs) + inBalancedBraces' numOpen xs = do c <- anyChar case c of - '}' -> inBalancedBraces (numOpen - 1) (c:xs) - '{' -> inBalancedBraces (numOpen + 1) (c:xs) - _ -> inBalancedBraces numOpen (c:xs) + '}' -> inBalancedBraces' (numOpen - 1) (c:xs) + '{' -> inBalancedBraces' (numOpen + 1) (c:xs) + _ -> inBalancedBraces' numOpen (c:xs) -mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String -mathDisplayWith op cl = try $ do - string op - many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ string cl) +mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text +mathDisplayWith op cl = try $ fmap T.pack $ do + textStr op + many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ textStr cl) mathDisplay :: (HasReaderOptions st, Stream s m Char) - => ParserT s st m String + => ParserT s st m Text mathDisplay = (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -641,7 +711,7 @@ mathDisplay = mathDisplayWith "\\\\[" "\\\\]") mathInline :: (HasReaderOptions st , Stream s m Char) - => ParserT s st m String + => ParserT s st m Text mathInline = (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -665,8 +735,8 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. withRaw :: Monad m - => ParsecT [Char] st m a - -> ParsecT [Char] st m (a, [Char]) + => ParsecT Text st m a + -> ParsecT Text st m (a, Text) withRaw parser = do pos1 <- getPosition inp <- getInput @@ -674,11 +744,11 @@ withRaw parser = do pos2 <- getPosition let (l1,c1) = (sourceLine pos1, sourceColumn pos1) let (l2,c2) = (sourceLine pos2, sourceColumn pos2) - let inplines = take ((l2 - l1) + 1) $ lines inp + let inplines = take ((l2 - l1) + 1) $ T.lines inp let raw = case inplines of [] -> "" - [l] -> take (c2 - c1) l - ls -> unlines (init ls) ++ take (c2 - 1) (last ls) + [l] -> T.take (c2 - c1) l + ls -> T.unlines (init ls) <> T.take (c2 - 1) (last ls) return (result, raw) -- | Parses backslash, then applies character parser. @@ -716,7 +786,7 @@ lowerRoman = do decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) decimal = do num <- many1 digit - return (Decimal, fromMaybe 1 $ safeRead num) + return (Decimal, fromMaybe 1 $ safeRead $ T.pack num) -- | Parses a '@' and optional label and -- returns (DefaultStyle, [next example number]). The next @@ -726,10 +796,10 @@ exampleNum :: Stream s m Char => ParserT s ParserState m (ListNumberStyle, Int) exampleNum = do char '@' - lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) + lab <- T.pack <$> many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) st <- getState let num = stateNextExample st - let newlabels = if null lab + let newlabels = if T.null lab then stateExamples st else M.insert lab num $ stateExamples st updateState $ \s -> s{ stateNextExample = num + 1 @@ -825,25 +895,25 @@ orderedListMarker style delim = do charRef :: Stream s m Char => ParserT s st m Inline charRef = do c <- characterReference - return $ Str [c] + return $ Str $ T.singleton c -lineBlockLine :: Monad m => ParserT [Char] st m String +lineBlockLine :: Monad m => ParserT Text st m Text lineBlockLine = try $ do char '|' char ' ' - white <- many (spaceChar >> return '\160') + white <- T.pack <$> many (spaceChar >> return '\160') notFollowedBy newline line <- anyLine continuations <- many (try $ char ' ' >> anyLine) - return $ white ++ unwords (line : continuations) + return $ white <> T.unwords (line : continuations) blankLineBlockLine :: Stream s m Char => ParserT s st m Char blankLineBlockLine = try (char '|' >> blankline) -- | Parses an RST-style line block and returns a list of strings. -lineBlockLines :: Monad m => ParserT [Char] st m [String] +lineBlockLines :: Monad m => ParserT Text st m [Text] lineBlockLines = try $ do - lines' <- many1 (lineBlockLine <|> ((:[]) <$> blankLineBlockLine)) + lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine)) skipMany blankline return lines' @@ -927,9 +997,9 @@ gridTableWith' blocks headless = tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter -gridTableSplitLine :: [Int] -> String -> [String] +gridTableSplitLine :: [Int] -> Text -> [Text] gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitStringByIndices (init indices) $ trimr line + splitTextByIndices (init indices) $ trimr line gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment) gridPart ch = do @@ -949,9 +1019,10 @@ gridPart ch = do gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline -removeFinalBar :: String -> String -removeFinalBar = - reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse +removeFinalBar :: Text -> Text +removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|') + where + go c = T.any (== c) " \t" -- | Separator between rows of grid table. gridTableSep :: Stream s m Char => Char -> ParserT s st m Char @@ -969,7 +1040,7 @@ gridTableHeader headless blocks = try $ do then return $ repeat "" else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> - many1Till anyChar newline) + T.pack <$> many1Till anyChar newline) underDashes <- if headless then return dashes else gridDashedLines '=' @@ -979,16 +1050,16 @@ gridTableHeader headless blocks = try $ do let aligns = map snd underDashes let rawHeads = if headless then replicate (length underDashes) "" - else map (unlines . map trim) $ transpose + else map (T.unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads return (heads, aligns, indices) -gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String] +gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline - return (gridTableSplitLine indices line) + return (gridTableSplitLine indices $ T.pack line) -- | Parse row of grid table. gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st) @@ -997,7 +1068,7 @@ gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st) -> ParserT s st m (mf [Blocks]) gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ + let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $ transpose colLines compactifyCell bs = case compactify [bs] of [] -> mempty @@ -1005,40 +1076,41 @@ gridTableRow blocks indices = do cells <- sequence <$> mapM (parseFromString' blocks) cols return $ fmap (map compactifyCell) cells -removeOneLeadingSpace :: [String] -> [String] +removeOneLeadingSpace :: [Text] -> [Text] removeOneLeadingSpace xs = if all startsWithSpace xs - then map (drop 1) xs + then map (T.drop 1) xs else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' + where startsWithSpace t = case T.uncons t of + Nothing -> True + Just (c, _) -> c == ' ' -- | Parse footer for a grid table. -gridTableFooter :: Stream s m Char => ParserT s st m [Char] +gridTableFooter :: Stream s m Char => ParserT s st m Text gridTableFooter = blanklines --- -- | Removes the ParsecT layer from the monad transformer stack -readWithM :: (Stream s m Char, ToString s) +readWithM :: (Stream s m Char, ToText s) => ParserT s st m a -- ^ parser -> st -- ^ initial state -> s -- ^ input -> m (Either PandocError a) readWithM parser state input = - mapLeft (PandocParsecError $ toString input) `liftM` runParserT parser state "source" input + mapLeft (PandocParsecError $ toText input) `liftM` runParserT parser state "source" input -- | Parse a string with a given parser and state -readWith :: Parser [Char] st a +readWith :: Parser Text st a -> st - -> String + -> Text -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp -- | Parse a string with @parser@ (for testing). testStringWith :: Show a - => ParserT [Char] ParserState Identity a - -> [Char] + => ParserT Text ParserState Identity a + -> Text -> IO () testStringWith parser str = UTF8.putStrLn $ show $ readWith parser defaultParserState str @@ -1057,23 +1129,23 @@ data ParserState = ParserState stateSubstitutions :: SubstTable, -- ^ List of substitution references stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) - stateNoteRefs :: Set.Set String, -- ^ List of note references used + stateNoteRefs :: Set.Set Text, -- ^ List of note references used stateMeta :: Meta, -- ^ Document metadata stateMeta' :: F Meta, -- ^ Document metadata - stateCitations :: M.Map String String, -- ^ RST-style citations + stateCitations :: M.Map Text Text, -- ^ RST-style citations stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateIdentifiers :: Set.Set String, -- ^ Header identifiers used + stateIdentifiers :: Set.Set Text, -- ^ Header identifiers used stateNextExample :: Int, -- ^ Number of next example - stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers + stateExamples :: M.Map Text Int, -- ^ Map from example labels to numbers stateMacros :: M.Map Text Macro, -- ^ Table of macros defined so far - stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role - stateRstCustomRoles :: M.Map String (String, Maybe String, Attr), -- ^ Current rST custom text roles + stateRstDefaultRole :: Text, -- ^ Current rST default interpreted text role + stateRstCustomRoles :: M.Map Text (Text, Maybe Text, Attr), -- ^ Current rST custom text roles -- Triple represents: 1) Base role, 2) Optional format (only for :raw: -- roles), 3) Additional classes (rest of Attr is unused)). stateCaption :: Maybe Inlines, -- ^ Caption in current environment - stateInHtmlBlock :: Maybe String, -- ^ Tag type of HTML block being parsed + stateInHtmlBlock :: Maybe Text, -- ^ Tag type of HTML block being parsed stateFencedDivLevel :: Int, -- ^ Depth of fenced div - stateContainers :: [String], -- ^ parent include files + stateContainers :: [Text], -- ^ parent include files stateLogMessages :: [LogMessage], -- ^ log messages stateMarkdownAttribute :: Bool -- ^ True if in markdown=1 context } @@ -1112,8 +1184,8 @@ instance Monad m => HasQuoteContext ParserState m where return result class HasIdentifierList st where - extractIdentifierList :: st -> Set.Set String - updateIdentifierList :: (Set.Set String -> Set.Set String) -> st -> st + extractIdentifierList :: st -> Set.Set Text + updateIdentifierList :: (Set.Set Text -> Set.Set Text) -> st -> st instance HasIdentifierList ParserState where extractIdentifierList = stateIdentifiers @@ -1144,8 +1216,8 @@ instance HasLogMessages ParserState where getLogMessages st = reverse $ stateLogMessages st class HasIncludeFiles st where - getIncludeFiles :: st -> [String] - addIncludeFile :: String -> st -> st + getIncludeFiles :: st -> [Text] + addIncludeFile :: Text -> st -> st dropLatestIncludeFile :: st -> st instance HasIncludeFiles ParserState where @@ -1232,17 +1304,21 @@ data QuoteContext | NoQuote -- ^ Used when not parsing inside quotes deriving (Eq, Show) -type NoteTable = [(String, String)] +type NoteTable = [(Text, Text)] -type NoteTable' = M.Map String (SourcePos, F Blocks) +type NoteTable' = M.Map Text (SourcePos, F Blocks) -- used in markdown reader -newtype Key = Key String deriving (Show, Read, Eq, Ord) +newtype Key = Key Text deriving (Show, Read, Eq, Ord) -toKey :: String -> Key -toKey = Key . map toLower . unwords . words . unbracket - where unbracket ('[':xs) | "]" `isSuffixOf` xs = take (length xs - 1) xs - unbracket xs = xs +toKey :: Text -> Key +toKey = Key . T.toLower . T.unwords . T.words . unbracket + where unbracket t + | Just ('[', t') <- T.uncons t + , Just (t'', ']') <- T.unsnoc t' + = t'' + | otherwise + = t type KeyTable = M.Map Key (Target, Attr) @@ -1261,17 +1337,17 @@ registerHeader :: (Stream s m a, HasReaderOptions st, registerHeader (ident,classes,kvs) header' = do ids <- extractIdentifierList <$> getState exts <- getOption readerExtensions - if null ident && Ext_auto_identifiers `extensionEnabled` exts + if T.null ident && Ext_auto_identifiers `extensionEnabled` exts then do let id' = uniqueIdent exts (B.toList header') ids let id'' = if Ext_ascii_identifiers `extensionEnabled` exts - then mapMaybe toAsciiChar id' + then T.pack $ mapMaybe toAsciiChar $ T.unpack id' else id' updateState $ updateIdentifierList $ Set.insert id' updateState $ updateIdentifierList $ Set.insert id'' return (id'',classes,kvs) else do - unless (null ident) $ do + unless (T.null ident) $ do when (ident `Set.member` ids) $ do pos <- getPosition logMessage $ DuplicateIdentifier ident pos @@ -1314,7 +1390,7 @@ failIfInQuoteContext context = do context' <- getQuoteContext when (context' == context) $ Prelude.fail "already inside quotes" -charOrRef :: Stream s m Char => String -> ParserT s st m Char +charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char charOrRef cs = oneOf cs <|> try (do c <- characterReference guard (c `elem` cs) @@ -1379,7 +1455,7 @@ nested p = do return res citeKey :: (Stream s m Char, HasLastStrPosition st) - => ParserT s st m (Bool, String) + => ParserT s st m (Bool, Text) citeKey = try $ do guard =<< notAfterString suppress_author <- option False (True <$ char '-') @@ -1390,15 +1466,15 @@ citeKey = try $ do rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|> try (oneOf ":/" <* lookAhead (char '/')) let key = firstChar:rest - return (suppress_author, key) + return (suppress_author, T.pack key) token :: (Stream s m t) - => (t -> String) + => (t -> Text) -> (t -> SourcePos) -> (t -> Maybe a) -> ParsecT s st m a -token pp pos match = tokenPrim pp (\_ t _ -> pos t) match +token pp pos match = tokenPrim (T.unpack . pp) (\_ t _ -> pos t) match infixr 5 <+?> (<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a @@ -1409,27 +1485,27 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs') where ident' = fromMaybe ident (lookup "id" kvs) cls' = case lookup "class" kvs of - Just cl -> words cl + Just cl -> T.words cl Nothing -> cls kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st) - => ParserT [a] st m (mf Blocks) - -> (String -> [a]) + => ParserT a st m (mf Blocks) + -> (Text -> a) -> [FilePath] -> FilePath - -> ParserT [a] st m (mf Blocks) + -> ParserT a st m (mf Blocks) insertIncludedFile' blocks totoks dirs f = do oldPos <- getPosition oldInput <- getInput containers <- getIncludeFiles <$> getState - when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show oldPos - updateState $ addIncludeFile f + when (T.pack f `elem` containers) $ + throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show oldPos + updateState $ addIncludeFile $ T.pack f mbcontents <- readFileFromDirs dirs f contents <- case mbcontents of Just s -> return s Nothing -> do - report $ CouldNotLoadIncludeFile f oldPos + report $ CouldNotLoadIncludeFile (T.pack f) oldPos return "" setPosition $ newPos f 1 1 setInput $ totoks contents @@ -1443,7 +1519,7 @@ insertIncludedFile' blocks totoks dirs f = do -- @PandocParseError@. insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) => ParserT [a] st m Blocks - -> (String -> [a]) + -> (Text -> [a]) -> [FilePath] -> FilePath -> ParserT [a] st m Blocks insertIncludedFile blocks totoks dirs f = @@ -1452,7 +1528,7 @@ insertIncludedFile blocks totoks dirs f = -- | Parse content of include file as future blocks. Circular includes result in -- an @PandocParseError@. insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) - => ParserT String st m (Future st Blocks) + => ParserT Text st m (Future st Blocks) -> [FilePath] -> FilePath - -> ParserT String st m (Future st Blocks) + -> ParserT Text st m (Future st Blocks) insertIncludedFileF p = insertIncludedFile' p id diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 3ad479287..461f7f4d9 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -1,6 +1,7 @@ {-# LANGUAGE MonoLocalBinds #-} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -59,8 +60,8 @@ import Control.Monad (unless) import Control.Monad.Except (throwError) import Data.Aeson import qualified Data.ByteString.Lazy as BL -import Data.List (intercalate) import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Error @@ -99,7 +100,7 @@ data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc) | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc) -- | Association list of formats and readers. -readers :: PandocMonad m => [(String, Reader m)] +readers :: PandocMonad m => [(Text, Reader m)] readers = [ ("native" , TextReader readNative) ,("json" , TextReader readJSON) ,("markdown" , TextReader readMarkdown) @@ -135,11 +136,11 @@ readers = [ ("native" , TextReader readNative) ] -- | Retrieve reader, extensions based on formatSpec (format+extensions). -getReader :: PandocMonad m => String -> m (Reader m, Extensions) +getReader :: PandocMonad m => Text -> m (Reader m, Extensions) getReader s = case parseFormatSpec s of Left e -> throwError $ PandocAppError - $ intercalate "\n" [m | Message m <- errorMessages e] + $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e] Right (readerName, extsToEnable, extsToDisable) -> case lookup readerName readers of Nothing -> throwError $ PandocUnknownReaderError @@ -154,7 +155,7 @@ getReader s = unless (extensionEnabled ext allExts) $ throwError $ PandocUnsupportedExtensionError - (drop 4 $ show ext) readerName) + (T.drop 4 $ T.pack $ show ext) readerName) (extsToEnable ++ extsToDisable) return (r, exts) diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 368c86d4f..40b6f77c9 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.CommonMark Copyright : Copyright (C) 2015-2019 John MacFarlane @@ -18,9 +20,9 @@ where import Prelude import CMarkGFM import Control.Monad.State -import Data.List (groupBy) import qualified Data.Set as Set -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojiToInline) @@ -40,24 +42,24 @@ readCommonMark opts s = return $ [ extTable | isEnabled Ext_pipe_tables opts ] ++ [ extAutolink | isEnabled Ext_autolink_bare_uris opts ] -convertEmojis :: String -> [Inline] -convertEmojis s@(':':xs) = - case break (==':') xs of - (ys,':':zs) -> +convertEmojis :: Text -> [Inline] +convertEmojis s@(T.uncons -> Just (':',xs)) = + case T.break (==':') xs of + (ys, T.uncons -> Just (':',zs)) -> case emojiToInline ys of Just em -> em : convertEmojis zs - Nothing -> Str (':' : ys) : convertEmojis (':':zs) + Nothing -> Str (":" <> ys) : convertEmojis (":" <> zs) _ -> [Str s] convertEmojis s = - case break (==':') s of + case T.break (==':') s of ("","") -> [] (_,"") -> [Str s] - (xs,ys) -> Str xs:convertEmojis ys + (xs,ys) -> Str xs : convertEmojis ys addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty -addHeaderId :: ReaderOptions -> Block -> State (Set.Set String) Block +addHeaderId :: ReaderOptions -> Block -> State (Set.Set Text) Block addHeaderId opts (Header lev (_,classes,kvs) ils) = do ids <- get let ident = uniqueIdent (readerExtensions opts) ils ids @@ -82,14 +84,14 @@ addBlock _ (Node _ THEMATIC_BREAK _) = addBlock opts (Node _ BLOCK_QUOTE nodes) = (BlockQuote (addBlocks opts nodes) :) addBlock opts (Node _ (HTML_BLOCK t) _) - | isEnabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :) + | isEnabled Ext_raw_html opts = (RawBlock (Format "html") t :) | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = id addBlock _ (Node _ (CODE_BLOCK info t) _) = - (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) + (CodeBlock ("", take 1 (T.words info), []) t :) addBlock opts (Node _ (HEADING lev) nodes) = (Header lev ("",[],[]) (addInlines opts nodes) :) addBlock opts (Node _ (LIST listAttrs) nodes) = @@ -176,29 +178,28 @@ addInlines opts = foldr (addInline opts) [] addInline :: ReaderOptions -> Node -> [Inline] -> [Inline] addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++) - where raw = unpack t - clumps = groupBy samekind raw + where clumps = T.groupBy samekind t samekind ' ' ' ' = True samekind ' ' _ = False samekind _ ' ' = False samekind _ _ = True - toinl (' ':_) = [Space] - toinl xs = if isEnabled Ext_emoji opts - then convertEmojis xs - else [Str xs] + toinl (T.uncons -> Just (' ', _)) = [Space] + toinl xs = if isEnabled Ext_emoji opts + then convertEmojis xs + else [Str xs] addInline _ (Node _ LINEBREAK _) = (LineBreak :) addInline opts (Node _ SOFTBREAK _) | isEnabled Ext_hard_line_breaks opts = (LineBreak :) | otherwise = (SoftBreak :) addInline opts (Node _ (HTML_INLINE t) _) - | isEnabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :) + | isEnabled Ext_raw_html opts = (RawInline (Format "html") t :) | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = id addInline _ (Node _ (CODE t) _) = - (Code ("",[],[]) (unpack t) :) + (Code ("",[],[]) t :) addInline opts (Node _ EMPH nodes) = (Emph (addInlines opts nodes) :) addInline opts (Node _ STRONG nodes) = @@ -206,7 +207,7 @@ addInline opts (Node _ STRONG nodes) = addInline opts (Node _ STRIKETHROUGH nodes) = (Strikeout (addInlines opts nodes) :) addInline opts (Node _ (LINK url title) nodes) = - (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :) + (Link nullAttr (addInlines opts nodes) (url, title) :) addInline opts (Node _ (IMAGE url title) nodes) = - (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :) + (Image nullAttr (addInlines opts nodes) (url, title) :) addInline _ _ = id diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index ceb63ac84..1aa1dfaa4 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -19,6 +19,7 @@ import Control.Monad.Except (guard, liftM2, throwError) import qualified Data.Foldable as F import Data.Maybe (fromMaybe) import Data.Text (Text) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.Definition @@ -70,7 +71,6 @@ parseCreole = do eof return $ B.doc bs - -- -- block parsers -- @@ -92,9 +92,9 @@ nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart >> manyTill content nowikiEnd) where content = brackets <|> line - brackets = try $ option "" ((:[]) <$> newline) - <+> (char ' ' >> (many (char ' ') <+> string "}}}") <* eol) - line = option "" ((:[]) <$> newline) <+> manyTill anyChar eol + brackets = try $ option "" (T.singleton <$> newline) + <+> (char ' ' >> (manyChar (char ' ') <+> textStr "}}}") <* eol) + line = option "" (T.singleton <$> newline) <+> manyTillChar anyChar eol eol = lookAhead $ try $ nowikiEnd <|> newline nowikiStart = optional newline >> string "{{{" >> skipMany spaceChar >> newline nowikiEnd = try $ linebreak >> string "}}}" >> skipMany spaceChar >> newline @@ -106,7 +106,7 @@ header = try $ do fmap length (many1 (char '=')) guard $ level <= 6 skipSpaces - content <- B.str <$> manyTill (noneOf "\n") headerEnd + content <- B.str <$> manyTillChar (noneOf "\n") headerEnd return $ B.header level content where headerEnd = try $ skipSpaces >> many (char '=') >> skipSpaces >> newline @@ -204,7 +204,7 @@ inline = choice [ whitespace escapedChar :: PandocMonad m => CRLParser m B.Inlines escapedChar = - fmap (B.str . (:[])) (try $ char '~' >> noneOf "\t\n ") + fmap (B.str . T.singleton) (try $ char '~' >> noneOf "\t\n ") escapedLink :: PandocMonad m => CRLParser m B.Inlines escapedLink = try $ do @@ -217,8 +217,8 @@ image = try $ do (orig, src) <- wikiImg return $ B.image src "" (B.str orig) where - linkSrc = many $ noneOf "|}\n\r\t" - linkDsc = char '|' >> many (noneOf "}\n\r\t") + linkSrc = manyChar $ noneOf "|}\n\r\t" + linkDsc = char '|' >> manyChar (noneOf "}\n\r\t") wikiImg = try $ do string "{{" src <- linkSrc @@ -231,11 +231,11 @@ link = try $ do (orig, src) <- uriLink <|> wikiLink return $ B.link src "" orig where - linkSrc = many $ noneOf "|]\n\r\t" - linkDsc :: PandocMonad m => String -> CRLParser m B.Inlines + linkSrc = manyChar $ noneOf "|]\n\r\t" + linkDsc :: PandocMonad m => Text -> CRLParser m B.Inlines linkDsc otxt = B.str <$> try (option otxt - (char '|' >> many (noneOf "]\n\r\t"))) + (char '|' >> manyChar (noneOf "]\n\r\t"))) linkImg = try $ char '|' >> image wikiLink = try $ do string "[[" @@ -248,7 +248,7 @@ link = try $ do return (B.str orig, src) inlineNowiki :: PandocMonad m => CRLParser m B.Inlines -inlineNowiki = B.code <$> (start >> manyTill (noneOf "\n\r") end) +inlineNowiki = B.code <$> (start >> manyTillChar (noneOf "\n\r") end) where start = try $ string "{{{" end = try $ string "}}}" >> lookAhead (noneOf "}") @@ -271,11 +271,11 @@ linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) innerNewline = return B.space symbol :: PandocMonad m => CRLParser m B.Inlines -symbol = fmap (B.str . (:[])) (oneOf specialChars) +symbol = fmap (B.str . T.singleton) (oneOf specialChars) str :: PandocMonad m => CRLParser m B.Inlines str = let strChar = noneOf ("\t\n " ++ specialChars) in - fmap B.str (many1 strChar) + fmap B.str (many1Char strChar) bold :: PandocMonad m => CRLParser m B.Inlines bold = B.strong . mconcat <$> diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 3f44f83f8..ade9d27a3 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.DocBook Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -536,20 +537,22 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions - $ T.unpack $ crFilter inp + let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. -- Other xml instructions are simply removed from the input stream. -handleInstructions :: String -> String -handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs -handleInstructions xs = case break (=='<') xs of +handleInstructions :: Text -> Text +handleInstructions = T.pack . handleInstructions' . T.unpack + +handleInstructions' :: String -> String +handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs +handleInstructions' xs = case break (=='<') xs of (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions zs - (ys, zs) -> ys ++ handleInstructions zs + ([], '<':zs) -> '<' : handleInstructions' zs + (ys, zs) -> ys ++ handleInstructions' zs getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do @@ -580,13 +583,13 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr elt = - fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- convenience function -named :: String -> Element -> Bool -named s e = qName (elName e) == s +named :: Text -> Element -> Bool +named s e = qName (elName e) == T.unpack s -- @@ -611,7 +614,7 @@ addMetadataFromElement e = do [z] -> getInlines z >>= addMeta fieldname zs -> mapM getInlines zs >>= addMeta fieldname -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m () addMeta field val = modify (setMeta field val) instance HasMeta DBState where @@ -638,10 +641,8 @@ admonitionTags :: [String] admonitionTags = ["important","caution","note","tip","warning"] -- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs +trimNl :: Text -> Text +trimNl = T.dropAround (== '\n') -- meld text into beginning of first paragraph of Blocks. -- assumes Blocks start with a Para; if not, does nothing. @@ -668,7 +669,7 @@ getMediaobject e = do h = case atVal "depth" of "" -> [] d -> [("height", d)] - atr = (atVal "id", words $ atVal "role", w ++ h) + atr = (atVal "id", T.words $ atVal "role", w ++ h) in return (atVal "fileref", atr) let getCaption el = case filterChild (\x -> named "caption" x || named "textobject" x @@ -691,8 +692,8 @@ parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x + else return $ plain $ trimInlines $ text $ T.pack s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x parseBlock (Elem e) = case qName (elName e) of "toc" -> skip -- skip TOC, since in pandoc it's autogenerated @@ -740,7 +741,7 @@ parseBlock (Elem e) = "refsect2" -> sect 2 "refsect3" -> sect 3 "refsection" -> gets dbSectionLevel >>= sect . (+1) - l@_ | l `elem` admonitionTags -> parseAdmonition l + l@_ | l `elem` admonitionTags -> parseAdmonition $ T.pack l "area" -> skip "areaset" -> skip "areaspec" -> skip @@ -800,7 +801,7 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - lift $ report $ IgnoredElement $ qName (elName e) + lift $ report $ IgnoredElement $ T.pack $ qName (elName e) return mempty parseMixed container conts = do @@ -818,7 +819,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e + $ trimNl $ T.pack $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -871,9 +872,9 @@ parseBlock (Elem e) = _ -> AlignDefault let toWidth c = case findAttr (unqual "colwidth") c of Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> + $ safeRead $ "0" <> T.filter (\x -> (x >= '0' && x <= '9') - || x == '.') w + || x == '.') (T.pack w) Nothing -> 0 :: Double let numrows = case bodyrows of [] -> 0 @@ -938,9 +939,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> DB m Inlines -parseInline (Text (CData _ s _)) = return $ text s +parseInline (Text (CData _ s _)) = return $ text $ T.pack s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) text $ lookupEntity ref + return $ maybe (text $ T.toUpper $ T.pack ref) (text . T.pack) $ lookupEntity ref parseInline (Elem e) = case qName (elName e) of "equation" -> equation e displayMath @@ -980,7 +981,7 @@ parseInline (Elem e) = "constant" -> codeWithLang "userinput" -> codeWithLang "varargs" -> return $ code "(...)" - "keycap" -> return (str $ strContent e) + "keycap" -> return (str $ T.pack $ strContent e) "keycombo" -> keycombo <$> mapM parseInline (elContent e) "menuchoice" -> menuchoice <$> @@ -992,20 +993,20 @@ parseInline (Elem e) = let title = case attrValue "endterm" e of "" -> maybe "???" xrefTitleByElem (findElementById linkend content) - endterm -> maybe "???" strContent + endterm -> maybe "???" (T.pack . strContent) (findElementById endterm content) - return $ link ('#' : linkend) "" (text title) - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e + return $ link ("#" <> linkend) "" (text title) + "email" -> return $ link ("mailto:" <> T.pack (strContent e)) "" + $ str $ T.pack $ strContent e + "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e "ulink" -> link (attrValue "url" e) "" <$> innerInlines "link" -> do ils <- innerInlines let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> '#' : attrValue "linkend" e + Just h -> T.pack h + _ -> "#" <> attrValue "linkend" e let ils' = if ils == mempty then str href else ils - let attr = (attrValue "id" e, words $ attrValue "role" e, []) + let attr = (attrValue "id" e, T.words $ attrValue "role" e, []) return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of @@ -1023,7 +1024,7 @@ parseInline (Elem e) = "br" -> return linebreak _ -> skip >> innerInlines where skip = do - lift $ report $ IgnoredElement $ qName (elName e) + lift $ report $ IgnoredElement $ T.pack $ qName (elName e) return mempty innerInlines = (trimInlines . mconcat) <$> @@ -1032,7 +1033,7 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines (filterChildren (named "member") e) segmentedList = do @@ -1062,8 +1063,8 @@ parseInline (Elem e) = -- if there's no such attribute, employ some heuristics based on what -- docbook-xsl does. xrefTitleByElem el - | not (null xrefLabel) = xrefLabel - | otherwise = case qName (elName el) of + | not (T.null xrefLabel) = xrefLabel + | otherwise = case qName (elName el) of "chapter" -> descendantContent "title" el "section" -> descendantContent "title" el "sect1" -> descendantContent "title" el @@ -1073,10 +1074,10 @@ parseInline (Elem e) = "sect5" -> descendantContent "title" el "cmdsynopsis" -> descendantContent "command" el "funcsynopsis" -> descendantContent "function" el - _ -> qName (elName el) ++ "_title" + _ -> T.pack $ qName (elName el) ++ "_title" where xrefLabel = attrValue "xreflabel" el - descendantContent name = maybe "???" strContent + descendantContent name = maybe "???" (T.pack . strContent) . filterElementName (\n -> qName n == name) -- | Extract a math equation from an element @@ -1088,20 +1089,20 @@ equation :: Monad m => Element -- ^ The element from which to extract a mathematical equation - -> (String -> Inlines) + -> (Text -> Inlines) -- ^ A constructor for some Inlines, taking the TeX code as input -> m Inlines equation e constructor = - return $ mconcat $ map constructor $ mathMLEquations ++ latexEquations + return $ mconcat $ map constructor $ mathMLEquations <> latexEquations where - mathMLEquations :: [String] + mathMLEquations :: [Text] mathMLEquations = map writeTeX $ rights $ readMath (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml") - (readMathML . showElement) + (readMathML . T.pack . showElement) - latexEquations :: [String] + latexEquations :: [Text] latexEquations = readMath (\x -> qName (elName x) == "mathphrase") - (concat . fmap showVerbatimCData . elContent) + (T.concat . fmap showVerbatimCData . elContent) readMath :: (Element -> Bool) -> (Element -> b) -> [b] readMath childPredicate fromElement = @@ -1111,9 +1112,10 @@ equation e constructor = -- | Get the actual text stored in a CData block. 'showContent' -- returns the text still surrounded by the [[CDATA]] tags. -showVerbatimCData :: Content -> String -showVerbatimCData (Text (CData _ d _)) = d -showVerbatimCData c = showContent c +showVerbatimCData :: Content -> Text +showVerbatimCData (Text (CData _ d _)) = T.pack d +showVerbatimCData c = T.pack $ showContent c + -- | Set the prefix of a name to 'Nothing' removePrefix :: QName -> QName diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 387c3c7e2..cd4ff01db 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -4,6 +4,7 @@ {-# LANGUAGE PatternGuards #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Docx Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -68,12 +69,12 @@ import Data.Default (Default) import Data.List (delete, intersect) import Data.Char (isSpace) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe (isJust, fromMaybe) import Data.Sequence (ViewL (..), viewl) import qualified Data.Sequence as Seq import qualified Data.Set as Set import Text.Pandoc.Builder --- import Text.Pandoc.Definition import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Options import Text.Pandoc.Readers.Docx.Combine @@ -101,14 +102,14 @@ readDocx opts bytes readDocx _ _ = throwError $ PandocSomeError "couldn't parse docx file" -data DState = DState { docxAnchorMap :: M.Map String String - , docxAnchorSet :: Set.Set String - , docxImmedPrevAnchor :: Maybe String +data DState = DState { docxAnchorMap :: M.Map T.Text T.Text + , docxAnchorSet :: Set.Set T.Text + , docxImmedPrevAnchor :: Maybe T.Text , docxMediaBag :: MediaBag , docxDropCap :: Inlines -- keep track of (numId, lvl) values for -- restarting - , docxListState :: M.Map (String, String) Integer + , docxListState :: M.Map (T.Text, T.Text) Integer , docxPrevPara :: Inlines } @@ -142,7 +143,7 @@ spansToKeep = [] divsToKeep :: [ParaStyleName] divsToKeep = ["Definition", "Definition Term"] -metaStyles :: M.Map ParaStyleName String +metaStyles :: M.Map ParaStyleName T.Text metaStyles = M.fromList [ ("Title", "title") , ("Subtitle", "subtitle") , ("Author", "author") @@ -167,7 +168,7 @@ isEmptyPar (Paragraph _ parParts) = isEmptyElem _ = True isEmptyPar _ = False -bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue) +bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue) bodyPartsToMeta' [] = return M.empty bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp @@ -232,22 +233,22 @@ runElemToInlines Tab = space runElemToInlines SoftHyphen = text "\xad" runElemToInlines NoBreakHyphen = text "\x2011" -runElemToString :: RunElem -> String -runElemToString (TextRun s) = s -runElemToString LnBrk = ['\n'] -runElemToString Tab = ['\t'] -runElemToString SoftHyphen = ['\xad'] -runElemToString NoBreakHyphen = ['\x2011'] +runElemToText :: RunElem -> T.Text +runElemToText (TextRun s) = s +runElemToText LnBrk = T.singleton '\n' +runElemToText Tab = T.singleton '\t' +runElemToText SoftHyphen = T.singleton '\xad' +runElemToText NoBreakHyphen = T.singleton '\x2011' -runToString :: Run -> String -runToString (Run _ runElems) = concatMap runElemToString runElems -runToString _ = "" +runToText :: Run -> T.Text +runToText (Run _ runElems) = T.concat $ map runElemToText runElems +runToText _ = "" -parPartToString :: ParPart -> String -parPartToString (PlainRun run) = runToString run -parPartToString (InternalHyperLink _ runs) = concatMap runToString runs -parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs -parPartToString _ = "" +parPartToText :: ParPart -> T.Text +parPartToText (PlainRun run) = runToText run +parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs +parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs +parPartToText _ = "" blacklistedCharStyles :: [CharStyleName] blacklistedCharStyles = ["Hyperlink"] @@ -310,7 +311,7 @@ runToInlines :: PandocMonad m => Run -> DocxContext m Inlines runToInlines (Run rs runElems) | maybe False isCodeCharStyle $ rParentStyle rs = do rPr <- resolveDependentRunStyle rs - let codeString = code $ concatMap runElemToString runElems + let codeString = code $ T.concat $ map runElemToText runElems return $ case rVertAlign rPr of Just SupScrpt -> superscript codeString Just SubScrpt -> subscript codeString @@ -328,17 +329,17 @@ runToInlines (Endnote bps) = do return $ note blksList runToInlines (InlineDrawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt + return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" extentToAttr :: Extent -> Attr extentToAttr (Just (w, h)) = ("", [], [("width", showDim w), ("height", showDim h)] ) where - showDim d = show (d / 914400) ++ "in" + showDim d = tshow (d / 914400) <> "in" extentToAttr _ = nullAttr -blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines +blocksToInlinesWarn :: PandocMonad m => T.Text -> Blocks -> DocxContext m Inlines blocksToInlinesWarn cmtId blks = do let blkList = toList blks notParaOrPlain :: Block -> Bool @@ -347,7 +348,7 @@ blocksToInlinesWarn cmtId blks = do notParaOrPlain _ = True unless ( not (any notParaOrPlain blkList)) $ lift $ P.report $ DocxParserWarning $ - "Docx comment " ++ cmtId ++ " will not retain formatting" + "Docx comment " <> cmtId <> " will not retain formatting" return $ blocksToInlines' blkList -- The majority of work in this function is done in the primed @@ -440,12 +441,12 @@ parPartToInlines' (BookMark _ anchor) = return $ spanWith (newAnchor, ["anchor"], []) mempty parPartToInlines' (Drawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt + return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt parPartToInlines' Chart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" parPartToInlines' (InternalHyperLink anchor runs) = do ils <- smushInlines <$> mapM runToInlines runs - return $ link ('#' : anchor) "" ils + return $ link ("#" <> anchor) "" ils parPartToInlines' (ExternalHyperLink target runs) = do ils <- smushInlines <$> mapM runToInlines runs return $ link target "" ils @@ -463,7 +464,7 @@ isAnchorSpan (Span (_, classes, kvs) _) = null kvs isAnchorSpan _ = False -dummyAnchors :: [String] +dummyAnchors :: [T.Text] dummyAnchors = ["_GoBack"] makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks @@ -477,7 +478,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) , (Span (anchIdent, ["anchor"], _) cIls) <- c = do hdrIDMap <- gets docxAnchorMap exts <- readerExtensions <$> asks docxOptions - let newIdent = if null ident + let newIdent = if T.null ident then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident newIls = concatMap f ils where f il | il == c = cIls @@ -490,7 +491,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap exts <- readerExtensions <$> asks docxOptions - let newIdent = if null ident + let newIdent = if T.null ident then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} @@ -558,8 +559,8 @@ parStyleToTransform pPr else transform parStyleToTransform _ = return id -normalizeToClassName :: (FromStyleName a) => a -> String -normalizeToClassName = map go . fromStyleName +normalizeToClassName :: (FromStyleName a) => a -> T.Text +normalizeToClassName = T.map go . fromStyleName where go c | isSpace c = '-' | otherwise = c @@ -574,7 +575,8 @@ bodyPartToBlocks (Paragraph pPr parparts) return $ transform $ codeBlock $ - concatMap parPartToString parparts + T.concat $ + map parPartToText parparts | Just (style, n) <- pHeading pPr = do ils <-local (\s-> s{docxInHeaderBlock=True}) (smushInlines <$> mapM parPartToInlines parparts) @@ -646,7 +648,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do , ("num-id", numId) , ("format", fmt) , ("text", txt) - , ("start", show start) + , ("start", tshow start) ] modify $ \st -> st{ docxListState = -- expire all the continuation data for lists of level > this one: @@ -705,12 +707,12 @@ bodyPartToBlocks (OMathPara e) = -- replace targets with generated anchors. rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline -rewriteLink' l@(Link attr ils ('#':target, title)) = do +rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do anchorMap <- gets docxAnchorMap case M.lookup target anchorMap of Just newTarget -> do modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)} - return $ Link attr ils ('#':newTarget, title) + return $ Link attr ils ("#" <> newTarget, title) Nothing -> do modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)} return l diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index da40a80ea..82791d669 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Combine Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>, diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index e7a916f1c..05d9dd697 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Fields Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -16,16 +17,18 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..) ) where import Prelude +import Data.Functor (($>)) +import qualified Data.Text as T import Text.Parsec -import Text.Parsec.String (Parser) +import Text.Parsec.Text (Parser) -type URL = String +type URL = T.Text data FieldInfo = HyperlinkField URL | UnknownField deriving (Show) -parseFieldInfo :: String -> Either ParseError FieldInfo +parseFieldInfo :: T.Text -> Either ParseError FieldInfo parseFieldInfo = parse fieldInfo "" fieldInfo :: Parser FieldInfo @@ -34,31 +37,31 @@ fieldInfo = <|> return UnknownField -escapedQuote :: Parser String -escapedQuote = string "\\\"" +escapedQuote :: Parser T.Text +escapedQuote = string "\\\"" $> "\\\"" -inQuotes :: Parser String +inQuotes :: Parser T.Text inQuotes = - (try escapedQuote) <|> (anyChar >>= (\c -> return [c])) + (try escapedQuote) <|> (anyChar >>= (\c -> return $ T.singleton c)) -quotedString :: Parser String +quotedString :: Parser T.Text quotedString = do char '"' - concat <$> manyTill inQuotes (try (char '"')) + T.concat <$> manyTill inQuotes (try (char '"')) -unquotedString :: Parser String -unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof) +unquotedString :: Parser T.Text +unquotedString = T.pack <$> manyTill anyChar (try $ lookAhead space *> return () <|> eof) -fieldArgument :: Parser String +fieldArgument :: Parser T.Text fieldArgument = quotedString <|> unquotedString -- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25 -hyperlinkSwitch :: Parser (String, String) +hyperlinkSwitch :: Parser (T.Text, T.Text) hyperlinkSwitch = do sw <- string "\\l" spaces farg <- fieldArgument - return (sw, farg) + return (T.pack sw, farg) hyperlink :: Parser URL hyperlink = do @@ -68,6 +71,6 @@ hyperlink = do farg <- fieldArgument switches <- spaces *> many hyperlinkSwitch let url = case switches of - ("\\l", s) : _ -> farg ++ ('#': s) + ("\\l", s) : _ -> farg <> "#" <> s _ -> farg return url diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index eb24640c5..b7b7a3835 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -22,6 +22,7 @@ import Prelude import Data.List import Data.Maybe import Data.String (fromString) +import qualified Data.Text as T import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.JSON import Text.Pandoc.Readers.Docx.Parse (ParaStyleName) @@ -45,20 +46,20 @@ getNumId _ = Nothing getNumIdN :: Block -> Integer getNumIdN b = fromMaybe (-1) (getNumId b) -getText :: Block -> Maybe String +getText :: Block -> Maybe T.Text getText (Div (_, _, kvs) _) = lookup "text" kvs getText _ = Nothing data ListType = Itemized | Enumerated ListAttributes -listStyleMap :: [(String, ListNumberStyle)] +listStyleMap :: [(T.Text, ListNumberStyle)] listStyleMap = [("upperLetter", UpperAlpha), ("lowerLetter", LowerAlpha), ("upperRoman", UpperRoman), ("lowerRoman", LowerRoman), ("decimal", Decimal)] -listDelimMap :: [(String, ListNumberDelim)] +listDelimMap :: [(T.Text, ListNumberDelim)] listDelimMap = [("%1)", OneParen), ("(%1)", TwoParens), ("%1.", Period)] @@ -82,11 +83,11 @@ getListType b@(Div (_, _, kvs) _) | isListItem b = _ -> Nothing getListType _ = Nothing -listParagraphDivs :: [String] +listParagraphDivs :: [T.Text] listParagraphDivs = ["list-paragraph"] listParagraphStyles :: [ParaStyleName] -listParagraphStyles = map fromString listParagraphDivs +listParagraphStyles = map (fromString . T.unpack) listParagraphDivs -- This is a first stab at going through and attaching meaning to list -- paragraphs, without an item marker, following a list item. We diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 889bd80fc..8598ada6f 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -63,6 +64,7 @@ import qualified Data.ByteString.Lazy as B import Data.Char (chr, ord, readLitChar) import Data.List import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import System.FilePath import Text.Pandoc.Readers.Docx.Util @@ -71,7 +73,7 @@ import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead) import qualified Text.Pandoc.UTF8 as UTF8 import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) -import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont) +import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.XML.Light import qualified Text.XML.Light.Cursor as XMLC @@ -88,7 +90,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show -data ReaderState = ReaderState { stateWarnings :: [String] +data ReaderState = ReaderState { stateWarnings :: [T.Text] , stateFldCharState :: FldCharState } deriving Show @@ -119,7 +121,6 @@ eitherToD (Left _) = throwError DocxError concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] concatMapM f xs = liftM concat (mapM f xs) - -- This is similar to `mapMaybe`: it maps a function returning the D -- monad over a list, and only keeps the non-erroring return values. mapD :: (a -> D b) -> [a] -> D [b] @@ -178,18 +179,18 @@ type ParStyleMap = M.Map ParaStyleId ParStyle data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show -data Numb = Numb String String [LevelOverride] +data Numb = Numb T.Text T.Text [LevelOverride] deriving Show -- ilvl startOverride lvl -data LevelOverride = LevelOverride String (Maybe Integer) (Maybe Level) +data LevelOverride = LevelOverride T.Text (Maybe Integer) (Maybe Level) deriving Show -data AbstractNumb = AbstractNumb String [Level] +data AbstractNumb = AbstractNumb T.Text [Level] deriving Show -- ilvl format string start -data Level = Level String String String (Maybe Integer) +data Level = Level T.Text T.Text T.Text (Maybe Integer) deriving Show data DocumentLocation = InDocument | InFootnote | InEndnote @@ -199,11 +200,11 @@ data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces - (Maybe (M.Map String Element)) - (Maybe (M.Map String Element)) + (Maybe (M.Map T.Text Element)) + (Maybe (M.Map T.Text Element)) deriving Show -data Comments = Comments NameSpaces (M.Map String Element) +data Comments = Comments NameSpaces (M.Map T.Text Element) deriving Show data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer @@ -238,8 +239,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String (Maybe Level) [ParPart] - | Tbl String TblGrid TblLook [Row] + | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart] + | Tbl T.Text TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -279,7 +280,7 @@ data ParPart = PlainRun Run | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] - | Drawing FilePath String String B.ByteString Extent -- title, alt + | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | Chart -- placeholder for now | PlainOMath [Exp] | Field FieldInfo [Run] @@ -290,28 +291,28 @@ data ParPart = PlainRun Run data Run = Run RunStyle [RunElem] | Footnote [BodyPart] | Endnote [BodyPart] - | InlineDrawing FilePath String String B.ByteString Extent -- title, alt + | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | InlineChart -- placeholder deriving Show -data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen +data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show -type Target = String -type Anchor = String -type URL = String -type BookMarkId = String -type RelId = String -type ChangeId = String -type CommentId = String -type Author = String -type ChangeDate = String -type CommentDate = String +type Target = T.Text +type Anchor = T.Text +type URL = T.Text +type BookMarkId = T.Text +type RelId = T.Text +type ChangeId = T.Text +type CommentId = T.Text +type Author = T.Text +type ChangeDate = T.Text +type CommentDate = T.Text archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive -archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) +archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [T.Text]) archiveToDocxWithWarnings archive = do docXmlPath <- case getDocumentXmlPath archive of Just fp -> Right fp @@ -341,7 +342,7 @@ archiveToDocxWithWarnings archive = do Right doc -> Right (Docx doc, stateWarnings st) Left e -> Left e -getDocumentXmlPath :: Archive -> Maybe String +getDocumentXmlPath :: Archive -> Maybe FilePath getDocumentXmlPath zf = do entry <- findEntryByPath "_rels/.rels" zf relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry @@ -394,7 +395,7 @@ constructBogusParStyleData stName = ParStyle , numInfo = Nothing , psParentStyle = Nothing , pStyleName = stName - , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName + , pStyleId = ParaStyleId . T.filter (/=' ') . fromStyleName $ stName } archiveToNotes :: Archive -> Notes @@ -441,8 +442,8 @@ filePathToRelType path docXmlPath = relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship relElemToRelationship relType element | qName (elName element) == "Relationship" = do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element + relId <- findAttrText (QName "Id" Nothing Nothing) element + target <- findAttrText (QName "Target" Nothing Nothing) element return $ Relationship relType relId target relElemToRelationship _ _ = Nothing @@ -464,7 +465,7 @@ filePathIsMedia fp = in (dir == "word/media/") -lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do (absNumId, ovrrides) <- lookup numId $ map (\(Numb nid absnumid ovrRides) -> (nid, (absnumid, ovrRides))) numbs @@ -483,7 +484,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride loElemToLevelOverride ns element | isElem ns "w" "lvlOverride" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element let startOverride = findChildByName ns "w" "startOverride" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -495,9 +496,9 @@ loElemToLevelOverride _ _ = Nothing numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | isElem ns "w" "num" element = do - numId <- findAttrByName ns "w" "numId" element + numId <- findAttrTextByName ns "w" "numId" element absNumId <- findChildByName ns "w" "abstractNumId" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let lvlOverrides = mapMaybe (loElemToLevelOverride ns) (findChildrenByName ns "w" "lvlOverride" element) @@ -507,7 +508,7 @@ numElemToNum _ _ = Nothing absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb absNumElemToAbsNum ns element | isElem ns "w" "abstractNum" element = do - absNumId <- findAttrByName ns "w" "abstractNumId" element + absNumId <- findAttrTextByName ns "w" "abstractNumId" element let levelElems = findChildrenByName ns "w" "lvl" element levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels @@ -516,11 +517,11 @@ absNumElemToAbsNum _ _ = Nothing levelElemToLevel :: NameSpaces -> Element -> Maybe Level levelElemToLevel ns element | isElem ns "w" "lvl" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element fmt <- findChildByName ns "w" "numFmt" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" txt <- findChildByName ns "w" "lvlText" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let start = findChildByName ns "w" "start" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -544,11 +545,11 @@ archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) -elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element) +elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element - | isElem ns "w" (notetype ++ "s") element = + | isElem ns "w" (notetype <> "s") element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" notetype element) in @@ -556,11 +557,11 @@ elemToNotes ns notetype element M.fromList pairs elemToNotes _ _ _ = Nothing -elemToComments :: NameSpaces -> Element -> M.Map String Element +elemToComments :: NameSpaces -> Element -> M.Map T.Text Element elemToComments ns element | isElem ns "w" "comments" element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" "comment" element) in @@ -632,7 +633,7 @@ testBitMask bitMaskS n = pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) pHeading = getParStyleField headingLev . pStyle -pNumInfo :: ParagraphStyle -> Maybe (String, String) +pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) pNumInfo = getParStyleField numInfo . pStyle elemToBodyPart :: NameSpaces -> Element -> D BodyPart @@ -640,7 +641,7 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do - expsLst <- eitherToD $ readOMML $ showElement c + expsLst <- eitherToD $ readOMML $ T.pack $ showElement c return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element @@ -664,7 +665,7 @@ elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChildByName ns "w" "tblPr" element >>= findChildByName ns "w" "tblCaption" - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" caption = fromMaybe "" caption' grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g @@ -687,10 +688,10 @@ lookupRelationship docLocation relid rels = where pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels -expandDrawingId :: String -> D (FilePath, B.ByteString) +expandDrawingId :: T.Text -> D (FilePath, B.ByteString) expandDrawingId s = do location <- asks envLocation - target <- asks (lookupRelationship location s . envRelationships) + target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -699,12 +700,12 @@ expandDrawingId s = do Nothing -> throwError DocxError Nothing -> throwError DocxError -getTitleAndAlt :: NameSpaces -> Element -> (String, String) +getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text) getTitleAndAlt ns element = let mbDocPr = findChildByName ns "wp" "inline" element >>= findChildByName ns "wp" "docPr" - title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title") - alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr") + title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title") + alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr") in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart @@ -716,7 +717,7 @@ elemToParPart ns element = let (title, alt) = getTitleAndAlt ns drawingElem a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrByName ns "r" "embed" + >>= findAttrTextByName ns "r" "embed" in case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) @@ -726,7 +727,7 @@ elemToParPart ns element | isElem ns "w" "r" element , Just _ <- findChildByName ns "w" "pict" element = let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrByName ns "r" "id" + >>= findAttrTextByName ns "r" "id" in case drawing of -- Todo: check out title and attr for deprecated format. @@ -795,7 +796,7 @@ elemToParPart ns element fldCharState <- gets stateFldCharState case fldCharState of FldCharOpen -> do - info <- eitherToD $ parseFieldInfo $ strContent instrText + info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} return NullParPart _ -> return NullParPart @@ -816,56 +817,56 @@ elemToParPart ns element return $ ChangedRuns change runs elemToParPart ns element | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttrByName ns "w" "id" element - , Just bmName <- findAttrByName ns "w" "name" element = + , Just bmId <- findAttrTextByName ns "w" "id" element + , Just bmName <- findAttrTextByName ns "w" "name" element = return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttrByName ns "r" "id" element = do + , Just relId <- findAttrTextByName ns "r" "id" element = do location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of Just target -> - case findAttrByName ns "w" "anchor" element of - Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs + case findAttrTextByName ns "w" "anchor" element of + Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs Nothing -> return $ ExternalHyperLink target runs Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttrByName ns "w" "anchor" element = do + , Just anchor <- findAttrTextByName ns "w" "anchor" element = do runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttrByName ns "w" "id" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttrByName ns "w" "id" element = + , Just cmtId <- findAttrTextByName ns "w" "id" element = return $ CommentEnd cmtId elemToParPart ns element | isElem ns "m" "oMath" element = - fmap PlainOMath (eitherToD $ readOMML $ showElement element) + fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element) elemToParPart _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element | isElem ns "w" "comment" element - , Just cmtId <- findAttrByName ns "w" "id" element - , Just cmtAuthor <- findAttrByName ns "w" "author" element - , Just cmtDate <- findAttrByName ns "w" "date" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element + , Just cmtAuthor <- findAttrTextByName ns "w" "author" element + , Just cmtDate <- findAttrTextByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem -lookupFootnote :: String -> Notes -> Maybe Element +lookupFootnote :: T.Text -> Notes -> Maybe Element lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s -lookupEndnote :: String -> Notes -> Maybe Element +lookupEndnote :: T.Text -> Notes -> Maybe Element lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s elemToExtent :: Element -> Extent @@ -876,7 +877,7 @@ elemToExtent drawingElem = where wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem - >>= findAttr (QName at Nothing Nothing) >>= safeRead + >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack childElemToRun :: NameSpaces -> Element -> D Run @@ -887,7 +888,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= @@ -900,7 +901,7 @@ childElemToRun ns element = return InlineChart childElemToRun ns element | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttrByName ns "w" "id" element = do + , Just fnId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -908,7 +909,7 @@ childElemToRun ns element Nothing -> return $ Footnote [] childElemToRun ns element | isElem ns "w" "endnoteReference" element - , Just enId <- findAttrByName ns "w" "id" element = do + , Just enId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -961,15 +962,15 @@ getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate) getTrackedChange ns element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate) getTrackedChange _ _ = Nothing @@ -978,7 +979,7 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (fmap ParaStyleId . findAttrByName ns "w" "val") + (fmap ParaStyleId . findAttrTextByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style @@ -1010,7 +1011,7 @@ elemToRunStyleD ns element charStyles <- asks envCharStyles let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrByName ns "w" "val" >>= + findAttrTextByName ns "w" "val" >>= flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle @@ -1020,12 +1021,12 @@ elemToRunElem ns element | isElem ns "w" "t" element || isElem ns "w" "delText" element || isElem ns "m" "t" element = do - let str = strContent element + let str = T.pack $ strContent element font <- asks envFont case font of Nothing -> return $ TextRun str Just f -> return . TextRun $ - map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str + T.map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab | isElem ns "w" "softHyphen" element = return SoftHyphen @@ -1043,11 +1044,11 @@ getSymChar ns element | Just s <- lowerFromPrivate <$> getCodepoint , Just font <- getFont = case readLitChar ("\\x" ++ s) of - [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char + [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char _ -> TextRun "" where getCodepoint = findAttrByName ns "w" "char" element - getFont = stringToFont =<< findAttrByName ns "w" "font" element + getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element lowerFromPrivate ('F':xs) = '0':xs lowerFromPrivate xs = xs getSymChar _ _ = TextRun "" @@ -1059,7 +1060,7 @@ elemToRunElems ns element let qualName = elemName ns "w" let font = do fontElem <- findElement (qualName "rFonts") element - stringToFont =<< + textToFont . T.pack =<< foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"] local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index ac2d6fa07..f81707e92 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -3,6 +3,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse.Styles Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -46,20 +47,19 @@ import Prelude import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except -import Data.Char (toLower) -import Data.List import Data.Function (on) import Data.String (IsString(..)) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light -newtype CharStyleId = CharStyleId String +newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) -newtype ParaStyleId = ParaStyleId String +newtype ParaStyleId = ParaStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) newtype CharStyleName = CharStyleName CIString @@ -68,25 +68,31 @@ newtype ParaStyleName = ParaStyleName CIString deriving (Show, Eq, Ord, IsString, FromStyleName) -- Case-insensitive comparisons -newtype CIString = CIString String deriving (Show, IsString, FromStyleName) +newtype CIString = CIString T.Text deriving (Show, IsString, FromStyleName) class FromStyleName a where - fromStyleName :: a -> String + fromStyleName :: a -> T.Text instance FromStyleName String where + fromStyleName = T.pack + +instance FromStyleName T.Text where fromStyleName = id class FromStyleId a where - fromStyleId :: a -> String + fromStyleId :: a -> T.Text instance FromStyleId String where + fromStyleId = T.pack + +instance FromStyleId T.Text where fromStyleId = id instance Eq CIString where - (==) = (==) `on` map toLower . coerce + (==) = (==) `on` T.toCaseFold . coerce instance Ord CIString where - compare = compare `on` map toLower . coerce + compare = compare `on` T.toCaseFold . coerce data VertAlign = BaseLn | SupScrpt | SubScrpt deriving Show @@ -108,7 +114,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool deriving Show data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) - , numInfo :: Maybe (String, String) + , numInfo :: Maybe (T.Text, T.Text) , psParentStyle :: Maybe ParStyle , pStyleName :: ParaStyleName , pStyleId :: ParaStyleId @@ -146,7 +152,7 @@ isBasedOnStyle ns element parentStyle , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element @@ -234,7 +240,7 @@ checkOnOff _ _ _ = Nothing elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) + = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element) <*> getElementStyleName ns element <*> (Just $ elemToRunStyle ns element parentStyle) @@ -267,32 +273,32 @@ elemToRunStyle _ _ _ = defaultRunStyle getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger =<< - (stripPrefix "heading " . map toLower $ + , Just n <- stringToInteger . T.unpack =<< + (T.stripPrefix "heading " . T.toLower $ fromStyleName styleName) , n > 0 = Just (styleName, fromInteger n) getHeaderLevel _ _ = Nothing -getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a +getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") - <|> findAttrByName ns "w" "styleId" el) + ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val") + <|> findAttrTextByName ns "w" "styleId" el) -getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text) getNumInfo ns element = do let numPr = findChildByName ns "w" "pPr" element >>= findChildByName ns "w" "numPr" lvl = fromMaybe "0" (numPr >>= findChildByName ns "w" "ilvl" >>= - findAttrByName ns "w" "val") + findAttrTextByName ns "w" "val") numId <- numPr >>= findChildByName ns "w" "numId" >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" return (numId, lvl) elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle elemToParStyleData ns element parentStyle - | Just styleId <- findAttrByName ns "w" "styleId" element + | Just styleId <- findAttrTextByName ns "w" "styleId" element , Just styleName <- getElementStyleName ns element = Just $ ParStyle { diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index f4855efd2..0de1114bd 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -19,11 +19,14 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces , findChildByName , findChildrenByName + , findAttrText , findAttrByName + , findAttrTextByName ) where import Prelude import Data.Maybe (mapMaybe) +import qualified Data.Text as T import Text.XML.Light type NameSpaces = [(String, String)] @@ -55,7 +58,13 @@ findChildrenByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChildren (elemName ns' pref name) el +findAttrText :: QName -> Element -> Maybe T.Text +findAttrText x = fmap T.pack . findAttr x + findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String findAttrByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findAttr (elemName ns' pref name) el + +findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text +findAttrTextByName a b c = fmap T.pack . findAttrByName a b c diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs index 60d406df1..3a92cfa19 100644 --- a/src/Text/Pandoc/Readers/DokuWiki.hs +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -2,6 +2,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.DokuWiki Copyright : Copyright (C) 2018-2019 Alexander Krotov @@ -20,8 +21,7 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isDigit) import qualified Data.Foldable as F -import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf) -import Data.List.Split (splitOn) +import Data.List (transpose) import Data.Maybe (fromMaybe, catMaybes) import Data.Text (Text) import qualified Data.Text as T @@ -31,7 +31,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, trim, underlineSpan) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow) -- | Read DokuWiki from an input string and return a Pandoc document. readDokuWiki :: PandocMonad m @@ -42,7 +42,7 @@ readDokuWiki opts s = do let input = crFilter s res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input case res of - Left e -> throwError $ PandocParsecError (T.unpack input) e + Left e -> throwError $ PandocParsecError input e Right d -> return d type DWParser = ParserT Text ParserState @@ -71,9 +71,9 @@ parseDokuWiki = B.doc . mconcat <$> many block <* spaces <* eof -- | Parse <code> and <file> attributes -codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)]) +codeLanguage :: PandocMonad m => DWParser m (Text, [Text], [(Text, Text)]) codeLanguage = try $ do - rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>'))) + rawLang <- option "-" (spaceChar *> manyTillChar anyChar (lookAhead (spaceChar <|> char '>'))) let attr = case rawLang of "-" -> [] l -> [l] @@ -81,16 +81,16 @@ codeLanguage = try $ do -- | Generic parser for <code> and <file> tags codeTag :: PandocMonad m - => ((String, [String], [(String, String)]) -> String -> a) - -> String + => ((Text, [Text], [(Text, Text)]) -> Text -> a) + -> Text -> DWParser m a codeTag f tag = try $ f <$ char '<' - <* string tag + <* textStr tag <*> codeLanguage <* manyTill anyChar (char '>') <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</" <* string tag <* char '>') + <*> manyTillChar anyChar (try $ string "</" <* textStr tag <* char '>') -- * Inline parsers @@ -167,19 +167,19 @@ underlined :: PandocMonad m => DWParser m B.Inlines underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines nowiki :: PandocMonad m => DWParser m B.Inlines -nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>") +nowiki = try $ B.text <$ string "<nowiki>" <*> manyTillChar anyChar (try $ string "</nowiki>") percent :: PandocMonad m => DWParser m B.Inlines -percent = try $ B.text <$> enclosed (string "%%") nestedString +percent = try $ B.text <$> enclosed (string "%%") nestedText -nestedString :: (Show a, PandocMonad m) - => DWParser m a -> DWParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar +nestedText :: (Show a, PandocMonad m) + => DWParser m a -> DWParser m Text +nestedText end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ many1Char spaceChar <* notFollowedBy end monospaced :: PandocMonad m => DWParser m B.Inlines -monospaced = try $ B.code <$> enclosed (string "''") nestedString +monospaced = try $ B.code <$> enclosed (string "''") nestedText subscript :: PandocMonad m => DWParser m B.Inlines subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines @@ -201,12 +201,12 @@ inlineFile :: PandocMonad m => DWParser m B.Inlines inlineFile = codeTag B.codeWith "file" inlineHtml :: PandocMonad m => DWParser m B.Inlines -inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>") +inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTillChar anyChar (try $ string "</html>") inlinePhp :: PandocMonad m => DWParser m B.Inlines -inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>") +inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTillChar anyChar (try $ string "</php>") -makeLink :: (String, String) -> B.Inlines +makeLink :: (Text, Text) -> B.Inlines makeLink (text, url) = B.link url "" $ B.str text autoEmail :: PandocMonad m => DWParser m B.Inlines @@ -220,7 +220,7 @@ autoLink = try $ do state <- getState guard $ stateAllowLinks state (text, url) <- uri - guard $ checkLink (last url) + guard $ checkLink (T.last url) return $ makeLink (text, url) where checkLink c @@ -234,10 +234,10 @@ nocache :: PandocMonad m => DWParser m B.Inlines nocache = try $ mempty <$ string "~~NOCACHE~~" str :: PandocMonad m => DWParser m B.Inlines -str = B.str <$> (many1 alphaNum <|> count 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) symbol :: PandocMonad m => DWParser m B.Inlines -symbol = B.str <$> count 1 nonspaceChar +symbol = B.str <$> countChar 1 nonspaceChar link :: PandocMonad m => DWParser m B.Inlines link = try $ do @@ -248,77 +248,78 @@ link = try $ do setState $ st{ stateAllowLinks = True } return l -isExternalLink :: String -> Bool -isExternalLink s = - case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of - (':':'/':'/':_) -> True - _ -> False - -isAbsolutePath :: String -> Bool -isAbsolutePath ('.':_) = False -isAbsolutePath s = ':' `elem` s - -normalizeDots :: String -> String -normalizeDots path@('.':_) = - case dropWhile (== '.') path of - ':':_ -> path - _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path -normalizeDots path = path +isExternalLink :: Text -> Bool +isExternalLink s = "://" `T.isPrefixOf` sSuff + where + sSuff = T.dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s + +isAbsolutePath :: Text -> Bool +isAbsolutePath (T.uncons -> Just ('.', _)) = False +isAbsolutePath s = T.any (== ':') s + +normalizeDots :: Text -> Text +normalizeDots path + | not (T.null pref) = case T.uncons suff of + Just (':', _) -> path + _ -> pref <> ":" <> suff + | otherwise = path + where + (pref, suff) = T.span (== '.') path -normalizeInternalPath :: String -> String +normalizeInternalPath :: Text -> Text normalizeInternalPath path = if isAbsolutePath path then ensureAbsolute normalizedPath else normalizedPath where - normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path - ensureAbsolute s@('/':_) = s - ensureAbsolute s = '/':s + normalizedPath = T.intercalate "/" $ dropWhile (== ".") $ T.splitOn ":" $ normalizeDots path + ensureAbsolute s@(T.uncons -> Just ('/', _)) = s + ensureAbsolute s = "/" <> s -normalizePath :: String -> String +normalizePath :: Text -> Text normalizePath path = if isExternalLink path then path else normalizeInternalPath path -urlToText :: String -> String +urlToText :: Text -> Text urlToText url = if isExternalLink url then url - else reverse $ takeWhile (/= ':') $ reverse url + else T.takeWhileEnd (/= ':') url -- Parse link or image parseLink :: PandocMonad m - => (String -> Maybe B.Inlines -> B.Inlines) - -> String - -> String + => (Text -> Maybe B.Inlines -> B.Inlines) + -> Text + -> Text -> DWParser m B.Inlines parseLink f l r = f - <$ string l - <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r))) - <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r))) - <* string r + <$ textStr l + <*> many1TillChar anyChar (lookAhead (void (char '|') <|> try (void $ textStr r))) + <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ textStr r))) + <* textStr r -- | Split Interwiki link into left and right part -- | Return Nothing if it is not Interwiki link -splitInterwiki :: String -> Maybe (String, String) +splitInterwiki :: Text -> Maybe (Text, Text) splitInterwiki path = - case span (\c -> isAlphaNum c || c == '.') path of - (l, '>':r) -> Just (l, r) + case T.span (\c -> isAlphaNum c || c == '.') path of + (l, T.uncons -> Just ('>', r)) -> Just (l, r) _ -> Nothing -interwikiToUrl :: String -> String -> String -interwikiToUrl "callto" page = "callto://" ++ page -interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page -interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page -interwikiToUrl "tel" page = "tel:" ++ page -interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page -interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page -interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky" +interwikiToUrl :: Text -> Text -> Text +interwikiToUrl "callto" page = "callto://" <> page +interwikiToUrl "doku" page = "https://www.dokuwiki.org/" <> page +interwikiToUrl "phpfn" page = "https://secure.php.net/" <> page +interwikiToUrl "tel" page = "tel:" <> page +interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" <> page +interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" <> page +interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page +interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page +interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page +interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page +interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky" linkText :: PandocMonad m => DWParser m B.Inlines linkText = parseLink fromRaw "[[" "]]" @@ -338,23 +339,23 @@ linkText = parseLink fromRaw "[[" "]]" Just (_, r) -> r -- Matches strings like "100x100" (width x height) and "50" (width) -isWidthHeightParameter :: String -> Bool +isWidthHeightParameter :: Text -> Bool isWidthHeightParameter s = - case s of - (x:xs) -> - isDigit x && case dropWhile isDigit xs of - ('x':ys@(_:_)) -> all isDigit ys - "" -> True + case T.uncons s of + Just (x, xs) -> + isDigit x && case T.uncons $ T.dropWhile isDigit xs of + Just ('x', ys) | not (T.null ys) -> T.all isDigit ys + Nothing -> True _ -> False _ -> False -parseWidthHeight :: String -> (Maybe String, Maybe String) +parseWidthHeight :: Text -> (Maybe Text, Maybe Text) parseWidthHeight s = (width, height) where - width = Just $ takeWhile isDigit s + width = Just $ T.takeWhile isDigit s height = - case dropWhile isDigit s of - ('x':xs) -> Just xs + case T.uncons $ T.dropWhile isDigit s of + Just ('x', xs) -> Just xs _ -> Nothing image :: PandocMonad m => DWParser m B.Inlines @@ -365,17 +366,17 @@ image = try $ parseLink fromRaw "{{" "}}" then B.link normalizedPath "" (fromMaybe defaultDescription description) else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description) where - (path', parameters) = span (/= '?') $ trim path + (path', parameters) = T.span (/= '?') $ trim path normalizedPath = normalizePath path' - leftPadding = " " `isPrefixOf` path - rightPadding = " " `isSuffixOf` path + leftPadding = " " `T.isPrefixOf` path + rightPadding = " " `T.isSuffixOf` path classes = case (leftPadding, rightPadding) of (False, False) -> [] (False, True) -> ["align-left"] (True, False) -> ["align-right"] (True, True) -> ["align-center"] - parameterList = splitOn "&" $ drop 1 parameters + parameterList = T.splitOn "&" $ T.drop 1 parameters linkOnly = "linkonly" `elem` parameterList (width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList) attributes = catMaybes [fmap ("width",) width, fmap ("height",) height] @@ -389,7 +390,7 @@ block = do <|> blockElements <|> para skipMany blankline - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blockElements :: PandocMonad m => DWParser m B.Blocks @@ -417,30 +418,30 @@ header = try $ do attr <- registerHeader nullAttr contents return $ B.headerWith attr (7 - lev) contents -list :: PandocMonad m => String -> DWParser m B.Blocks +list :: PandocMonad m => Text -> DWParser m B.Blocks list prefix = bulletList prefix <|> orderedList prefix -bulletList :: PandocMonad m => String -> DWParser m B.Blocks +bulletList :: PandocMonad m => Text -> DWParser m B.Blocks bulletList prefix = try $ B.bulletList <$> parseList prefix '*' -orderedList :: PandocMonad m => String -> DWParser m B.Blocks +orderedList :: PandocMonad m => Text -> DWParser m B.Blocks orderedList prefix = try $ B.orderedList <$> parseList prefix '-' parseList :: PandocMonad m - => String + => Text -> Char -> DWParser m [B.Blocks] parseList prefix marker = many1 ((<>) <$> item <*> fmap mconcat (many continuation)) where - continuation = try $ list (" " ++ prefix) - item = try $ string prefix *> char marker *> char ' ' *> itemContents + continuation = try $ list (" " <> prefix) + item = try $ textStr prefix *> char marker *> char ' ' *> itemContents itemContents = B.plain . mconcat <$> many1Till inline' eol indentedCode :: PandocMonad m => DWParser m B.Blocks -indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine +indentedCode = try $ B.codeBlock . T.unlines <$> many1 indentedLine where - indentedLine = try $ string " " *> manyTill anyChar eol + indentedLine = try $ string " " *> manyTillChar anyChar eol quote :: PandocMonad m => DWParser m B.Blocks quote = try $ nestedQuote 0 @@ -456,13 +457,13 @@ blockHtml :: PandocMonad m => DWParser m B.Blocks blockHtml = try $ B.rawBlock "html" <$ string "<HTML>" <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</HTML>") + <*> manyTillChar anyChar (try $ string "</HTML>") blockPhp :: PandocMonad m => DWParser m B.Blocks blockPhp = try $ B.codeBlockWith ("", ["php"], []) <$ string "<PHP>" <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</PHP>") + <*> manyTillChar anyChar (try $ string "</PHP>") table :: PandocMonad m => DWParser m B.Blocks table = do diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 8e9746090..93ddeb9ee 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.EPUB Copyright : Copyright (C) 2014-2019 Matthew Pickering @@ -24,7 +25,8 @@ import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) -import Data.List (isInfixOf, isPrefixOf) +import Data.List (isInfixOf) +import qualified Data.Text as T import qualified Data.Map as M (Map, elems, fromList, lookup) import Data.Maybe (fromMaybe, mapMaybe) import qualified Data.Text.Lazy as TL @@ -67,9 +69,9 @@ archiveToEPUB os archive = do -- No need to collapse here as the image path is from the manifest file let coverDoc = fromMaybe mempty (imageToPandoc <$> cover) spine <- parseSpine items content - let escapedSpine = map (escapeURI . takeFileName . fst) spine + let escapedSpine = map (escapeURI . T.pack . takeFileName . fst) spine Pandoc _ bs <- - foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine)) + foldM' (\a b -> ((a <>) . walk (prependHash $ escapedSpine)) `liftM` parseSpineElem root b) mempty spine let ast = coverDoc <> Pandoc meta bs fetchImages (M.elems items) root archive ast @@ -79,7 +81,7 @@ archiveToEPUB os archive = do parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do doc <- mimeToReader mime r path - let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty + let docSpan = B.doc $ B.para $ B.spanWith (T.pack $ takeFileName path, [], []) mempty return $ docSpan <> doc mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (unEscapeString -> root) @@ -108,18 +110,19 @@ fetchImages mimes root arc (query iq -> links) = <$> findEntryByPath abslink arc iq :: Inline -> [FilePath] -iq (Image _ _ (url, _)) = [url] +iq (Image _ _ (url, _)) = [T.unpack url] iq _ = [] -- Remove relative paths renameImages :: FilePath -> Inline -> Inline renameImages root img@(Image attr a (url, b)) - | "data:" `isPrefixOf` url = img - | otherwise = Image attr a (collapseFilePath (root </> url), b) + | "data:" `T.isPrefixOf` url = img + | otherwise = Image attr a ( T.pack $ collapseFilePath (root </> T.unpack url) + , b) renameImages _ x = x imageToPandoc :: FilePath -> Pandoc -imageToPandoc s = B.doc . B.para $ B.image s "" mempty +imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty imageMimes :: [MimeType] imageMimes = ["image/gif", "image/jpeg", "image/png"] @@ -144,7 +147,7 @@ parseManifest content coverId = do uid <- findAttrE (emptyName "id") e href <- findAttrE (emptyName "href") e mime <- findAttrE (emptyName "media-type") e - return (uid, (href, mime)) + return (uid, (href, T.pack mime)) parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do @@ -172,11 +175,11 @@ parseMeta content = do -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem parseMetaItem :: Element -> Meta -> Meta parseMetaItem e@(stripNamespace . elName -> field) meta = - addMetaField (renameMeta field) (B.str $ strContent e) meta + addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta -renameMeta :: String -> String +renameMeta :: String -> T.Text renameMeta "creator" = "author" -renameMeta s = s +renameMeta s = T.pack s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do @@ -197,26 +200,26 @@ getManifest archive = do fixInternalReferences :: FilePath -> Pandoc -> Pandoc fixInternalReferences pathToFile = walk (renameImages root) - . walk (fixBlockIRs filename) + . walk (fixBlockIRs filename) . walk (fixInlineIRs filename) where - (root, escapeURI -> filename) = splitFileName pathToFile + (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile fixInlineIRs :: String -> Inline -> Inline fixInlineIRs s (Span as v) = Span (fixAttrs s as) v fixInlineIRs s (Code as code) = Code (fixAttrs s as) code -fixInlineIRs s (Link as is ('#':url, tit)) = +fixInlineIRs s (Link as is (T.uncons -> Just ('#', url), tit)) = Link (fixAttrs s as) is (addHash s url, tit) fixInlineIRs s (Link as is t) = Link (fixAttrs s as) is t fixInlineIRs _ v = v -prependHash :: [String] -> Inline -> Inline +prependHash :: [T.Text] -> Inline -> Inline prependHash ps l@(Link attr is (url, tit)) - | or [s `isPrefixOf` url | s <- ps] = - Link attr is ('#':url, tit) + | or [s `T.isPrefixOf` url | s <- ps] = + Link attr is ("#" <> url, tit) | otherwise = l prependHash _ i = i @@ -230,17 +233,17 @@ fixBlockIRs s (CodeBlock as code) = fixBlockIRs _ b = b fixAttrs :: FilePath -> B.Attr -> B.Attr -fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs) +fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) -addHash :: String -> String -> String +addHash :: String -> T.Text -> T.Text addHash _ "" = "" -addHash s ident = takeFileName s ++ "#" ++ ident +addHash s ident = T.pack (takeFileName s) <> "#" <> ident -removeEPUBAttrs :: [(String, String)] -> [(String, String)] +removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs -isEPUBAttr :: (String, String) -> Bool -isEPUBAttr (k, _) = "epub:" `isPrefixOf` k +isEPUBAttr :: (T.Text, a) -> Bool +isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k -- Library @@ -291,4 +294,4 @@ findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x mkE :: PandocMonad m => String -> Maybe a -> m a -mkE s = maybe (throwError . PandocParseError $ s) return +mkE s = maybe (throwError . PandocParseError $ T.pack $ s) return diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index 0b25b9fed..6eed3c104 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -1,5 +1,6 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.FB2 Copyright : Copyright (C) 2018-2019 Alexander Krotov @@ -27,12 +28,11 @@ import Control.Monad.Except (throwError) import Control.Monad.State.Strict import Data.ByteString.Lazy.Char8 ( pack ) import Data.ByteString.Base64.Lazy -import Data.Char (isSpace, toUpper) import Data.Functor -import Data.List (dropWhileEnd, intersperse) -import Data.List.Split (splitOn) +import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) +import qualified Data.Text as T import Data.Default import Data.Maybe import Text.HTML.TagSoup.Entity (lookupEntity) @@ -48,8 +48,8 @@ type FB2 m = StateT FB2State m data FB2State = FB2State{ fb2SectionLevel :: Int , fb2Meta :: Meta - , fb2Authors :: [String] - , fb2Notes :: M.Map String Blocks + , fb2Authors :: [Text] + , fb2Notes :: M.Map Text Blocks } deriving Show instance Default FB2State where @@ -76,19 +76,20 @@ readFB2 _ inp = -- * Utility functions -trim :: String -> String -trim = dropWhileEnd isSpace . dropWhile isSpace +trim :: Text -> Text +trim = T.strip -removeHash :: String -> String -removeHash ('#':xs) = xs -removeHash xs = xs +removeHash :: Text -> Text +removeHash t = case T.uncons t of + Just ('#', xs) -> xs + _ -> t -convertEntity :: String -> String -convertEntity e = fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: String -> Text +convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e parseInline :: PandocMonad m => Content -> FB2 m Inlines parseInline (Elem e) = - case qName $ elName e of + case T.pack $ qName $ elName e of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -96,12 +97,12 @@ parseInline (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ strContent e + "code" -> pure $ code $ T.pack $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement name pure mempty -parseInline (Text x) = pure $ text $ cdData x +parseInline (Text x) = pure $ text $ T.pack $ cdData x parseInline (CRef r) = pure $ str $ convertEntity r parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks @@ -111,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel < parseRootElement :: PandocMonad m => Element -> FB2 m Blocks parseRootElement e = - case qName $ elName e of + case T.pack $ qName $ elName e of "FictionBook" -> do -- Parse notes before parsing the rest of the content. case filterChild isNotesBody e of @@ -144,7 +145,7 @@ parseNote e = Just sectionId -> do content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e) oldNotes <- gets fb2Notes - modify $ \s -> s { fb2Notes = M.insert ("#" ++ sectionId) content oldNotes } + modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes } pure () where isTitle x = qName (elName x) == "title" @@ -156,7 +157,7 @@ parseNote e = -- | Parse a child of @\<FictionBook>@ element. parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks parseFictionBookChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "stylesheet" -> pure mempty -- stylesheet is ignored "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e) "body" -> if isNotesBody e @@ -168,7 +169,7 @@ parseFictionBookChild e = -- | Parse a child of @\<description>@ element. parseDescriptionChild :: PandocMonad m => Element -> FB2 m () parseDescriptionChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title-info" -> mapM_ parseTitleInfoChild (elChildren e) "src-title-info" -> pure () -- ignore "document-info" -> pure () @@ -176,13 +177,13 @@ parseDescriptionChild e = "custom-info" -> pure () "output" -> pure () name -> do - report $ IgnoredElement $ name ++ " in description" + report $ IgnoredElement $ name <> " in description" pure mempty -- | Parse a child of @\<body>@ element. parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks parseBodyChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "image" -> parseImageElement e "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e) "epigraph" -> parseEpigraph e @@ -196,25 +197,25 @@ parseBinaryElement e = (Nothing, _) -> report $ IgnoredElement "binary without id attribute" (Just _, Nothing) -> report $ IgnoredElement "binary without content-type attribute" - (Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e))) + (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e))) -- * Type parsers -- | Parse @authorType@ -parseAuthor :: PandocMonad m => Element -> FB2 m String -parseAuthor e = unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) +parseAuthor :: PandocMonad m => Element -> FB2 m Text +parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) -parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe String) +parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text) parseAuthorChild e = - case qName $ elName e of - "first-name" -> pure $ Just $ strContent e - "middle-name" -> pure $ Just $ strContent e - "last-name" -> pure $ Just $ strContent e - "nickname" -> pure $ Just $ strContent e - "home-page" -> pure $ Just $ strContent e - "email" -> pure $ Just $ strContent e + case T.pack $ qName $ elName e of + "first-name" -> pure $ Just $ T.pack $ strContent e + "middle-name" -> pure $ Just $ T.pack $ strContent e + "last-name" -> pure $ Just $ T.pack $ strContent e + "nickname" -> pure $ Just $ T.pack $ strContent e + "home-page" -> pure $ Just $ T.pack $ strContent e + "email" -> pure $ Just $ T.pack $ strContent e name -> do - report $ IgnoredElement $ name ++ " in author" + report $ IgnoredElement $ name <> " in author" pure Nothing -- | Parse @titleType@ @@ -236,13 +237,13 @@ parseTitleContent _ = pure Nothing parseImageElement :: PandocMonad m => Element -> FB2 m Blocks parseImageElement e = case href of - Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt + Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt Nothing -> do report $ IgnoredElement " image without href" pure mempty - where alt = maybe mempty str $ findAttr (unqual "alt") e - title = fromMaybe "" $ findAttr (unqual "title") e - imgId = fromMaybe "" $ findAttr (unqual "id") e + where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e + title = maybe "" T.pack $ findAttr (unqual "title") e + imgId = maybe "" T.pack $ findAttr (unqual "id") e href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e -- | Parse @pType@ @@ -256,7 +257,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e) -- | Parse @citeType@ child parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks parseCiteChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "empty-line" -> pure horizontalRule @@ -271,13 +272,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e) parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks parsePoemChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "epigraph" -> parseEpigraph e "stanza" -> parseStanza e "text-author" -> para <$> parsePType e - "date" -> pure $ para $ text $ strContent e + "date" -> pure $ para $ text $ T.pack $ strContent e name -> report (UnexpectedXmlElement name "poem") $> mempty parseStanza :: PandocMonad m => Element -> FB2 m Blocks @@ -290,7 +291,7 @@ joinLineBlocks [] = [] parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks parseStanzaChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "v" -> lineBlock . (:[]) <$> parsePType e @@ -300,11 +301,11 @@ parseStanzaChild e = parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks parseEpigraph e = divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e) - where divId = fromMaybe "" $ findAttr (unqual "id") e + where divId = maybe "" T.pack $ findAttr (unqual "id") e parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks parseEpigraphChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -318,7 +319,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e) parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks parseAnnotationChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -332,14 +333,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks parseSection e = do n <- gets fb2SectionLevel modify $ \st -> st{ fb2SectionLevel = n + 1 } - let sectionId = fromMaybe "" $ findAttr (unqual "id") e + let sectionId = maybe "" T.pack $ findAttr (unqual "id") e bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e) modify $ \st -> st{ fb2SectionLevel = n } pure bs parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks parseSectionChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseBodyChild e "epigraph" -> parseEpigraph e "image" -> parseImageElement e @@ -361,16 +362,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e) parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines parseNamedStyle e = do content <- mconcat <$> mapM parseNamedStyleChild (elContent e) - let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e + let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e case findAttr (unqual "name") e of - Just name -> pure $ spanWith ("", [name], lang) content + Just name -> pure $ spanWith ("", [T.pack name], lang) content Nothing -> do report $ IgnoredElement "link without required name" pure mempty parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines parseNamedStyleChild (Elem e) = - case qName (elName e) of + case T.pack $ qName (elName e) of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -378,10 +379,10 @@ parseNamedStyleChild (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ strContent e + "code" -> pure $ code $ T.pack $ strContent e "image" -> parseInlineImageElement e name -> do - report $ IgnoredElement $ name ++ " in style" + report $ IgnoredElement $ name <> " in style" pure mempty parseNamedStyleChild x = parseInline x @@ -390,7 +391,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines parseLinkType e = do content <- mconcat <$> mapM parseStyleLinkType (elContent e) notes <- gets fb2Notes - case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of Just href -> case findAttr (QName "type" Nothing Nothing) e of Just "note" -> case M.lookup href notes of Nothing -> pure $ link href "" content @@ -417,19 +418,21 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet -- | Parse @title-infoType@ parseTitleInfoChild :: PandocMonad m => Element -> FB2 m () parseTitleInfoChild e = - case qName (elName e) of + case T.pack $ qName (elName e) of "genre" -> pure () "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st}) - "book-title" -> modify (setMeta "title" (text $ strContent e)) + "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e)) "annotation" -> parseAnnotation e >>= modify . setMeta "abstract" - "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e)) - "date" -> modify (setMeta "date" (text $ strContent e)) + "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn "," + $ T.pack + $ strContent e)) + "date" -> modify (setMeta "date" (text $ T.pack $ strContent e)) "coverpage" -> parseCoverPage e "lang" -> pure () "src-lang" -> pure () "translator" -> pure () "sequence" -> pure () - name -> report $ IgnoredElement $ name ++ " in title-info" + name -> report $ IgnoredElement $ name <> " in title-info" parseCoverPage :: PandocMonad m => Element -> FB2 m () parseCoverPage e = @@ -437,7 +440,7 @@ parseCoverPage e = Just img -> case href of Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src)) Nothing -> pure () - where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img + where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img Nothing -> pure () -- | Parse @inlineImageType@ element @@ -450,5 +453,5 @@ parseInlineImageElement e = Nothing -> do report $ IgnoredElement "inline image without href" pure mempty - where alt = maybe mempty str $ findAttr (unqual "alt") e - href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e + where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e + href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e03ac6a97..1c2892d6a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,5 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -35,8 +35,7 @@ import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT) import Data.Char (isAlphaNum, isLetter) import Data.Default (Default (..), def) import Data.Foldable (for_) -import Data.List (isPrefixOf) -import Data.List.Split (wordsBy, splitWhen) +import Data.List.Split (splitWhen) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid (First (..)) @@ -62,8 +61,8 @@ import Text.Pandoc.Options ( extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI, - extractSpaces, htmlSpanLikeElements, - onlySimpleTableCells, safeRead, underlineSpan) + extractSpaces, htmlSpanLikeElements, elemText, splitTextBy, + onlySimpleTableCells, safeRead, underlineSpan, tshow) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) @@ -93,14 +92,14 @@ readHtml opts inp = do "source" tags case result of Right doc -> return doc - Left err -> throwError $ PandocParseError $ getError err + Left err -> throwError $ PandocParseError $ T.pack $ getError err replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes bs = do st <- getState return $ walk (replaceNotes' (noteTable st)) bs -replaceNotes' :: [(String, Blocks)] -> Inline -> Inline +replaceNotes' :: [(Text, Blocks)] -> Inline -> Inline replaceNotes' noteTbl (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) $ lookup ref noteTbl replaceNotes' _ x = x @@ -108,9 +107,9 @@ replaceNotes' _ x = x data HTMLState = HTMLState { parserState :: ParserState, - noteTable :: [(String, Blocks)], + noteTable :: [(Text, Blocks)], baseHref :: Maybe URI, - identifiers :: Set.Set String, + identifiers :: Set.Set Text, logMessages :: [LogMessage], macros :: M.Map Text Macro } @@ -134,7 +133,7 @@ pHtml :: PandocMonad m => TagParser m Blocks pHtml = try $ do (TagOpen "html" attr) <- lookAhead pAny for_ (lookup "lang" attr) $ - updateState . B.setMeta "lang" . B.text . T.unpack + updateState . B.setMeta "lang" . B.text pInTags "html" block pBody :: PandocMonad m => TagParser m Blocks @@ -146,11 +145,11 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny) setTitle t = mempty <$ updateState (B.setMeta "title" t) pMetaTag = do mt <- pSatisfy (matchTagOpen "meta" []) - let name = T.unpack $ fromAttrib "name" mt - if null name + let name = fromAttrib "name" mt + if T.null name then return mempty else do - let content = T.unpack $ fromAttrib "content" mt + let content = fromAttrib "content" mt updateState $ \s -> let ps = parserState s in s{ parserState = ps{ @@ -187,13 +186,13 @@ block = do , pFigure , pRawHtmlBlock ] - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res -namespaces :: PandocMonad m => [(String, TagParser m Inlines)] +namespaces :: PandocMonad m => [(Text, TagParser m Inlines)] namespaces = [(mathMLNamespace, pMath True)] -mathMLNamespace :: String +mathMLNamespace :: Text mathMLNamespace = "http://www.w3.org/1998/Math/MathML" eSwitch :: (PandocMonad m, Monoid a) @@ -233,7 +232,7 @@ eFootnote = try $ do content <- pInTags tag block addNote ident content -addNote :: PandocMonad m => String -> Blocks -> TagParser m () +addNote :: PandocMonad m => Text -> Blocks -> TagParser m () addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s}) eNoteref :: PandocMonad m => TagParser m Inlines @@ -245,8 +244,8 @@ eNoteref = try $ do -> (lookup "type" as <|> lookup "epub:type" as) == Just "noteref" _ -> False) - ident <- case T.unpack <$> lookup "href" attr of - Just ('#':rest) -> return rest + ident <- case lookup "href" attr >>= T.uncons of + Just ('#', rest) -> return rest _ -> mzero _ <- manyTill pAny (pSatisfy (\case TagClose t -> t == tag @@ -287,7 +286,7 @@ pListItem nonItem = do maybe id addId (lookup "id" attr) <$> pInTags "li" block <* skipMany nonItem -parseListStyleType :: String -> ListNumberStyle +parseListStyleType :: Text -> ListNumberStyle parseListStyleType "lower-roman" = LowerRoman parseListStyleType "upper-roman" = UpperRoman parseListStyleType "lower-alpha" = LowerAlpha @@ -295,7 +294,7 @@ parseListStyleType "upper-alpha" = UpperAlpha parseListStyleType "decimal" = Decimal parseListStyleType _ = DefaultStyle -parseTypeAttr :: String -> ListNumberStyle +parseTypeAttr :: Text -> ListNumberStyle parseTypeAttr "i" = LowerRoman parseTypeAttr "I" = UpperRoman parseTypeAttr "a" = LowerAlpha @@ -404,20 +403,19 @@ pDiv = try $ do pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do - raw <- T.unpack <$> - (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea" - <|> pRawTag) + raw <- (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea" + <|> pRawTag) exts <- getOption readerExtensions - if extensionEnabled Ext_raw_html exts && not (null raw) + if extensionEnabled Ext_raw_html exts && not (T.null raw) then return $ B.rawBlock "html" raw else ignore raw -ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a +ignore :: (Monoid a, PandocMonad m) => Text -> TagParser m a ignore raw = do pos <- getPosition -- raw can be null for tags like <!DOCTYPE>; see paRawTag -- in this case we don't want a warning: - unless (null raw) $ + unless (T.null raw) $ logMessage $ SkippedContent raw pos return mempty @@ -438,7 +436,7 @@ eSection = try $ do headerLevel :: Text -> TagParser m Int headerLevel tagtype = - case safeRead (T.unpack (T.drop 1 tagtype)) of + case safeRead (T.drop 1 tagtype) of Just level -> -- try (do -- guardEnabled Ext_epub_html_exts @@ -468,7 +466,7 @@ pHeader = try $ do level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr - let classes = maybe [] words $ lookup "class" attr + let classes = maybe [] T.words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] attr'' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle @@ -529,14 +527,14 @@ pCol = try $ do optional $ pSatisfy (matchTagClose "col") skipMany pBlank let width = case lookup "width" attribs of - Nothing -> case lookup "style" attribs of - Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs -> - fromMaybe 0.0 $ safeRead (filter - (`notElem` (" \t\r\n%'\";" :: [Char])) xs) - _ -> 0.0 - Just x | not (null x) && last x == '%' -> - fromMaybe 0.0 $ safeRead (init x) - _ -> 0.0 + Nothing -> case lookup "style" attribs of + Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> + fromMaybe 0.0 $ safeRead (T.filter + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) + _ -> 0.0 + Just (T.unsnoc -> Just (xs, '%')) -> + fromMaybe 0.0 $ safeRead xs + _ -> 0.0 if width > 0.0 then return $ width / 100.0 else return 0.0 @@ -562,7 +560,7 @@ pCell celltype = try $ do let extractAlign' [] = "" extractAlign' ("text-align":x:_) = x extractAlign' (_:xs) = extractAlign' xs - let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':']) + let extractAlign = extractAlign' . splitTextBy (`elemText` " \t;:") let align = case maybeFromAttrib "align" tag `mplus` (extractAlign <$> maybeFromAttrib "style" tag) of Just "left" -> AlignLeft @@ -610,7 +608,7 @@ pFigure = try $ do let caption = fromMaybe mempty mbcap case B.toList <$> mbimg of Just [Image attr _ (url, tit)] -> - return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption + return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption _ -> mzero pCodeBlock :: PandocMonad m => TagParser m Blocks @@ -618,21 +616,21 @@ pCodeBlock = try $ do TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" []) let attr = toStringAttr attr' contents <- manyTill pAny (pCloses "pre" <|> eof) - let rawText = concatMap tagToString contents + let rawText = T.concat $ map tagToText contents -- drop leading newline if any - let result' = case rawText of - '\n':xs -> xs - _ -> rawText + let result' = case T.uncons rawText of + Just ('\n', xs) -> xs + _ -> rawText -- drop trailing newline if any - let result = case reverse result' of - '\n':_ -> init result' - _ -> result' + let result = case T.unsnoc result' of + Just (result'', '\n') -> result'' + _ -> result' return $ B.codeBlockWith (mkAttr attr) result -tagToString :: Tag Text -> String -tagToString (TagText s) = T.unpack s -tagToString (TagOpen "br" _) = "\n" -tagToString _ = "" +tagToText :: Tag Text -> Text +tagToText (TagText s) = s +tagToText (TagOpen "br" _) = "\n" +tagToText _ = "" inline :: PandocMonad m => TagParser m Inlines inline = choice @@ -667,7 +665,7 @@ pLocation = do pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSat f = do pos <- getPosition - token show (const pos) (\x -> if f x then Just x else Nothing) + token tshow (const pos) (\x -> if f x then Just x else Nothing) pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSatisfy f = try $ optional pLocation >> pSat f @@ -688,10 +686,10 @@ pQ = choice $ map try [citedQuote, normalQuote] where citedQuote = do tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst)) - url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag - let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + url <- canonicalizeUrl $ fromAttrib "cite" tag + let uid = fromMaybe (fromAttrib "name" tag) $ maybeFromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + let cls = T.words $ fromAttrib "class" tag makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)]) normalQuote = do @@ -729,7 +727,7 @@ pSpanLike = TagOpen _ attrs <- pSatisfy $ tagOpenLit tagName (const True) let (ids, cs, kvs) = mkAttr . toStringAttr $ attrs content <- mconcat <$> manyTill inline (pCloses tagName <|> eof) - return $ B.spanWith (ids, T.unpack tagName : cs, kvs) content + return $ B.spanWith (ids, tagName : cs, kvs) content pSmall :: PandocMonad m => TagParser m Inlines pSmall = pInlinesInTags "small" (B.spanWith ("",["small"],[])) @@ -753,19 +751,18 @@ pLineBreak = do -- Unlike fromAttrib from tagsoup, this distinguishes -- between a missing attribute and an attribute with empty content. -maybeFromAttrib :: String -> Tag Text -> Maybe String -maybeFromAttrib name (TagOpen _ attrs) = - T.unpack <$> lookup (T.pack name) attrs +maybeFromAttrib :: Text -> Tag Text -> Maybe Text +maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs maybeFromAttrib _ _ = Nothing pLink :: PandocMonad m => TagParser m Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) - let title = T.unpack $ fromAttrib "title" tag + let title = fromAttrib "title" tag -- take id from id attribute if present, otherwise name - let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + let uid = fromMaybe (fromAttrib "name" tag) $ maybeFromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + let cls = T.words $ fromAttrib "class" tag lab <- mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of @@ -778,34 +775,33 @@ pLink = try $ do pImage :: PandocMonad m => TagParser m Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") - url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag - let title = T.unpack $ fromAttrib "title" tag - let alt = T.unpack $ fromAttrib "alt" tag - let uid = T.unpack $ fromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + url <- canonicalizeUrl $ fromAttrib "src" tag + let title = fromAttrib "title" tag + let alt = fromAttrib "alt" tag + let uid = fromAttrib "id" tag + let cls = T.words $ fromAttrib "class" tag let getAtt k = case fromAttrib k tag of "" -> [] - v -> [(T.unpack k, T.unpack v)] + v -> [(k, v)] let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"] return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) -pCodeWithClass :: PandocMonad m => [(T.Text,String)] -> TagParser m Inlines -pCodeWithClass elemToClass = try $ do +pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines +pCodeWithClass elemToClass = try $ do let tagTest = flip elem . fmap fst $ elemToClass TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True) result <- manyTill pAny (pCloses open) let (ids,cs,kvs) = mkAttr . toStringAttr $ attr' cs' = maybe cs (:cs) . lookup open $ elemToClass return . B.codeWith (ids,cs',kvs) . - unwords . lines . T.unpack . innerText $ result + T.unwords . T.lines . innerText $ result pCode :: PandocMonad m => TagParser m Inlines pCode = try $ do (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) let attr = toStringAttr attr' result <- manyTill pAny (pCloses open) - return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $ - innerText result + return $ B.codeWith (mkAttr attr) $ T.unwords $ T.lines $ innerText result pSpan :: PandocMonad m => TagParser m Inlines pSpan = try $ do @@ -817,7 +813,7 @@ pSpan = try $ do where styleAttr = fromMaybe "" $ lookup "style" attr fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr classes = fromMaybe [] $ - words <$> lookup "class" attr + T.words <$> lookup "class" attr let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) return $ tag contents @@ -829,18 +825,17 @@ pRawHtmlInline = do then pSatisfy (not . isBlockTag) else pSatisfy isInlineTag exts <- getOption readerExtensions - let raw = T.unpack $ renderTags' [result] + let raw = renderTags' [result] if extensionEnabled Ext_raw_html exts then return $ B.rawInline "html" raw else ignore raw -mathMLToTeXMath :: String -> Either String String +mathMLToTeXMath :: Text -> Either Text Text mathMLToTeXMath s = writeTeX <$> readMathML s -toStringAttr :: [(Text, Text)] -> [(String, String)] +toStringAttr :: [(Text, Text)] -> [(Text, Text)] toStringAttr = map go - where go (x,y) = (T.unpack (fromMaybe x $ T.stripPrefix "data-" x), - T.unpack y) + where go (x,y) = (fromMaybe x $ T.stripPrefix "data-" x, y) pScriptMath :: PandocMonad m => TagParser m Inlines pScriptMath = try $ do @@ -849,8 +844,7 @@ pScriptMath = try $ do Just x | "math/tex" `T.isPrefixOf` x -> return $ "display" `T.isSuffixOf` x _ -> mzero - contents <- T.unpack . innerText <$> - manyTill pAny (pSatisfy (matchTagClose "script")) + contents <- innerText <$> manyTill pAny (pSatisfy (matchTagClose "script")) return $ (if isdisplay then B.displayMath else B.math) contents pMath :: PandocMonad m => Bool -> TagParser m Inlines @@ -862,11 +856,11 @@ pMath inCase = try $ do unless inCase $ guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr)) contents <- manyTill pAny (pSatisfy (matchTagClose "math")) - case mathMLToTeXMath (T.unpack $ renderTags $ + case mathMLToTeXMath (renderTags $ [open] <> contents <> [TagClose "math"]) of Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $ - T.unpack $ innerText contents - Right [] -> return mempty + innerText contents + Right "" -> return mempty Right x -> return $ case lookup "display" attr of Just "block" -> B.displayMath x _ -> B.math x @@ -925,7 +919,7 @@ pTagText = try $ do parsed <- lift $ lift $ flip runReaderT qu $ runParserT (many pTagContents) st "text" str case parsed of - Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'" + Left _ -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'" Right result -> return $ mconcat result pBlank :: PandocMonad m => TagParser m () @@ -954,11 +948,11 @@ pRawTeX = do guardEnabled Ext_raw_tex inp <- getInput st <- getState - res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp) + res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" inp case res of Left _ -> mzero Right (contents, raw) -> do - _ <- count (length raw) anyChar + _ <- count (T.length raw) anyChar return $ B.rawInline "tex" contents pStr :: PandocMonad m => InlinesParser m Inlines @@ -966,7 +960,7 @@ pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) updateLastStrPos - return $ B.str result + return $ B.str $ T.pack result isSpecial :: Char -> Bool isSpecial '"' = True @@ -982,7 +976,7 @@ isSpecial '\8221' = True isSpecial _ = False pSymbol :: PandocMonad m => InlinesParser m Inlines -pSymbol = satisfy isSpecial >>= return . B.str . (:[]) +pSymbol = satisfy isSpecial >>= return . B.str . T.singleton isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML @@ -1019,7 +1013,7 @@ pBad = do '\158' -> '\382' '\159' -> '\376' _ -> '?' - return $ B.str [c'] + return $ B.str $ T.singleton c' pSpace :: PandocMonad m => InlinesParser m Inlines pSpace = many1 (satisfy isSpace) >>= \xs -> @@ -1156,8 +1150,8 @@ _ `closes` _ = False -- | Matches a stretch of HTML in balanced tags. htmlInBalanced :: Monad m - => (Tag String -> Bool) - -> ParserT String st m String + => (Tag Text -> Bool) + -> ParserT Text st m Text htmlInBalanced f = try $ do lookAhead (char '<') inp <- getInput @@ -1174,21 +1168,21 @@ htmlInBalanced f = try $ do (TagClose _ : TagPosition er ec : _) -> do let ls = er - sr let cs = ec - sc - lscontents <- unlines <$> count ls anyLine + lscontents <- T.unlines <$> count ls anyLine cscontents <- count cs anyChar closetag <- do x <- many (satisfy (/='>')) char '>' return (x <> ">") - return (lscontents <> cscontents <> closetag) + return $ lscontents <> T.pack cscontents <> T.pack closetag _ -> mzero _ -> mzero -htmlInBalanced' :: String - -> [Tag String] - -> [Tag String] +htmlInBalanced' :: Text + -> [Tag Text] + -> [Tag Text] htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts - where go :: Int -> [Tag String] -> Maybe [Tag String] + where go :: Int -> [Tag Text] -> Maybe [Tag Text] go n (t@(TagOpen tn' _):rest) | tn' == tagname = (t :) <$> go (n + 1) rest go 1 (t@(TagClose tn'):_) | tn' == tagname = @@ -1204,8 +1198,8 @@ hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: (HasReaderOptions st, Monad m) - => (Tag String -> Bool) - -> ParserT [Char] st m (Tag String, String) + => (Tag Text -> Bool) + -> ParserT Text st m (Tag Text, Text) htmlTag f = try $ do lookAhead (char '<') startpos <- getPosition @@ -1213,7 +1207,7 @@ htmlTag f = try $ do let ts = canonicalizeTags $ parseTagsOptions parseOptions{ optTagWarning = False , optTagPosition = True } - (inp ++ " ") -- add space to ensure that + (inp <> " ") -- add space to ensure that -- we get a TagPosition after the tag (next, ln, col) <- case ts of (TagPosition{} : next : TagPosition ln col : _) @@ -1225,13 +1219,12 @@ htmlTag f = try $ do -- so we exclude . even though it's a valid character -- in XML element names let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_' - let isName s = case s of - [] -> False - (c:cs) -> isLetter c && all isNameChar cs - let isPI s = case s of - ('?':_) -> True -- processing instruction - _ -> False - + let isName s = case T.uncons s of + Nothing -> False + Just (c, cs) -> isLetter c && T.all isNameChar cs + let isPI s = case T.uncons s of + Just ('?', _) -> True -- processing instruction + _ -> False let endpos = if ln == 1 then setSourceColumn startpos (sourceColumn startpos + (col - 1)) @@ -1247,18 +1240,18 @@ htmlTag f = try $ do -- basic sanity check, since the parser is very forgiving -- and finds tags in stuff like x<y) guard $ isName tagname || isPI tagname - guard $ not $ null tagname + guard $ not $ T.null tagname -- <https://example.org> should NOT be a tag either. -- tagsoup will parse it as TagOpen "https:" [("example.org","")] - guard $ last tagname /= ':' + guard $ T.last tagname /= ':' char '<' rendered <- manyTill anyChar endAngle - return (next, "<" ++ rendered ++ ">") + return (next, T.pack $ "<" ++ rendered ++ ">") case next of TagComment s - | "<!--" `isPrefixOf` inp -> do + | "<!--" `T.isPrefixOf` inp -> do string "<!--" - count (length s) anyChar + count (T.length s) anyChar string "-->" stripComments <- getOption readerStripComments if stripComments @@ -1272,12 +1265,12 @@ htmlTag f = try $ do handleTag tagname _ -> mzero -mkAttr :: [(String, String)] -> Attr +mkAttr :: [(Text, Text)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) where attribsId = fromMaybe "" $ lookup "id" attr - attribsClasses = words (fromMaybe "" $ lookup "class" attr) <> epubTypes + attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr + epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr -- Strip namespace prefixes stripPrefixes :: [Tag Text] -> [Tag Text] @@ -1304,11 +1297,11 @@ isSpace _ = False -- Utilities -- | Adjusts a url according to the document's base URL. -canonicalizeUrl :: PandocMonad m => String -> TagParser m String +canonicalizeUrl :: PandocMonad m => Text -> TagParser m Text canonicalizeUrl url = do mbBaseHref <- baseHref <$> getState - return $ case (parseURIReference url, mbBaseHref) of - (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + return $ case (parseURIReference (T.unpack url), mbBaseHref) of + (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs) _ -> url diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 0a048b6e6..3fc2f9715 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Haddock Copyright : Copyright (C) 2013 David Lazar @@ -17,9 +18,10 @@ module Text.Pandoc.Readers.Haddock import Prelude import Control.Monad.Except (throwError) -import Data.List (intersperse, stripPrefix) +import Data.List (intersperse) import Data.Maybe (fromMaybe) import Data.Text (Text, unpack) +import qualified Data.Text as T import Documentation.Haddock.Parser import Documentation.Haddock.Types as H import Text.Pandoc.Builder (Blocks, Inlines) @@ -28,7 +30,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, splitBy, trim) +import Text.Pandoc.Shared (crFilter, splitTextBy, trim) -- | Parse Haddock markup and return a 'Pandoc' document. @@ -51,7 +53,7 @@ docHToBlocks d' = case d' of DocEmpty -> mempty DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) -> - B.headerWith (ident,[],[]) (headerLevel h) + B.headerWith (T.pack ident,[],[]) (headerLevel h) (docHToInlines False $ headerTitle h) DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) DocString _ -> inlineFallback @@ -73,12 +75,12 @@ docHToBlocks d' = DocDefList items -> B.definitionList (map (\(d,t) -> (docHToInlines False d, [consolidatePlains $ docHToBlocks t])) items) - DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s + DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) $ T.pack s DocCodeBlock d -> B.para $ docHToInlines True d DocHyperlink _ -> inlineFallback DocPic _ -> inlineFallback DocAName _ -> inlineFallback - DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s) + DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim $ T.pack s) DocExamples es -> mconcat $ map (\e -> makeExample ">>>" (exampleExpression e) (exampleResult e)) es DocTable H.Table{ tableHeaderRows = headerRows @@ -114,58 +116,58 @@ docHToInlines isCode d' = (docHToInlines isCode d2) DocString s | isCode -> mconcat $ intersperse B.linebreak - $ map B.code $ splitBy (=='\n') s - | otherwise -> B.text s + $ map B.code $ splitTextBy (=='\n') $ T.pack s + | otherwise -> B.text $ T.pack s DocParagraph _ -> mempty DocIdentifier ident -> case toRegular (DocIdentifier ident) of - DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) s + DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s _ -> mempty - DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s - DocModule s -> B.codeWith ("",["haskell","module"],[]) s + DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s + DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s DocWarning _ -> mempty -- TODO DocEmphasis d -> B.emph (docHToInlines isCode d) - DocMonospaced (DocString s) -> B.code s + DocMonospaced (DocString s) -> B.code $ T.pack s DocMonospaced d -> docHToInlines True d DocBold d -> B.strong (docHToInlines isCode d) - DocMathInline s -> B.math s - DocMathDisplay s -> B.displayMath s + DocMathInline s -> B.math $ T.pack s + DocMathDisplay s -> B.displayMath $ T.pack s DocHeader _ -> mempty DocUnorderedList _ -> mempty DocOrderedList _ -> mempty DocDefList _ -> mempty DocCodeBlock _ -> mempty - DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h) - (maybe (B.text $ hyperlinkUrl h) (docHToInlines isCode) + DocHyperlink h -> B.link (T.pack $ hyperlinkUrl h) (T.pack $ hyperlinkUrl h) + (maybe (B.text $ T.pack $ hyperlinkUrl h) (docHToInlines isCode) (hyperlinkLabel h)) - DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p) - (maybe mempty B.text $ pictureTitle p) - DocAName s -> B.spanWith (s,["anchor"],[]) mempty + DocPic p -> B.image (T.pack $ pictureUri p) (T.pack $ fromMaybe (pictureUri p) $ pictureTitle p) + (maybe mempty (B.text . T.pack) $ pictureTitle p) + DocAName s -> B.spanWith (T.pack s,["anchor"],[]) mempty DocProperty _ -> mempty DocExamples _ -> mempty DocTable _ -> mempty -- | Create an 'Example', stripping superfluous characters as appropriate -makeExample :: String -> String -> [String] -> Blocks +makeExample :: T.Text -> String -> [String] -> Blocks makeExample prompt expression result = B.para $ B.codeWith ("",["prompt"],[]) prompt <> B.space - <> B.codeWith ([], ["haskell","expr"], []) (trim expression) + <> B.codeWith ("", ["haskell","expr"], []) (trim $ T.pack expression) <> B.linebreak <> mconcat (intersperse B.linebreak $ map coder result') where -- 1. drop trailing whitespace from the prompt, remember the prefix - prefix = takeWhile (`elem` " \t") prompt + prefix = T.takeWhile (`elem` (" \t" :: String)) prompt -- 2. drop, if possible, the exact same sequence of whitespace -- characters from each result line -- -- 3. interpret lines that only contain the string "<BLANKLINE>" as an -- empty line - result' = map (substituteBlankLine . tryStripPrefix prefix) result + result' = map (substituteBlankLine . tryStripPrefix prefix . T.pack) result where - tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys + tryStripPrefix xs ys = fromMaybe ys $ T.stripPrefix xs ys substituteBlankLine "<BLANKLINE>" = "" substituteBlankLine line = line - coder = B.codeWith ([], ["result"], []) + coder = B.codeWith ("", ["result"], []) diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index dbca5a59f..8efc230cc 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -4,6 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Ipynb Copyright : Copyright (C) 2019 John MacFarlane @@ -19,7 +20,6 @@ module Text.Pandoc.Readers.Ipynb ( readIpynb ) where import Prelude import Data.Char (isDigit) -import Data.List (isPrefixOf) import Data.Maybe (fromMaybe) import Data.Digest.Pure.SHA (sha1, showDigest) import Text.Pandoc.Options @@ -30,6 +30,7 @@ import Text.Pandoc.Definition import Data.Ipynb as Ipynb import Text.Pandoc.Class import Text.Pandoc.MIME (extensionFromMimeType) +import Text.Pandoc.Shared (tshow) import Text.Pandoc.UTF8 import Text.Pandoc.Walk (walk) import Text.Pandoc.Error @@ -51,15 +52,15 @@ readIpynb opts t = do Left _ -> case eitherDecode src of Right (notebook3 :: Notebook NbV3) -> notebookToPandoc opts notebook3 - Left err -> throwError $ PandocIpynbDecodingError err + Left err -> throwError $ PandocIpynbDecodingError $ T.pack err notebookToPandoc :: PandocMonad m => ReaderOptions -> Notebook a -> m Pandoc notebookToPandoc opts notebook = do let cells = notebookCells notebook let (fmt,fmtminor) = notebookFormat notebook - let m = M.insert "nbformat" (MetaString $ show fmt) $ - M.insert "nbformat_minor" (MetaString $ show fmtminor) $ + let m = M.insert "nbformat" (MetaString $ tshow fmt) $ + M.insert "nbformat_minor" (MetaString $ tshow fmtminor) $ jsonMetaToMeta (notebookMetadata notebook) let lang = case M.lookup "kernelspec" m of Just (MetaMap ks) -> @@ -72,7 +73,7 @@ notebookToPandoc opts notebook = do return $ Pandoc (Meta $ M.insert "jupyter" (MetaMap m) mempty) blocks cellToBlocks :: PandocMonad m - => ReaderOptions -> String -> Cell a -> m B.Blocks + => ReaderOptions -> Text -> Cell a -> m B.Blocks cellToBlocks opts lang c = do let Source ts = cellSource c let source = mconcat ts @@ -100,19 +101,18 @@ cellToBlocks opts lang c = do "text/markdown" -> "markdown" "text/x-rsrt" -> "rst" _ -> format - return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' - $ T.unpack source + return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do outputBlocks <- mconcat <$> mapM outputToBlock outputs - let kvs' = maybe kvs (\x -> ("execution_count", show x):kvs) ec + let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec return $ B.divWith ("",["cell","code"],kvs') $ - B.codeBlockWith ("",[lang],[]) (T.unpack source) + B.codeBlockWith ("",[lang],[]) source <> outputBlocks -- Remove attachment: prefix from images... fixImage :: Inline -> Inline fixImage (Image attr lab (src,tit)) - | "attachment:" `isPrefixOf` src = Image attr lab (drop 11 src, tit) + | "attachment:" `T.isPrefixOf` src = Image attr lab (T.drop 11 src, tit) fixImage x = x addAttachment :: PandocMonad m => (Text, MimeBundle) -> m () @@ -120,19 +120,19 @@ addAttachment (fname, mimeBundle) = do let fp = T.unpack fname case M.toList (unMimeBundle mimeBundle) of (mimeType, BinaryData bs):_ -> - insertMedia fp (Just $ T.unpack mimeType) (BL.fromStrict bs) + insertMedia fp (Just mimeType) (BL.fromStrict bs) (mimeType, TextualData t):_ -> - insertMedia fp (Just $ T.unpack mimeType) + insertMedia fp (Just mimeType) (BL.fromStrict $ TE.encodeUtf8 t) (mimeType, JsonData v):_ -> - insertMedia fp (Just $ T.unpack mimeType) (encode v) - [] -> report $ CouldNotFetchResource fp "no attachment" + insertMedia fp (Just mimeType) (encode v) + [] -> report $ CouldNotFetchResource fname "no attachment" outputToBlock :: PandocMonad m => Output a -> m B.Blocks outputToBlock Stream{ streamName = sName, streamText = Source text } = do - return $ B.divWith ("",["output","stream",T.unpack sName],[]) - $ B.codeBlock $ T.unpack . mconcat $ text + return $ B.divWith ("",["output","stream",sName],[]) + $ B.codeBlock $ T.concat $ text outputToBlock DisplayData{ displayData = data', displayMetadata = metadata' } = B.divWith ("",["output", "display_data"],[]) <$> @@ -140,15 +140,15 @@ outputToBlock DisplayData{ displayData = data', outputToBlock ExecuteResult{ executeCount = ec, executeData = data', executeMetadata = metadata' } = - B.divWith ("",["output", "execute_result"],[("execution_count",show ec)]) + B.divWith ("",["output", "execute_result"],[("execution_count",tshow ec)]) <$> handleData metadata' data' outputToBlock Err{ errName = ename, errValue = evalue, errTraceback = traceback } = do return $ B.divWith ("",["output","error"], - [("ename",T.unpack ename), - ("evalue",T.unpack evalue)]) - $ B.codeBlock $ T.unpack . T.unlines $ traceback + [("ename",ename), + ("evalue",evalue)]) + $ B.codeBlock $ T.unlines $ traceback -- We want to display the richest output possible given -- the output format. @@ -174,54 +174,53 @@ handleData metadata (MimeBundle mb) = let metaPairs = jsonMetaToPairs meta let bl = BL.fromStrict bs -- SHA1 hash for filename - let mt' = T.unpack mt - let fname = showDigest (sha1 bl) ++ - case extensionFromMimeType mt' of + let fname = T.pack (showDigest (sha1 bl)) <> + case extensionFromMimeType mt of Nothing -> "" - Just ext -> '.':ext - insertMedia fname (Just mt') bl + Just ext -> "." <> ext + insertMedia (T.unpack fname) (Just mt) bl return $ B.para $ B.imageWith ("",[],metaPairs) fname "" mempty | otherwise = return mempty dataBlock ("text/html", TextualData t) - = return $ B.rawBlock "html" $ T.unpack t + = return $ B.rawBlock "html" $ t dataBlock ("text/latex", TextualData t) - = return $ B.rawBlock "latex" $ T.unpack t + = return $ B.rawBlock "latex" $ t dataBlock ("text/plain", TextualData t) = - return $ B.codeBlock $ T.unpack t + return $ B.codeBlock $ t dataBlock (_, JsonData v) = - return $ B.codeBlockWith ("",["json"],[]) $ toStringLazy $ encode v + return $ B.codeBlockWith ("",["json"],[]) $ T.pack $ toStringLazy $ encode v dataBlock _ = return mempty -jsonMetaToMeta :: JSONMeta -> M.Map String MetaValue -jsonMetaToMeta = M.mapKeys T.unpack . M.map valueToMetaValue +jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue +jsonMetaToMeta = M.map valueToMetaValue where valueToMetaValue :: Value -> MetaValue valueToMetaValue x@(Object{}) = case fromJSON x of - Error s -> MetaString s + Error s -> MetaString $ T.pack s Success jm' -> MetaMap $ jsonMetaToMeta jm' valueToMetaValue x@(Array{}) = case fromJSON x of - Error s -> MetaString s + Error s -> MetaString $ T.pack s Success xs -> MetaList $ map valueToMetaValue xs valueToMetaValue (Bool b) = MetaBool b - valueToMetaValue (String t) = MetaString (T.unpack t) + valueToMetaValue (String t) = MetaString t valueToMetaValue (Number n) - | Scientific.isInteger n = MetaString (show (floor n :: Integer)) - | otherwise = MetaString (show n) + | Scientific.isInteger n = MetaString (tshow (floor n :: Integer)) + | otherwise = MetaString (tshow n) valueToMetaValue Aeson.Null = MetaString "" -jsonMetaToPairs :: JSONMeta -> [(String, String)] -jsonMetaToPairs = M.toList . M.mapKeys T.unpack . M.map +jsonMetaToPairs :: JSONMeta -> [(Text, Text)] +jsonMetaToPairs = M.toList . M.map (\case String t | not (T.all isDigit t) , t /= "true" , t /= "false" - -> T.unpack t - x -> UTF8.toStringLazy $ Aeson.encode x) + -> t + x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index e074599eb..320b9c1dd 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.JATS Copyright : Copyright (C) 2017-2019 Hamish Mackenzie @@ -76,13 +77,13 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr = fromMaybe "" . maybeAttrValue attr -maybeAttrValue :: String -> Element -> Maybe String +maybeAttrValue :: String -> Element -> Maybe Text maybeAttrValue attr elt = - lookupAttrBy (\x -> qName x == attr) (elAttribs elt) + T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt) -- convenience function named :: String -> Element -> Bool @@ -90,7 +91,7 @@ named s e = qName (elName e) == s -- -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> JATS m () addMeta field val = modify (setMeta field val) instance HasMeta JATSState where @@ -126,15 +127,13 @@ isBlockElement (Elem e) = qName (elName e) `S.member` blocktags isBlockElement _ = False -- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs +trimNl :: Text -> Text +trimNl = T.dropAround (== '\n') -- function that is used by both graphic (in parseBlock) -- and inline-graphic (in parseInline) getGraphic :: PandocMonad m - => Maybe (Inlines, String) -> Element -> JATS m Inlines + => Maybe (Inlines, Text) -> Element -> JATS m Inlines getGraphic mbfigdata e = do let atVal a = attrValue a e (ident, title, caption) = @@ -142,7 +141,7 @@ getGraphic mbfigdata e = do Just (capt, i) -> (i, "fig:" <> atVal "title", capt) Nothing -> (atVal "id", atVal "title", text (atVal "alt-text")) - attr = (ident, words $ atVal "role", []) + attr = (ident, T.words $ atVal "role", []) imageUrl = atVal "href" return $ imageWith attr imageUrl title caption @@ -155,8 +154,8 @@ parseBlock :: PandocMonad m => Content -> JATS m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x + else return $ plain $ trimInlines $ text $ T.pack s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x parseBlock (Elem e) = case qName (elName e) of "p" -> parseMixed para (elContent e) @@ -167,7 +166,7 @@ parseBlock (Elem e) = "bullet" -> bulletList <$> listitems listType -> do let start = fromMaybe 1 $ - (strContent <$> (filterElement (named "list-item") e + (textContent <$> (filterElement (named "list-item") e >>= filterElement (named "label"))) >>= safeRead orderedListWith (start, parseListStyleType listType, DefaultDelim) @@ -204,7 +203,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e + $ trimNl $ textContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -267,9 +266,9 @@ parseBlock (Elem e) = Just "right" -> AlignRight Just "center" -> AlignCenter _ -> AlignDefault - let toWidth c = case findAttr (unqual "colwidth") c of + let toWidth c = case findAttrText (unqual "colwidth") c of Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> + $ safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w Nothing -> 0 :: Double let numrows = foldl' max 0 $ map length bodyrows @@ -363,7 +362,7 @@ parseRefList e = do return mempty parseRef :: PandocMonad m - => Element -> JATS m (Map.Map String MetaValue) + => Element -> JATS m (Map.Map Text MetaValue) parseRef e = do let refId = text $ attrValue "id" e let getInlineText n = maybe (return mempty) getInlines . filterChild (named n) @@ -396,7 +395,7 @@ parseRef e = do family <- maybe (return mempty) getInlines $ filterChild (named "surname") nm return $ toMetaValue $ Map.fromList [ - ("given", given) + ("given" :: Text, given) , ("family", family) ] personGroups <- mapM (\pg -> @@ -406,7 +405,7 @@ parseRef e = do toMetaValue names)) personGroups' return $ Map.fromList $ - [ ("id", toMetaValue refId) + [ ("id" :: Text, toMetaValue refId) , ("type", toMetaValue refType) , ("title", toMetaValue refTitle) , ("container-title", toMetaValue refContainerTitle) @@ -415,7 +414,7 @@ parseRef e = do , ("title", toMetaValue refTitle) , ("issued", toMetaValue $ Map.fromList [ - ("year", refYear) + ("year" :: Text, refYear) ]) , ("volume", toMetaValue refVolume) , ("page", toMetaValue refPages) @@ -424,6 +423,15 @@ parseRef e = do Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty -- TODO handle mixed-citation +findAttrText :: QName -> Element -> Maybe Text +findAttrText x = fmap T.pack . findAttr x + +textContent :: Element -> Text +textContent = T.pack . strContent + +textContentRecursive :: Element -> Text +textContentRecursive = T.pack . strContentRecursive + strContentRecursive :: Element -> String strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -433,9 +441,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> JATS m Inlines -parseInline (Text (CData _ s _)) = return $ text s +parseInline (Text (CData _ s _)) = return $ text $ T.pack s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) text $ lookupEntity ref + return $ maybe (text $ T.toUpper $ T.pack ref) text $ T.pack <$> lookupEntity ref parseInline (Elem e) = case qName (elName e) of "italic" -> emph <$> innerInlines @@ -464,7 +472,7 @@ parseInline (Elem e) = "xref" -> do ils <- innerInlines let rid = attrValue "rid" e - let rids = words rid + let rids = T.words rid let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e let attr = (attrValue "id" e, [], maybeToList refType) return $ if refType == Just ("ref-type","bibr") @@ -477,13 +485,13 @@ parseInline (Elem e) = , citationNoteNum = 0 , citationHash = 0}) rids) ils - else linkWith attr ('#' : rid) "" ils + else linkWith attr ("#" <> rid) "" ils "ext-link" -> do ils <- innerInlines - let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e + let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> '#' : attrValue "rid" e + Just h -> T.pack h + _ -> "#" <> attrValue "rid" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, [], []) return $ linkWith attr href title ils' @@ -491,23 +499,23 @@ parseInline (Elem e) = "disp-formula" -> formula displayMath "inline-formula" -> formula math "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e - "tex-math" -> return . math $ strContent e + "tex-math" -> return . math $ textContent e - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e + "email" -> return $ link ("mailto:" <> textContent e) "" + $ str $ textContent e + "uri" -> return $ link (textContent e) "" $ str $ textContent e "fn" -> (note . mconcat) <$> mapM parseBlock (elContent e) _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> mapM parseInline (elContent e) mathML x = - case readMathML . showElement $ everywhere (mkT removePrefix) x of + case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of Left _ -> mempty Right m -> writeTeX m formula constructor = do let whereToLook = fromMaybe e $ filterElement (named "alternatives") e - texMaths = map strContent $ + texMaths = map textContent $ filterChildren (named "tex-math") whereToLook mathMLs = map mathML $ filterChildren isMathML whereToLook @@ -520,4 +528,4 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index be19964a4..5c9a3e69c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,9 +1,10 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.LaTeX Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -29,9 +30,9 @@ import Prelude import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Data.Char (isDigit, isLetter, toLower, toUpper, chr) +import Data.Char (isDigit, isLetter, toUpper, chr) import Data.Default -import Data.List (intercalate, isPrefixOf) +import Data.List (intercalate) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set @@ -44,7 +45,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv, readFileFromDirs, report, setResourcePath, setTranslations, translateTerm, trace, fileExists) -import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError)) +import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging @@ -77,7 +78,7 @@ readLaTeX opts ltx = do (tokenize "source" (crFilter ltx)) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (T.unpack ltx) e + Left e -> throwError $ PandocParsecError ltx e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do @@ -99,13 +100,13 @@ parseLaTeX = do walk (resolveRefs (sLabels st)) doc' return $ Pandoc meta bs' -resolveRefs :: M.Map String [Inline] -> Inline -> Inline +resolveRefs :: M.Map Text [Inline] -> Inline -> Inline resolveRefs labels x@(Link (ident,classes,kvs) _ _) = case (lookup "reference-type" kvs, lookup "reference" kvs) of (Just "ref", Just lab) -> case M.lookup lab labels of - Just txt -> Link (ident,classes,kvs) txt ('#':lab, "") + Just txt -> Link (ident,classes,kvs) txt ("#" <> lab, "") Nothing -> x _ -> x resolveRefs _ x = x @@ -123,11 +124,11 @@ resolveRefs _ x = x rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m String + => ParserT Text s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks <|> (rawLaTeXParser toks True (do choice (map controlSeq @@ -151,14 +152,14 @@ beginOrEndCommand = try $ do (inlineEnvironments :: M.Map Text (LP PandocPure Inlines)) then mzero else return $ rawBlock "latex" - (T.unpack (txt <> untokenize rawargs)) + (txt <> untokenize rawargs) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m String + => ParserT Text s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp raw <- snd <$> ( rawLaTeXParser toks True (mempty <$ (controlSeq "input" >> skipMany opt >> braced)) @@ -167,23 +168,23 @@ rawLaTeXInline = do inlines ) finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439 - return $ raw <> finalbraces + return $ raw <> T.pack finalbraces -inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines +inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines -- inline elements: word :: PandocMonad m => LP m Inlines -word = (str . T.unpack . untoken) <$> satisfyTok isWordTok +word = (str . untoken) <$> satisfyTok isWordTok regularSymbol :: PandocMonad m => LP m Inlines -regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol +regularSymbol = (str . untoken) <$> satisfyTok isRegularSymbol where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t isRegularSymbol _ = False isSpecial c = c `Set.member` specialChars @@ -199,14 +200,14 @@ inlineGroup = do doLHSverb :: PandocMonad m => LP m Inlines doLHSverb = - (codeWith ("",["haskell"],[]) . T.unpack . untokenize) + (codeWith ("",["haskell"],[]) . untokenize) <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|') -mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines -mkImage options src = do +mkImage :: PandocMonad m => [(Text, Text)] -> Text -> LP m Inlines +mkImage options (T.unpack -> src) = do let replaceTextwidth (k,v) = case numUnit v of - Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + Just (num, "\\textwidth") -> (k, showFl (num * 100) <> "%") _ -> (k, v) let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options @@ -223,10 +224,10 @@ mkImage options src = do then return s' else findFile s es src' <- case takeExtension src of - "" | not (null defaultExt) -> return $ addExtension src defaultExt + "" | not (T.null defaultExt) -> return $ addExtension src $ T.unpack defaultExt | otherwise -> findFile src exts _ -> return src - return $ imageWith attr src' "" alt + return $ imageWith attr (T.pack src') "" alt doxspace :: PandocMonad m => LP m Inlines doxspace = @@ -435,7 +436,7 @@ siUnitMap = M.fromList , ("zetta", str "Z") ] -lit :: String -> LP m Inlines +lit :: Text -> LP m Inlines lit = pure . str removeDoubleQuotes :: Text -> Text @@ -471,7 +472,7 @@ quoted' :: PandocMonad m -> LP m () -> LP m Inlines quoted' f starter ender = do - startchs <- (T.unpack . untokenize) <$> starter + startchs <- untokenize <$> starter smart <- extensionEnabled Ext_smart <$> getOption readerExtensions if smart then do @@ -487,7 +488,7 @@ quoted' f starter ender = do enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines enquote starred mblang = do skipopts - let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let lang = mblang >>= babelLangToBCP47 let langspan = case lang of Nothing -> id Just l -> spanWith ("",[],[("lang", renderLang l)]) @@ -503,27 +504,27 @@ blockquote citations mblang = do cs <- cites NormalCitation False return $ para (cite cs mempty) else return mempty - let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let lang = mblang >>= babelLangToBCP47 let langdiv = case lang of Nothing -> id Just l -> divWith ("",[],[("lang", renderLang l)]) bs <- grouped block return $ blockQuote . langdiv $ (bs <> citePar) -doAcronym :: PandocMonad m => String -> LP m Inlines +doAcronym :: PandocMonad m => Text -> LP m Inlines doAcronym form = do acro <- braced - return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), - ("acronym-form", "singular+" ++ form)]) - $ str $ toksToString acro] + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "singular+" <> form)]) + $ str $ untokenize acro] -doAcronymPlural :: PandocMonad m => String -> LP m Inlines +doAcronymPlural :: PandocMonad m => Text -> LP m Inlines doAcronymPlural form = do acro <- braced plural <- lit "s" - return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), - ("acronym-form", "plural+" ++ form)]) $ - mconcat [str $ toksToString acro, plural]] + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "plural+" <> form)]) $ + mconcat [str $ untokenize acro, plural]] doverb :: PandocMonad m => LP m Inlines doverb = do @@ -532,7 +533,7 @@ doverb = do Just (c, ts) | T.null ts -> return c _ -> mzero withVerbatimMode $ - (code . T.unpack . untokenize) <$> + (code . untokenize) <$> manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker) verbTok :: PandocMonad m => Char -> LP m Tok @@ -547,7 +548,7 @@ verbTok stopchar = do : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp return $ Tok pos toktype t1 -listingsLanguage :: [(String, String)] -> Maybe String +listingsLanguage :: [(Text, Text)] -> Maybe Text listingsLanguage opts = case lookup "language" opts of Nothing -> Nothing @@ -562,10 +563,10 @@ dolstinline = do domintinline :: PandocMonad m => LP m Inlines domintinline = do skipopts - cls <- toksToString <$> braced + cls <- untokenize <$> braced doinlinecode [cls] -doinlinecode :: PandocMonad m => [String] -> LP m Inlines +doinlinecode :: PandocMonad m => [Text] -> LP m Inlines doinlinecode classes = do Tok _ Symbol t <- anySymbol marker <- case T.uncons t of @@ -573,14 +574,14 @@ doinlinecode classes = do _ -> mzero let stopchar = if marker == '{' then '}' else marker withVerbatimMode $ - (codeWith ("",classes,[]) . map nlToSpace . T.unpack . untokenize) <$> + (codeWith ("",classes,[]) . T.map nlToSpace . untokenize) <$> manyTill (verbTok stopchar) (symbol stopchar) nlToSpace :: Char -> Char nlToSpace '\n' = ' ' nlToSpace x = x -keyval :: PandocMonad m => LP m (String, String) +keyval :: PandocMonad m => LP m (Text, Text) keyval = try $ do Tok _ Word key <- satisfyTok isWordTok optional sp @@ -601,35 +602,34 @@ keyval = try $ do _ -> True)))))) optional (symbol ',') optional sp - return (T.unpack key, T.unpack $ T.strip val) + return (key, T.strip val) -keyvals :: PandocMonad m => LP m [(String, String)] +keyvals :: PandocMonad m => LP m [(Text, Text)] keyvals = try $ symbol '[' >> manyTill keyval (symbol ']') accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines accent combiningAccent fallBack = try $ do ils <- tok case toList ils of - (Str (x:xs) : ys) -> return $ fromList $ + (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ -- try to normalize to the combined character: - Str (T.unpack - (Normalize.normalize Normalize.NFC - (T.pack [x, combiningAccent])) ++ xs) : ys - [Space] -> return $ str [fromMaybe combiningAccent fallBack] - [] -> return $ str [fromMaybe combiningAccent fallBack] + Str (Normalize.normalize Normalize.NFC + (T.pack [x, combiningAccent]) <> xs) : ys + [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack + [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack _ -> return ils -mathDisplay :: String -> Inlines +mathDisplay :: Text -> Inlines mathDisplay = displayMath . trimMath -mathInline :: String -> Inlines +mathInline :: Text -> Inlines mathInline = math . trimMath dollarsMath :: PandocMonad m => LP m Inlines dollarsMath = do symbol '$' display <- option False (True <$ symbol '$') - (do contents <- try $ T.unpack . untokenize <$> pDollarsMath 0 + (do contents <- try $ untokenize <$> pDollarsMath 0 if display then (mathDisplay contents <$ symbol '$') else return $ mathInline contents) @@ -682,10 +682,10 @@ simpleCiteArgs = try $ do } return $ addPrefix pre $ addSuffix suf $ map conv keys -citationLabel :: PandocMonad m => LP m String +citationLabel :: PandocMonad m => LP m Text citationLabel = do optional spaces - toksToString <$> + untokenize <$> (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) <* optional spaces <* optional (symbol ',') @@ -729,10 +729,10 @@ cites mode multi = try $ do addMprenote _ _ = [] addMpostnote = addSuffix . mpostnote -citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines +citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines citation name mode multi = do (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" ++ name ++ toksToString raw) + return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw) handleCitationPart :: Inlines -> [Citation] handleCitationPart ils = @@ -756,7 +756,7 @@ complexNatbibCitation mode = try $ do case cs of [] -> mzero (c:cits) -> return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" ++ toksToString raw) + (rawInline "latex" $ "\\citetext" <> untokenize raw) inNote :: Inlines -> Inlines inNote ils = @@ -780,10 +780,10 @@ tok :: PandocMonad m => LP m Inlines tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' where singleChar' = do Tok _ _ t <- singleChar - return (str (T.unpack t)) + return $ str t opt :: PandocMonad m => LP m Inlines -opt = bracketed inline <|> (str . T.unpack <$> rawopt) +opt = bracketed inline <|> (str <$> rawopt) paropt :: PandocMonad m => LP m Inlines paropt = parenWrapped inline @@ -822,26 +822,31 @@ overlayTok = inBrackets :: Inlines -> Inlines inBrackets x = str "[" <> x <> str "]" -unescapeURL :: String -> String -unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) -unescapeURL (x:xs) = x:unescapeURL xs -unescapeURL [] = "" +unescapeURL :: Text -> Text +unescapeURL = T.concat . go . T.splitOn "\\" + where + isEscapable c = c `elemText` "#$%&~_^\\{}" + go (x:xs) = x : map unescapeInterior xs + go [] = [] + unescapeInterior t + | Just (c, _) <- T.uncons t + , isEscapable c = t + | otherwise = "\\" <> t mathEnvWith :: PandocMonad m => (Inlines -> a) -> Maybe Text -> Text -> LP m a mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name where inner x = case innerEnv of Nothing -> x - Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++ - "\\end{" ++ T.unpack y ++ "}" + Just y -> "\\begin{" <> y <> "}\n" <> x <> + "\\end{" <> y <> "}" -mathEnv :: PandocMonad m => Text -> LP m String +mathEnv :: PandocMonad m => Text -> LP m Text mathEnv name = do skipopts optional blankline res <- manyTill anyTok (end_ name) - return $ stripTrailingNewlines $ T.unpack $ untokenize res + return $ stripTrailingNewlines $ untokenize res inlineEnvironment :: PandocMonad m => LP m Inlines inlineEnvironment = try $ do @@ -914,9 +919,9 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok) , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok) , ("lettrine", optional opt >> extractSpaces (spanWith ("",["lettrine"],[])) <$> tok) - , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")")) - , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]")) - , ("ensuremath", mathInline . toksToString <$> braced) + , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . untokenize <$> braced) , ("texorpdfstring", const <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") @@ -1008,16 +1013,15 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("lstinline", dolstinline) , ("mintinline", domintinline) , ("Verb", doverb) - , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url -> + , ("url", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url -> pure (link url "" (str url))) - , ("nolinkurl", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url -> + , ("nolinkurl", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url -> pure (code url)) - , ("href", (unescapeURL . toksToString <$> + , ("href", (unescapeURL . untokenize <$> bracedUrl <* optional sp) >>= \url -> tok >>= \lab -> pure (link url "" lab)) , ("includegraphics", do options <- option [] keyvals - src <- unescapeURL . T.unpack . - removeDoubleQuotes . untokenize <$> braced + src <- unescapeURL . removeDoubleQuotes . untokenize <$> braced mkImage options src) , ("enquote*", enquote True Nothing) , ("enquote", enquote False Nothing) @@ -1172,22 +1176,21 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList ifdim :: PandocMonad m => LP m Inlines ifdim = do contents <- manyTill anyTok (controlSeq "fi") - return $ rawInline "latex" $ T.unpack $ - "\\ifdim" <> untokenize contents <> "\\fi" + return $ rawInline "latex" $ "\\ifdim" <> untokenize contents <> "\\fi" makeUppercase :: Inlines -> Inlines -makeUppercase = fromList . walk (alterStr (map toUpper)) . toList +makeUppercase = fromList . walk (alterStr T.toUpper) . toList makeLowercase :: Inlines -> Inlines -makeLowercase = fromList . walk (alterStr (map toLower)) . toList +makeLowercase = fromList . walk (alterStr T.toLower) . toList -alterStr :: (String -> String) -> Inline -> Inline +alterStr :: (Text -> Text) -> Inline -> Inline alterStr f (Str xs) = Str (f xs) alterStr _ x = x foreignlanguage :: PandocMonad m => LP m Inlines foreignlanguage = do - babelLang <- T.unpack . untokenize <$> braced + babelLang <- untokenize <$> braced case babelLangToBCP47 babelLang of Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok _ -> tok @@ -1196,24 +1199,24 @@ inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines) inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47 where mk (polyglossia, bcp47Func) = - ("text" <> T.pack polyglossia, inlineLanguage bcp47Func) + ("text" <> polyglossia, inlineLanguage bcp47Func) -inlineLanguage :: PandocMonad m => (String -> Lang) -> LP m Inlines +inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines inlineLanguage bcp47Func = do - o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') <$> rawopt let lang = renderLang $ bcp47Func o extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok hyperlink :: PandocMonad m => LP m Inlines hyperlink = try $ do - src <- toksToString <$> braced + src <- untokenize <$> braced lab <- tok - return $ link ('#':src) "" lab + return $ link ("#" <> src) "" lab hypertargetBlock :: PandocMonad m => LP m Blocks hypertargetBlock = try $ do - ref <- toksToString <$> braced + ref <- untokenize <$> braced bs <- grouped block case toList bs of [Header 1 (ident,_,_) _] | ident == ref -> return bs @@ -1221,7 +1224,7 @@ hypertargetBlock = try $ do hypertargetInline :: PandocMonad m => LP m Inlines hypertargetInline = try $ do - ref <- toksToString <$> braced + ref <- untokenize <$> braced ils <- grouped inline return $ spanWith (ref, [], []) ils @@ -1231,7 +1234,7 @@ romanNumeralUpper = romanNumeralLower :: (PandocMonad m) => LP m Inlines romanNumeralLower = - str . map toLower . toRomanNumeral <$> romanNumeralArg + str . T.toLower . toRomanNumeral <$> romanNumeralArg romanNumeralArg :: (PandocMonad m) => LP m Int romanNumeralArg = spaces *> (parser <|> inBraces) @@ -1248,18 +1251,18 @@ romanNumeralArg = spaces *> (parser <|> inBraces) let (digits, rest) = T.span isDigit s unless (T.null rest) $ Prelude.fail "Non-digits in argument to \\Rn or \\RN" - safeRead $ T.unpack digits + safeRead digits newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a newToggle name = do updateState $ \st -> - st{ sToggles = M.insert (toksToString name) False (sToggles st) } + st{ sToggles = M.insert (untokenize name) False (sToggles st) } return mempty setToggle :: (Monoid a, PandocMonad m) => Bool -> [Tok] -> LP m a setToggle on name = do updateState $ \st -> - st{ sToggles = M.adjust (const on) (toksToString name) (sToggles st) } + st{ sToggles = M.adjust (const on) (untokenize name) (sToggles st) } return mempty ifToggle :: PandocMonad m => LP m () @@ -1271,7 +1274,7 @@ ifToggle = do no <- braced toggles <- sToggles <$> getState inp <- getInput - let name' = toksToString name + let name' = untokenize name case M.lookup name' toggles of Just True -> setInput (yes ++ inp) Just False -> setInput (no ++ inp) @@ -1294,11 +1297,11 @@ ifstrequal = do else getInput >>= setInput . (ifnotequal ++) return mempty -coloredInline :: PandocMonad m => String -> LP m Inlines +coloredInline :: PandocMonad m => Text -> LP m Inlines coloredInline stylename = do skipopts color <- braced - spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok + spanWith ("",[],[("style",stylename <> ": " <> untokenize color)]) <$> tok ttfamily :: PandocMonad m => LP m Inlines ttfamily = (code . stringify . toList) <$> tok @@ -1313,12 +1316,12 @@ rawInlineOr name' fallback = do processHBox :: Inlines -> Inlines processHBox = walk convert where - convert Space = Str [chr 160] -- non-breakable space - convert SoftBreak = Str [chr 160] -- non-breakable space + convert Space = Str $ T.singleton $ chr 160 -- non-breakable space + convert SoftBreak = Str $ T.singleton $ chr 160 -- non-breakable space convert LineBreak = Str "" convert x = x -getRawCommand :: PandocMonad m => Text -> Text -> LP m String +getRawCommand :: PandocMonad m => Text -> Text -> LP m Text getRawCommand name txt = do (_, rawargs) <- withRaw $ case name of @@ -1336,7 +1339,7 @@ getRawCommand name txt = do skipopts option "" (try dimenarg) void $ many braced - return $ T.unpack (txt <> untokenize rawargs) + return $ txt <> untokenize rawargs isFontSizeCommand :: Text -> Bool isFontSizeCommand "tiny" = True @@ -1396,17 +1399,17 @@ treatAsInline = Set.fromList dolabel :: PandocMonad m => LP m Inlines dolabel = do v <- braced - let refstr = toksToString v + let refstr = untokenize v return $ spanWith (refstr,[],[("label", refstr)]) - $ inBrackets $ str $ toksToString v + $ inBrackets $ str $ untokenize v -doref :: PandocMonad m => String -> LP m Inlines +doref :: PandocMonad m => Text -> LP m Inlines doref cls = do v <- braced - let refstr = toksToString v + let refstr = untokenize v return $ linkWith ("",[],[ ("reference-type", cls) , ("reference", refstr)]) - ('#':refstr) + ("#" <> refstr) "" (inBrackets $ str refstr) @@ -1435,11 +1438,11 @@ inline = (mempty <$ comment) <|> (str "\160" <$ symbol '~') <|> dollarsMath <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) - <|> (str . (:[]) <$> primEscape) + <|> (str . T.singleton <$> primEscape) <|> regularSymbol <|> (do res <- symbolIn "#^'`\"[]&" pos <- getPosition - let s = T.unpack (untoken res) + let s = untoken res report $ ParsingUnescaped s pos return $ str s) @@ -1498,7 +1501,7 @@ include name = do -- note, we can have cc_by_4.0 for example... _ | name == "usepackage" -> addExtension f ".sty" | otherwise -> addExtension f ".tex" - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" mapM_ (insertIncluded dirs) (map addExt fs) return mempty @@ -1509,19 +1512,19 @@ insertIncluded :: PandocMonad m insertIncluded dirs f = do pos <- getPosition containers <- getIncludeFiles <$> getState - when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show pos - updateState $ addIncludeFile f + when (T.pack f `elem` containers) $ + throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos + updateState $ addIncludeFile $ T.pack f mbcontents <- readFileFromDirs dirs f contents <- case mbcontents of Just s -> return s Nothing -> do - report $ CouldNotLoadIncludeFile f pos + report $ CouldNotLoadIncludeFile (T.pack f) pos return "" - getInput >>= setInput . (tokenize f (T.pack contents) ++) + getInput >>= setInput . (tokenize f contents ++) updateState dropLatestIncludeFile -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () addMeta field val = updateState $ \st -> st{ sMeta = addMetaField field val $ sMeta st } @@ -1536,10 +1539,10 @@ authors = try $ do egroup addMeta "author" (map trimInlines auths) -macroDef :: (PandocMonad m, Monoid a) => (String -> a) -> LP m a +macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a macroDef constructor = do (_, s) <- withRaw (commandDef <|> environmentDef) - (constructor (T.unpack $ untokenize s) <$ + (constructor (untokenize s) <$ guardDisabled Ext_latex_macros) <|> return mempty where commandDef = do @@ -1632,7 +1635,7 @@ newcommand = do case M.lookup name macros of Just macro | mtype == "newcommand" -> do - report $ MacroAlreadyDefined (T.unpack txt) pos + report $ MacroAlreadyDefined txt pos return (name, macro) | mtype == "providecommand" -> return (name, macro) _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents) @@ -1658,7 +1661,7 @@ newenvironment = do case M.lookup name macros of Just _ | mtype == "newenvironment" -> do - report $ MacroAlreadyDefined (T.unpack name) pos + report $ MacroAlreadyDefined name pos return Nothing | mtype == "provideenvironment" -> do return Nothing @@ -1669,7 +1672,7 @@ newenvironment = do bracketedNum :: PandocMonad m => LP m Int bracketedNum = do ds <- untokenize <$> bracketedToks - case safeRead (T.unpack ds) of + case safeRead ds of Just i -> return i _ -> return 0 @@ -1709,7 +1712,7 @@ section (ident, classes, kvs) lvl = do contents <- grouped inline lab <- option ident $ try (spaces >> controlSeq "label" - >> spaces >> toksToString <$> braced) + >> spaces >> untokenize <$> braced) when (lvl == 0) $ updateState $ \st -> st{ sHasChapters = True } unless ("unnumbered" `elem` classes) $ do @@ -1836,9 +1839,9 @@ blockCommands = M.fromList , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) , ("caption", skipopts *> setCaption) , ("bibliography", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs . toksToString)) + addMeta "bibliography" . splitBibs . untokenize)) , ("addbibresource", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs . toksToString)) + addMeta "bibliography" . splitBibs . untokenize)) , ("endinput", mempty <$ skipMany tok) -- includes , ("lstinputlisting", inputListing) @@ -1941,18 +1944,18 @@ rawEnv name = do (bs, raw) <- withRaw $ env name blocks if parseRaw then return $ rawBlock "latex" - $ T.unpack $ beginCommand <> untokenize raw + $ beginCommand <> untokenize raw else do - report $ SkippedContent (T.unpack beginCommand) pos1 + report $ SkippedContent beginCommand pos1 pos2 <- getPosition - report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2 + report $ SkippedContent ("\\end{" <> name <> "}") pos2 return bs rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do pos <- getPosition (_, raw) <- withRaw $ verbEnv name - let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw + let raw' = "\\begin{" <> name <> "}" <> untokenize raw exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts if parseRaw @@ -1961,12 +1964,11 @@ rawVerbEnv name = do report $ SkippedContent raw' pos return mempty -verbEnv :: PandocMonad m => Text -> LP m String +verbEnv :: PandocMonad m => Text -> LP m Text verbEnv name = withVerbatimMode $ do optional blankline res <- manyTill anyTok (end_ name) - return $ T.unpack - $ stripTrailingNewline + return $ stripTrailingNewline $ untokenize $ res @@ -2010,11 +2012,11 @@ minted = do mintedAttr :: PandocMonad m => LP m Attr mintedAttr = do options <- option [] keyvals - lang <- toksToString <$> braced + lang <- untokenize <$> braced let kvs = [ (if k == "firstnumber" then "startFrom" else k, v) | (k,v) <- options ] - let classes = [ lang | not (null lang) ] ++ + let classes = [ lang | not (T.null lang) ] ++ [ "numberLines" | lookup "linenos" options == Just "true" ] return ("",classes,kvs) @@ -2023,14 +2025,14 @@ inputMinted :: PandocMonad m => LP m Blocks inputMinted = do pos <- getPosition attr <- mintedAttr - f <- filter (/='"') . toksToString <$> braced - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs f + f <- T.filter (/='"') . untokenize <$> braced + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs (T.unpack f) rawcode <- case mbCode of Just s -> return s Nothing -> do report $ CouldNotLoadIncludeFile f pos - return [] + return "" return $ B.codeBlockWith attr rawcode letterContents :: PandocMonad m => LP m Blocks @@ -2052,10 +2054,10 @@ figure = try $ do addImageCaption :: PandocMonad m => Blocks -> LP m Blocks addImageCaption = walkM go where go (Image attr@(_, cls, kvs) alt (src,tit)) - | not ("fig:" `isPrefixOf` tit) = do + | not ("fig:" `T.isPrefixOf` tit) = do (mbcapt, mblab) <- sCaption <$> getState let (alt', tit') = case mbcapt of - Just ils -> (toList ils, "fig:" ++ tit) + Just ils -> (toList ils, "fig:" <> tit) Nothing -> (alt, tit) attr' = case mblab of Just lab -> (lab, cls, kvs) @@ -2090,23 +2092,23 @@ addImageCaption = walkM go return $ Image attr' alt' (src, tit') go x = return x -coloredBlock :: PandocMonad m => String -> LP m Blocks +coloredBlock :: PandocMonad m => Text -> LP m Blocks coloredBlock stylename = try $ do skipopts color <- braced notFollowedBy (grouped inline) - let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) + let constructor = divWith ("",[],[("style",stylename <> ": " <> untokenize color)]) constructor <$> grouped block graphicsPath :: PandocMonad m => LP m Blocks graphicsPath = do - ps <- map toksToString <$> + ps <- map (T.unpack . untokenize) <$> (bgroup *> spaces *> manyTill (braced <* spaces) egroup) - getResourcePath >>= setResourcePath . (++ ps) + getResourcePath >>= setResourcePath . (<> ps) return mempty -splitBibs :: String -> [Inlines] -splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') +splitBibs :: Text -> [Inlines] +splitBibs = map (str . T.pack . flip replaceExtension "bib" . T.unpack . trim) . splitTextBy (==',') alltt :: Blocks -> Blocks alltt = walk strToCode @@ -2115,7 +2117,7 @@ alltt = walk strToCode strToCode SoftBreak = LineBreak strToCode x = x -parseListingsOptions :: [(String, String)] -> Attr +parseListingsOptions :: [(Text, Text)] -> Attr parseListingsOptions options = let kvs = [ (if k == "firstnumber" then "startFrom" @@ -2129,23 +2131,23 @@ inputListing :: PandocMonad m => LP m Blocks inputListing = do pos <- getPosition options <- option [] keyvals - f <- filter (/='"') . toksToString <$> braced - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs f + f <- T.filter (/='"') . untokenize <$> braced + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs (T.unpack f) codeLines <- case mbCode of - Just s -> return $ lines s + Just s -> return $ T.lines s Nothing -> do report $ CouldNotLoadIncludeFile f pos return [] let (ident,classes,kvs) = parseListingsOptions options let classes' = (case listingsLanguage options of - Nothing -> (take 1 (languagesByExtension (takeExtension f)) ++) + Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>) Just _ -> id) classes let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead let lastline = fromMaybe (length codeLines) $ lookup "lastline" options >>= safeRead - let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $ + let codeContents = T.intercalate "\n" $ take (1 + lastline - firstline) $ drop (firstline - 1) codeLines return $ codeBlockWith (ident,classes',kvs) codeContents @@ -2176,12 +2178,12 @@ orderedList' = try $ do spaces let markerSpec = do symbol '[' - ts <- toksToString <$> manyTill anyTok (symbol ']') + ts <- untokenize <$> manyTill anyTok (symbol ']') case runParser anyOrderedListMarker def "option" ts of Right r -> return r Left _ -> do pos <- getPosition - report $ SkippedContent ("[" ++ ts ++ "]") pos + report $ SkippedContent ("[" <> ts <> "]") pos return (1, DefaultStyle, DefaultDelim) (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec spaces @@ -2191,17 +2193,17 @@ orderedList' = try $ do spaces start <- option 1 $ try $ do pos <- getPosition controlSeq "setcounter" - ctr <- toksToString <$> braced - guard $ "enum" `isPrefixOf` ctr - guard $ all (`elem` ['i','v']) (drop 4 ctr) + ctr <- untokenize <$> braced + guard $ "enum" `T.isPrefixOf` ctr + guard $ T.all (`elem` ['i','v']) (T.drop 4 ctr) optional sp - num <- toksToString <$> braced + num <- untokenize <$> braced case safeRead num of Just i -> return (i + 1 :: Int) Nothing -> do report $ SkippedContent - ("\\setcounter{" ++ ctr ++ - "}{" ++ num ++ "}") pos + ("\\setcounter{" <> ctr <> + "}{" <> num <> "}") pos return 1 bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs @@ -2235,7 +2237,7 @@ splitWordTok = do inp <- getInput case inp of (Tok spos Word t : rest) -> - setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest _ -> return () parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))] @@ -2256,7 +2258,7 @@ parseAligns = try $ do let alignSuffix = symbol '<' >> braced let colWidth = try $ do symbol '{' - ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth") + ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") spaces symbol '}' case safeRead ds of @@ -2266,7 +2268,7 @@ parseAligns = try $ do pref <- option [] alignPrefix spaces al <- alignChar - width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced + width <- colWidth <|> option 0.0 (do s <- untokenize <$> braced pos <- getPosition report $ SkippedContent s pos return 0.0) @@ -2276,13 +2278,13 @@ parseAligns = try $ do let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro symbol '*' spaces - ds <- trim . toksToString <$> braced + ds <- trim . untokenize <$> braced spaces spec <- braced case safeRead ds of Just n -> getInput >>= setInput . (mconcat (replicate n spec) ++) - Nothing -> Prelude.fail $ "Could not parse " ++ ds ++ " as number" + Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" bgroup spaces maybeBar @@ -2379,7 +2381,7 @@ block = do <|> blockCommand <|> paragraph <|> grouped block - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blocks :: PandocMonad m => LP m Blocks @@ -2387,9 +2389,9 @@ blocks = mconcat <$> many block setDefaultLanguage :: PandocMonad m => LP m Blocks setDefaultLanguage = do - o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') <$> rawopt - polylang <- toksToString <$> braced + polylang <- untokenize <$> braced case M.lookup polylang polyglossiaLangToBCP47 of Nothing -> return mempty -- TODO mzero? warning? Just langFunc -> do diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index b21398f93..7ec432a4a 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.LaTeX.Lang Copyright : Copyright (C) 2018-2019 John MacFarlane @@ -18,11 +19,12 @@ module Text.Pandoc.Readers.LaTeX.Lang where import Prelude import qualified Data.Map as M +import qualified Data.Text as T import Text.Pandoc.BCP47 (Lang(..)) -polyglossiaLangToBCP47 :: M.Map String (String -> Lang) +polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang) polyglossiaLangToBCP47 = M.fromList - [ ("arabic", \o -> case filter (/=' ') o of + [ ("arabic", \o -> case T.filter (/=' ') o of "locale=algeria" -> Lang "ar" "" "DZ" [] "locale=mashriq" -> Lang "ar" "" "SY" [] "locale=libya" -> Lang "ar" "" "LY" [] @@ -30,7 +32,7 @@ polyglossiaLangToBCP47 = M.fromList "locale=mauritania" -> Lang "ar" "" "MR" [] "locale=tunisia" -> Lang "ar" "" "TN" [] _ -> Lang "ar" "" "" []) - , ("german", \o -> case filter (/=' ') o of + , ("german", \o -> case T.filter (/=' ') o of "spelling=old" -> Lang "de" "" "DE" ["1901"] "variant=austrian,spelling=old" -> Lang "de" "" "AT" ["1901"] @@ -40,11 +42,11 @@ polyglossiaLangToBCP47 = M.fromList "variant=swiss" -> Lang "de" "" "CH" [] _ -> Lang "de" "" "" []) , ("lsorbian", \_ -> Lang "dsb" "" "" []) - , ("greek", \o -> case filter (/=' ') o of + , ("greek", \o -> case T.filter (/=' ') o of "variant=poly" -> Lang "el" "" "polyton" [] "variant=ancient" -> Lang "grc" "" "" [] _ -> Lang "el" "" "" []) - , ("english", \o -> case filter (/=' ') o of + , ("english", \o -> case T.filter (/=' ') o of "variant=australian" -> Lang "en" "" "AU" [] "variant=canadian" -> Lang "en" "" "CA" [] "variant=british" -> Lang "en" "" "GB" [] @@ -52,7 +54,7 @@ polyglossiaLangToBCP47 = M.fromList "variant=american" -> Lang "en" "" "US" [] _ -> Lang "en" "" "" []) , ("usorbian", \_ -> Lang "hsb" "" "" []) - , ("latin", \o -> case filter (/=' ') o of + , ("latin", \o -> case T.filter (/=' ') o of "variant=classic" -> Lang "la" "" "" ["x-classic"] _ -> Lang "la" "" "" []) , ("slovenian", \_ -> Lang "sl" "" "" []) @@ -133,7 +135,7 @@ polyglossiaLangToBCP47 = M.fromList , ("vietnamese", \_ -> Lang "vi" "" "" []) ] -babelLangToBCP47 :: String -> Maybe Lang +babelLangToBCP47 :: T.Text -> Maybe Lang babelLangToBCP47 s = case s of "austrian" -> Just $ Lang "de" "" "AT" ["1901"] diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 14cb408b0..a01abda46 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -97,8 +97,8 @@ import Text.Parsec.Pos newtype DottedNum = DottedNum [Int] deriving (Show) -renderDottedNum :: DottedNum -> String -renderDottedNum (DottedNum xs) = +renderDottedNum :: DottedNum -> T.Text +renderDottedNum (DottedNum xs) = T.pack $ intercalate "." (map show xs) incrementDottedNum :: Int -> DottedNum -> DottedNum @@ -111,18 +111,18 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sMeta :: Meta , sQuoteContext :: QuoteContext , sMacros :: M.Map Text Macro - , sContainers :: [String] + , sContainers :: [Text] , sLogMessages :: [LogMessage] - , sIdentifiers :: Set.Set String + , sIdentifiers :: Set.Set Text , sVerbatimMode :: Bool - , sCaption :: (Maybe Inlines, Maybe String) + , sCaption :: (Maybe Inlines, Maybe Text) , sInListItem :: Bool , sInTableCell :: Bool , sLastHeaderNum :: DottedNum , sLastFigureNum :: DottedNum - , sLabels :: M.Map String [Inline] + , sLabels :: M.Map Text [Inline] , sHasChapters :: Bool - , sToggles :: M.Map String Bool + , sToggles :: M.Map Text Bool , sExpanded :: Bool } deriving Show @@ -202,7 +202,7 @@ withVerbatimMode parser = do rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) => [Tok] -> Bool -> LP m a -> LP m a - -> ParserT String s m (a, String) + -> ParserT Text s m (a, Text) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } @@ -233,16 +233,16 @@ rawLaTeXParser toks retokenize parser valParser = do , not (" " `T.isSuffixOf` result) -> result <> " " _ -> result - return (val, T.unpack result') + return (val, result') applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => String -> ParserT String s m String + => Text -> ParserT Text s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> - do let retokenize = toksToString <$> many (satisfyTok (const True)) + do let retokenize = untokenize <$> many (satisfyTok (const True)) pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate , sMacros = extractMacros pstate } - res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s)) + res <- runParserT retokenize lstate "math" (tokenize "math" s) case res of Left e -> Prelude.fail (show e) Right s' -> return s' @@ -307,7 +307,7 @@ totoks pos t = : totoks (incSourceColumn pos 2) rest' | c == '#' -> let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest - in case safeRead (T.unpack t1) of + in case safeRead t1 of Just i -> Tok pos (Arg i) ("#" <> t1) : totoks (incSourceColumn pos (1 + T.length t1)) t2 @@ -447,7 +447,7 @@ doMacros' n inp = do handleMacros n' spos name ts = do when (n' > 20) -- detect macro expansion loops - $ throwError $ PandocMacroLoop (T.unpack name) + $ throwError $ PandocMacroLoop name macros <- sMacros <$> getState case M.lookup name macros of Nothing -> mzero @@ -588,7 +588,7 @@ primEscape = do | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) | otherwise -> return (chr (ord c + 64)) Nothing -> Prelude.fail "Empty content of Esc1" - Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of + Esc2 -> case safeRead ("0x" <> T.drop 2 t) of Just x -> return (chr x) Nothing -> Prelude.fail $ "Could not read: " ++ T.unpack t _ -> Prelude.fail "Expected an Esc1 or Esc2 token" -- should not happen @@ -677,7 +677,7 @@ dimenarg = try $ do guard $ rest `elem` ["", "pt","pc","in","bp","cm","mm","dd","cc","sp"] return $ T.pack ['=' | ch] <> minus <> s -ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a +ignore :: (Monoid a, PandocMonad m) => Text -> ParserT s u m a ignore raw = do pos <- getPosition report $ SkippedContent raw pos diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index ddf469222..feacb8450 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -1,4 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Man Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane @@ -63,7 +65,7 @@ readWithMTokens :: PandocMonad m -> [RoffToken] -- ^ input -> m (Either PandocError a) readWithMTokens parser state input = - let leftF = PandocParsecError . intercalate "\n" $ show <$> input + let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input in mapLeft leftF `liftM` runParserT parser state "source" input parseMan :: PandocMonad m => ManParser m Pandoc @@ -141,7 +143,7 @@ parseTable = do isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','='] isHrule (_, [RoffTokens ss]) = case Foldable.toList ss of - [TextLine [RoffStr [c]]] -> c `elem` ['_','-','='] + [TextLine [RoffStr (T.unpack -> [c])]] -> c `elem` ['_','-','='] _ -> False isHrule _ = False @@ -191,7 +193,7 @@ memptyLine = msatisfy isEmptyLine where isEmptyLine EmptyLine = True isEmptyLine _ = False -mmacro :: PandocMonad m => String -> ManParser m RoffToken +mmacro :: PandocMonad m => T.Text -> ManParser m RoffToken mmacro mk = msatisfy isControlLine where isControlLine (ControlLine mk' _ _) | mk == mk' = True | otherwise = False @@ -284,7 +286,7 @@ parseInline = try $ do _ -> mzero handleInlineMacro :: PandocMonad m - => String -> [Arg] -> SourcePos -> ManParser m Inlines + => T.Text -> [Arg] -> SourcePos -> ManParser m Inlines handleInlineMacro mname args _pos = do case mname of "UR" -> parseLink args @@ -339,7 +341,7 @@ bareIP = msatisfy isBareIP where isBareIP (ControlLine "IP" [] _) = True isBareIP _ = False -endmacro :: PandocMonad m => String -> ManParser m () +endmacro :: PandocMonad m => T.Text -> ManParser m () endmacro name = void (mmacro name) <|> lookAhead (void newBlockMacro) <|> lookAhead eof @@ -356,7 +358,7 @@ parseCodeBlock = try $ do toks <- (mmacro "nf" *> manyTill codeline (endmacro "fi")) <|> (mmacro "EX" *> manyTill codeline (endmacro "EE")) optional (mmacro "in") - return $ codeBlock (intercalate "\n" $ catMaybes toks) + return $ codeBlock (T.intercalate "\n" $ catMaybes toks) where @@ -366,7 +368,7 @@ parseCodeBlock = try $ do ControlLine "PP" _ _ -> return $ Just "" -- .PP sometimes used for blank line ControlLine mname args pos -> do (Just . query getText <$> handleInlineMacro mname args pos) <|> - do report $ SkippedContent ('.':mname) pos + do report $ SkippedContent ("." <> mname) pos return Nothing Tbl _ _ pos -> do report $ SkippedContent "TABLE" pos @@ -375,12 +377,12 @@ parseCodeBlock = try $ do TextLine ss | not (null ss) , all isFontToken ss -> return Nothing - | otherwise -> return $ Just $ linePartsToString ss + | otherwise -> return $ Just $ linePartsToText ss isFontToken Font{} = True isFontToken _ = False - getText :: Inline -> String + getText :: Inline -> T.Text getText (Str s) = s getText Space = " " getText (Code _ s) = s @@ -416,8 +418,8 @@ listItem mbListType = try $ do (ControlLine _ args _) <- mmacro "IP" case args of (arg1 : _) -> do - let cs = linePartsToString arg1 - let cs' = if not ('.' `elem` cs || ')' `elem` cs) then cs ++ "." else cs + let cs = linePartsToText arg1 + let cs' = if not (T.any (== '.') cs || T.any (== ')') cs) then cs <> "." else cs let lt = case Parsec.runParser anyOrderedListMarker defaultParserState "list marker" cs' of Right (start, listtype, listdelim) @@ -467,7 +469,7 @@ parseLink args = do ControlLine _ endargs _ <- mmacro "UE" let url = case args of [] -> "" - (x:_) -> linePartsToString x + (x:_) -> linePartsToText x return $ link url "" contents <> case endargs of [] -> mempty @@ -479,7 +481,7 @@ parseEmailLink args = do ControlLine _ endargs _ <- mmacro "ME" let url = case args of [] -> "" - (x:_) -> "mailto:" ++ linePartsToString x + (x:_) -> "mailto:" <> linePartsToText x return $ link url "" contents <> case endargs of [] -> mempty @@ -490,6 +492,6 @@ skipUnknownMacro = do tok <- mmacroAny case tok of ControlLine mkind _ pos -> do - report $ SkippedContent ('.':mkind) pos + report $ SkippedContent ("." <> mkind) pos return mempty _ -> Prelude.fail "the impossible happened" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4807baada..f8349ea99 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,7 +1,9 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RelaxedPolyRec #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Markdown Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -19,14 +21,15 @@ import Prelude import Control.Monad import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BS -import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower) -import Data.List (intercalate, sortBy, transpose, elemIndex) +import Data.Char (isAlphaNum, isPunctuation, isSpace) +import Data.List (sortBy, transpose, elemIndex) import qualified Data.Map as M import Data.Maybe import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.YAML as YAML import qualified Data.YAML.Event as YE import System.FilePath (addExtension, takeExtension) @@ -47,7 +50,7 @@ import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) -type MarkdownParser m = ParserT [Char] ParserState m +type MarkdownParser m = ParserT Text ParserState m -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: PandocMonad m @@ -56,7 +59,7 @@ readMarkdown :: PandocMonad m -> m Pandoc readMarkdown opts s = do parsed <- readWithM parseMarkdown def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -77,7 +80,7 @@ isHruleChar '-' = True isHruleChar '_' = True isHruleChar _ = False -setextHChars :: String +setextHChars :: [Char] setextHChars = "=-" isBlank :: Char -> Bool @@ -96,30 +99,30 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: PandocMonad m => ParserT [Char] st m () +spnl :: PandocMonad m => ParserT Text st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -spnl' :: PandocMonad m => ParserT [Char] st m String +spnl' :: PandocMonad m => ParserT Text st m Text spnl' = try $ do xs <- many spaceChar ys <- option "" $ try $ (:) <$> newline <*> (many spaceChar <* notFollowedBy (char '\n')) - return (xs ++ ys) + return $ T.pack $ xs ++ ys -indentSpaces :: PandocMonad m => MarkdownParser m String +indentSpaces :: PandocMonad m => MarkdownParser m Text indentSpaces = try $ do tabStop <- getOption readerTabStop - count tabStop (char ' ') <|> - string "\t" <?> "indentation" + countChar tabStop (char ' ') <|> + textStr "\t" <?> "indentation" -nonindentSpaces :: PandocMonad m => MarkdownParser m String +nonindentSpaces :: PandocMonad m => MarkdownParser m Text nonindentSpaces = do n <- skipNonindentSpaces - return $ replicate n ' ' + return $ T.replicate n " " -- returns number of spaces parsed skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int @@ -139,8 +142,9 @@ inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) inlinesInBalancedBrackets = try $ char '[' >> withRaw (go 1) >>= parseFromString inlines . stripBracket . snd - where stripBracket [] = [] - stripBracket xs = if last xs == ']' then init xs else xs + where stripBracket t = case T.unsnoc t of + Just (t', ']') -> t' + _ -> t go :: PandocMonad m => Int -> MarkdownParser m () go 0 = return () go openBrackets = @@ -160,7 +164,7 @@ inlinesInBalancedBrackets = -- document structure -- -rawTitleBlockLine :: PandocMonad m => MarkdownParser m String +rawTitleBlockLine :: PandocMonad m => MarkdownParser m Text rawTitleBlockLine = do char '%' skipSpaces @@ -169,7 +173,7 @@ rawTitleBlockLine = do notFollowedBy blankline skipSpaces anyLine - return $ trim $ unlines (first:rest) + return $ trim $ T.unlines (first:rest) titleLine :: PandocMonad m => MarkdownParser m (F Inlines) titleLine = try $ do @@ -222,9 +226,9 @@ yamlMetaBlock = try $ do notFollowedBy blankline -- if --- is followed by a blank it's an HRULE rawYamlLines <- manyTill anyLine stopLine -- by including --- and ..., we allow yaml blocks with just comments: - let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) + let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml + newMetaF <- yamlBsToMeta $ UTF8.fromTextLazy $ TL.fromStrict rawYaml -- Since `<>` is left-biased, existing values are not touched: updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF } return mempty @@ -255,7 +259,7 @@ yamlBsToMeta bstr = do return . return $ mempty Left (_pos, err') -> do logMessage $ CouldNotParseYamlMetadata - err' pos + (T.pack err') pos return . return $ mempty nodeToKey :: PandocMonad m => YAML.Node YE.Pos -> m Text @@ -270,11 +274,11 @@ toMetaValue x = -- Note: a standard quoted or unquoted YAML value will -- not end in a newline, but a "block" set off with -- `|` or `>` will. - if (T.pack "\n") `T.isSuffixOf` x - then parseFromString' (asBlocks <$> parseBlocks) (xstring <> "\n") + if "\n" `T.isSuffixOf` x + then parseFromString' (asBlocks <$> parseBlocks) (x <> "\n") else parseFromString' ((asInlines <$> try pInlines) <|> (asBlocks <$> parseBlocks)) - xstring + x where pInlines = trimInlinesF . mconcat <$> manyTill inline eof asBlocks p = do p' <- p @@ -282,7 +286,6 @@ toMetaValue x = asInlines p = do p' <- p return $ MetaInlines (B.toList p') - xstring = T.unpack x checkBoolean :: Text -> Maybe Bool checkBoolean t = @@ -298,8 +301,8 @@ yamlToMetaValue (YAML.Scalar _ x) = case x of YAML.SStr t -> toMetaValue t YAML.SBool b -> return $ return $ MetaBool b - YAML.SFloat d -> return $ return $ MetaString (show d) - YAML.SInt i -> return $ return $ MetaString (show i) + YAML.SFloat d -> return $ return $ MetaString $ tshow d + YAML.SInt i -> return $ return $ MetaString $ tshow i YAML.SUnknown _ t -> case checkBoolean t of Just b -> return $ return $ MetaBool b @@ -315,7 +318,7 @@ yamlToMetaValue _ = return $ return $ MetaString "" yamlMap :: PandocMonad m => M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) - -> MarkdownParser m (F (M.Map String MetaValue)) + -> MarkdownParser m (F (M.Map Text MetaValue)) yamlMap o = do kvs <- forM (M.toList o) $ \(key, v) -> do k <- nodeToKey key @@ -323,12 +326,12 @@ yamlMap o = do let kvs' = filter (not . ignorable . fst) kvs (fmap M.fromList . sequence) <$> mapM toMeta kvs' where - ignorable t = (T.pack "_") `T.isSuffixOf` t + ignorable t = "_" `T.isSuffixOf` t toMeta (k, v) = do fv <- yamlToMetaValue v return $ do v' <- fv - return (T.unpack k, v') + return (k, v') stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () @@ -343,14 +346,14 @@ mmdTitleBlock = try $ do updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue) +kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue) kvPair allowEmpty = try $ do - key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - val <- trim <$> manyTill anyChar + key <- many1TillChar (alphaNum <|> oneOf "_- ") (char ':') + val <- trim <$> manyTillChar anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ allowEmpty || not (null val) - let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $B.text val + guard $ allowEmpty || not (T.null val) + let key' = T.concat $ T.words $ T.toLower key + let val' = MetaBlocks $ B.toList $ B.plain $ B.text val return (key',val') parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc @@ -380,13 +383,13 @@ referenceKey = try $ do (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL = fmap unwords $ many $ try $ do + let sourceURL = fmap T.unwords $ many $ try $ do skipMany spaceChar notFollowedBy' referenceTitle notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes notFollowedBy' (() <$ reference) - many1 $ notFollowedBy space >> litChar - let betweenAngles = try $ char '<' >> manyTill litChar (char '>') + many1Char $ notFollowedBy space >> litChar + let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle attr <- option nullAttr $ try $ @@ -411,20 +414,20 @@ referenceKey = try $ do updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty -referenceTitle :: PandocMonad m => MarkdownParser m String +referenceTitle :: PandocMonad m => MarkdownParser m Text referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar -- A link title in quotes -quotedTitle :: PandocMonad m => Char -> MarkdownParser m String +quotedTitle :: PandocMonad m => Char -> MarkdownParser m Text quotedTitle c = try $ do char c notFollowedBy spaces let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum) - let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar - let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c - unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder + let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> countChar 1 litChar + let nestedChunk = (\x -> T.singleton c <> x <> T.singleton c) <$> quotedTitle c + T.unwords . T.words . T.concat <$> manyTill (nestedChunk <|> regChunk) pEnder -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for @@ -440,21 +443,21 @@ abbrevKey = do blanklines return $ return mempty -noteMarker :: PandocMonad m => MarkdownParser m String -noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') +noteMarker :: PandocMonad m => MarkdownParser m Text +noteMarker = string "[^" >> many1TillChar (satisfy $ not . isBlank) (char ']') -rawLine :: PandocMonad m => MarkdownParser m String +rawLine :: PandocMonad m => MarkdownParser m Text rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: PandocMonad m => MarkdownParser m String +rawLines :: PandocMonad m => MarkdownParser m Text rawLines = do first <- anyLine rest <- many rawLine - return $ unlines (first:rest) + return $ T.unlines (first:rest) noteBlock :: PandocMonad m => MarkdownParser m (F Blocks) noteBlock = try $ do @@ -466,7 +469,7 @@ noteBlock = try $ do optional indentSpaces first <- rawLines rest <- many $ try $ blanklines >> indentSpaces >> rawLines - let raw = unlines (first:rest) ++ "\n" + let raw = T.unlines (first:rest) <> "\n" optional blanklines parsed <- parseFromString' parseBlocks raw oldnotes <- stateNotes' <$> getState @@ -510,7 +513,7 @@ block = do , para , plain ] <?> "block" - trace (take 60 $ show $ B.toList $ runF res defaultParserState) + trace (T.take 60 $ tshow $ B.toList $ runF res defaultParserState) return res -- @@ -570,7 +573,7 @@ mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr mmdHeaderIdentifier = do (_, raw) <- reference let raw' = trim $ stripFirstAndLast raw - let ident = concat $ words $ map toLower raw' + let ident = T.concat $ T.words $ T.toLower raw' let attr = (ident, [], []) guardDisabled Ext_implicit_header_references <|> registerImplicitHeader raw' attr @@ -600,20 +603,20 @@ setextHeader = try $ do <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m () +registerImplicitHeader :: PandocMonad m => Text -> Attr -> MarkdownParser m () registerImplicitHeader raw attr@(ident, _, _) - | null raw = return () + | T.null raw = return () | otherwise = do - let key = toKey $ "[" ++ raw ++ "]" + let key = toKey $ "[" <> raw <> "]" updateState $ \s -> - s { stateHeaderKeys = M.insert key (('#':ident,""), attr) + s { stateHeaderKeys = M.insert key (("#" <> ident,""), attr) (stateHeaderKeys s) } -- -- hrule block -- -hrule :: PandocMonad m => ParserT [Char] st m (F Blocks) +hrule :: PandocMonad m => ParserT Text st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -627,13 +630,13 @@ hrule = try $ do -- code blocks -- -indentedLine :: PandocMonad m => MarkdownParser m String +indentedLine :: PandocMonad m => MarkdownParser m Text indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) -> Maybe Int - -> ParserT [Char] ParserState m Int + -> ParserT Text ParserState m Int blockDelimiter f len = try $ do skipNonindentSpaces c <- lookAhead (satisfy f) @@ -652,11 +655,11 @@ attributes = try $ do attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr -identifier :: PandocMonad m => MarkdownParser m String +identifier :: PandocMonad m => MarkdownParser m Text identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." - return (first:rest) + return $ T.pack (first:rest) identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) identifierAttr = try $ do @@ -674,15 +677,15 @@ keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) keyValAttr = try $ do key <- identifier char '=' - val <- enclosed (char '"') (char '"') litChar - <|> enclosed (char '\'') (char '\'') litChar + val <- T.pack <$> enclosed (char '"') (char '"') litChar + <|> T.pack <$> enclosed (char '\'') (char '\'') litChar <|> ("" <$ try (string "\"\"")) <|> ("" <$ try (string "''")) - <|> many (escapedChar' <|> noneOf " \t\n\r}") + <|> manyChar (escapedChar' <|> noneOf " \t\n\r}") return $ \(id',cs,kvs) -> case key of "id" -> (val,cs,kvs) - "class" -> (id',cs ++ words val,kvs) + "class" -> (id',cs ++ T.words val,kvs) _ -> (id',cs,kvs ++ [(key,val)]) specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) @@ -690,12 +693,12 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute :: PandocMonad m => MarkdownParser m Text rawAttribute = do char '{' skipMany spaceChar char '=' - format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + format <- many1Char $ satisfy (\c -> isAlphaNum c || c `elem` ['-', '_']) skipMany spaceChar char '}' return format @@ -703,7 +706,7 @@ rawAttribute = do codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do indentchars <- nonindentSpaces - let indentLevel = length indentchars + let indentLevel = T.length indentchars c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing @@ -713,9 +716,9 @@ codeBlockFenced = try $ do <|> (Right <$> option ("",[],[]) (try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar))) blankline - contents <- intercalate "\n" <$> + contents <- T.intercalate "\n" <$> manyTill (gobbleAtMostSpaces indentLevel >> anyLine) (try $ do blockDelimiter (== c) (Just size) @@ -726,8 +729,8 @@ codeBlockFenced = try $ do Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers -toLanguageId :: String -> String -toLanguageId = map toLower . go +toLanguageId :: Text -> Text +toLanguageId = T.toLower . go where go "c++" = "cpp" go "objective-c" = "objectivec" go x = x @@ -737,11 +740,11 @@ codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines l <- indentedLine - return $ b ++ l)) + return $ b <> l)) optional blanklines classes <- getOption readerIndentedCodeClasses return $ return $ B.codeBlockWith ("", classes, []) $ - stripTrailingNewlines $ concat contents + stripTrailingNewlines $ T.concat contents lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks) lhsCodeBlock = do @@ -751,33 +754,33 @@ lhsCodeBlock = do <|> (return . B.codeBlockWith ("",["haskell"],[]) <$> lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String +lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m Text lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline - contents <- many1Till anyChar (try $ string "\\end{code}") + contents <- many1TillChar anyChar (try $ string "\\end{code}") blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String +lhsCodeBlockBird :: PandocMonad m => MarkdownParser m Text lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String +lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m Text lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String +lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m Text lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ Prelude.fail "Not in first column" lns <- many1 $ birdTrackLine c -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns + let lns' = if all (\ln -> T.null ln || T.take 1 ln == " ") lns + then map (T.drop 1) lns else lns blanklines - return $ intercalate "\n" lns' + return $ T.intercalate "\n" lns' -birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String +birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -791,12 +794,12 @@ birdTrackLine c = try $ do emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ') -emailBlockQuote :: PandocMonad m => MarkdownParser m [String] +emailBlockQuote :: PandocMonad m => MarkdownParser m [Text] emailBlockQuote = try $ do emailBlockQuoteStart - let emailLine = many $ nonEndline <|> try - (endline >> notFollowedBy emailBlockQuoteStart >> - return '\n') + let emailLine = manyChar $ nonEndline <|> try + (endline >> notFollowedBy emailBlockQuoteStart >> + return '\n') let emailSep = try (newline >> emailBlockQuoteStart) first <- emailLine rest <- many $ try $ emailSep >> emailLine @@ -809,7 +812,7 @@ blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ T.intercalate "\n" raw <> "\n\n" return $ B.blockQuote <$> contents -- @@ -833,7 +836,7 @@ orderedListStart mbstydelim = try $ do skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number (do guardDisabled Ext_fancy_lists - start <- many1 digit >>= safeRead + start <- many1Char digit >>= safeRead char '.' gobbleSpaces 1 <|> () <$ lookAhead newline optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) @@ -857,7 +860,7 @@ orderedListStart mbstydelim = try $ do listStart :: PandocMonad m => MarkdownParser m () listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing) -listLine :: PandocMonad m => Int -> MarkdownParser m String +listLine :: PandocMonad m => Int -> MarkdownParser m Text listLine continuationIndent = try $ do notFollowedBy' (do gobbleSpaces continuationIndent skipMany spaceChar @@ -867,19 +870,19 @@ listLine continuationIndent = try $ do optional (() <$ gobbleSpaces continuationIndent) listLineCommon -listLineCommon :: PandocMonad m => MarkdownParser m String -listLineCommon = concat <$> manyTill - ( many1 (satisfy $ \c -> c `notElem` ['\n', '<', '`']) +listLineCommon :: PandocMonad m => MarkdownParser m Text +listLineCommon = T.concat <$> manyTill + ( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`']) <|> fmap snd (withRaw code) <|> fmap snd (htmlTag isCommentTag) - <|> count 1 anyChar + <|> countChar 1 anyChar ) newline -- parse raw text for one list item, excluding start marker and continuations rawListItem :: PandocMonad m => Bool -- four space rule -> MarkdownParser m a - -> MarkdownParser m (String, Int) + -> MarkdownParser m (Text, Int) rawListItem fourSpaceRule start = try $ do pos1 <- getPosition start @@ -892,14 +895,14 @@ rawListItem fourSpaceRule start = try $ do notFollowedBy (() <$ codeBlockFenced) notFollowedBy blankline listLine continuationIndent) - blanks <- many blankline - let result = unlines (first:rest) ++ blanks + blanks <- manyChar blankline + let result = T.unlines (first:rest) <> blanks return (result, continuationIndent) -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: PandocMonad m => Int -> MarkdownParser m String +listContinuation :: PandocMonad m => Int -> MarkdownParser m Text listContinuation continuationIndent = try $ do x <- try $ do notFollowedBy blankline @@ -913,12 +916,12 @@ listContinuation continuationIndent = try $ do notFollowedByDivCloser gobbleSpaces continuationIndent <|> notFollowedBy' listStart anyLineNewline - blanks <- many blankline - return $ concat (x:xs) ++ blanks + blanks <- manyChar blankline + return $ T.concat (x:xs) <> blanks -- Variant of blanklines that doesn't require blank lines -- before a fence or eof. -blanklines' :: PandocMonad m => MarkdownParser m [Char] +blanklines' :: PandocMonad m => MarkdownParser m Text blanklines' = blanklines <|> try checkDivCloser where checkDivCloser = do guardEnabled Ext_fenced_divs @@ -954,7 +957,7 @@ listItem fourSpaceRule start = try $ do (first, continuationIndent) <- rawListItem fourSpaceRule start continuations <- many (listContinuation continuationIndent) -- parse the extracted block, which may contain various block elements: - let raw = concat (first:continuations) + let raw = T.concat (first:continuations) contents <- parseFromString' parseBlocks raw updateState (\st -> st {stateParserContext = oldContext}) exts <- getOption readerExtensions @@ -990,7 +993,7 @@ defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' tabStop <- getOption readerTabStop - let remaining = tabStop - (length sps + 1) + let remaining = tabStop - (T.length sps + 1) if remaining > 0 then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar else mzero @@ -1001,11 +1004,11 @@ definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact term <- parseFromString' (trimInlinesF <$> inlines) rawLine' - contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw + contents <- mapM (parseFromString' parseBlocks . (<> "\n")) raw optional blanklines return $ liftM2 (,) term (sequence contents) -defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String +defRawBlock :: PandocMonad m => Bool -> MarkdownParser m Text defRawBlock compact = try $ do hasBlank <- option False $ blankline >> return True defListMarker @@ -1020,13 +1023,13 @@ defRawBlock compact = try $ do <|> notFollowedBy defListMarker anyLine ) rawlines <- many dline - cont <- fmap concat $ many $ try $ do + cont <- fmap T.concat $ many $ try $ do trailing <- option "" blanklines ln <- indentSpaces >> notFollowedBy blankline >> anyLine lns <- many dline - return $ trailing ++ unlines (ln:lns) - return $ trimr (firstline ++ unlines rawlines ++ cont) ++ - if hasBlank || not (null cont) then "\n\n" else "" + return $ trailing <> T.unlines (ln:lns) + return $ trimr (firstline <> T.unlines rawlines <> cont) <> + if hasBlank || not (T.null cont) then "\n\n" else "" definitionList :: PandocMonad m => MarkdownParser m (F Blocks) definitionList = try $ do @@ -1063,7 +1066,7 @@ para = try $ do | not (null alt) -> -- the fig: at beginning of title indicates a figure return $ B.singleton - $ Image attr alt (src,'f':'i':'g':':':tit) + $ Image attr alt (src, "fig:" <> tit) _ -> return x' | otherwise = x result <- implicitFigures . trimInlinesF <$> inlines1 @@ -1082,7 +1085,7 @@ para = try $ do inHtmlBlock <- stateInHtmlBlock <$> getState case inHtmlBlock of Just "div" -> () <$ - lookAhead (htmlTag (~== TagClose "div")) + lookAhead (htmlTag (~== TagClose ("div" :: Text))) _ -> mzero <|> do guardEnabled Ext_fenced_divs divLevel <- stateFencedDivLevel <$> getState @@ -1098,7 +1101,7 @@ plain = fmap B.plain . trimInlinesF <$> inlines1 -- raw html -- -htmlElement :: PandocMonad m => MarkdownParser m String +htmlElement :: PandocMonad m => MarkdownParser m Text htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> fmap snd (htmlTag isBlockTag) @@ -1132,14 +1135,14 @@ htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ if null first + return $ if T.null first then mempty else return $ B.rawBlock "html" first -strictHtmlBlock :: PandocMonad m => MarkdownParser m String +strictHtmlBlock :: PandocMonad m => MarkdownParser m Text strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: PandocMonad m => MarkdownParser m String +rawVerbatimBlock :: PandocMonad m => MarkdownParser m Text rawVerbatimBlock = htmlInBalanced isVerbTag where isVerbTag (TagOpen "pre" _) = True isVerbTag (TagOpen "style" _) = True @@ -1150,13 +1153,13 @@ rawVerbatimBlock = htmlInBalanced isVerbTag rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "tex" . trim . concat <$> - many1 ((++) <$> rawConTeXtEnvironment <*> spnl')) - <|> (B.rawBlock "tex" . trim . concat <$> - many1 ((++) <$> rawLaTeXBlock <*> spnl')) + result <- (B.rawBlock "tex" . trim . T.concat <$> + many1 ((<>) <$> rawConTeXtEnvironment <*> spnl')) + <|> (B.rawBlock "tex" . trim . T.concat <$> + many1 ((<>) <$> rawLaTeXBlock <*> spnl')) return $ case B.toList result of [RawBlock _ cs] - | all (`elem` [' ','\t','\n']) cs -> return mempty + | T.all (`elem` [' ','\t','\n']) cs -> return mempty -- don't create a raw block for suppressed macro defs _ -> return result @@ -1186,7 +1189,7 @@ rawHtmlBlocks = do return result -- remove markdown="1" attribute -stripMarkdownAttribute :: String -> String +stripMarkdownAttribute :: Text -> Text stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s where filterAttrib (TagOpen t as) = TagOpen t [(k,v) | (k,v) <- as, k /= "markdown"] @@ -1211,7 +1214,7 @@ lineBlock = try $ do -- and the length including trailing space. dashedLine :: PandocMonad m => Char - -> ParserT [Char] st m (Int, Int) + -> ParserT Text st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1232,9 +1235,9 @@ simpleTableHeader headless = try $ do dashes <- many1 (dashedLine '-') newline let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' + let indices = scanl (+) (T.length initSp) lines' -- If no header, calculate alignment on basis of first row of text - rawHeads <- fmap (tail . splitStringByIndices (init indices)) $ + rawHeads <- fmap (tail . splitTextByIndices (init indices)) $ if headless then lookAhead anyLine else return rawContent @@ -1250,15 +1253,15 @@ simpleTableHeader headless = try $ do -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the -- dashed line under the rows. -alignType :: [String] +alignType :: [Text] -> Int -> Alignment alignType [] _ = AlignDefault alignType strLst len = - let nonempties = filter (not . null) $ map trimr strLst + let nonempties = filter (not . T.null) $ map trimr strLst (leftSpace, rightSpace) = - case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) + case sortBy (comparing T.length) nonempties of + (x:_) -> (T.head x `elem` [' ', 't'], T.length x < len) [] -> (False, False) in case (leftSpace, rightSpace) of (True, False) -> AlignRight @@ -1267,7 +1270,7 @@ alignType strLst len = (False, False) -> AlignDefault -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: PandocMonad m => MarkdownParser m String +tableFooter :: PandocMonad m => MarkdownParser m Text tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines' -- Parse a table separator - dashed line. @@ -1277,12 +1280,12 @@ tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. rawTableLine :: PandocMonad m => [Int] - -> MarkdownParser m [String] + -> MarkdownParser m [Text] rawTableLine indices = do notFollowedBy' (blanklines' <|> tableFooter) - line <- many1Till anyChar newline + line <- many1TillChar anyChar newline return $ map trim $ tail $ - splitStringByIndices (init indices) line + splitTextByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). tableLine :: PandocMonad m @@ -1297,7 +1300,7 @@ multilineRow :: PandocMonad m -> MarkdownParser m (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) - let cols = map unlines $ transpose colLines + let cols = map T.unlines $ transpose colLines fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' @@ -1344,7 +1347,7 @@ multilineTableHeader headless = try $ do dashes <- many1 (dashedLine '-') newline let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' + let indices = scanl (+) (T.length initSp) lines' -- compensate for the fact that intercolumn spaces are -- not included in the last index: let indices' = case reverse indices of @@ -1352,14 +1355,14 @@ multilineTableHeader headless = try $ do (x:xs) -> reverse (x+1:xs) rawHeadsList <- if headless then fmap (map (:[]) . tail . - splitStringByIndices (init indices')) $ lookAhead anyLine + splitTextByIndices (init indices')) $ lookAhead anyLine else return $ transpose $ map - (tail . splitStringByIndices (init indices')) + (tail . splitTextByIndices (init indices')) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" - else map (unlines . map trim) rawHeadsList + else map (T.unlines . map trim) rawHeadsList heads <- fmap sequence $ mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads return (heads, aligns, indices') @@ -1393,7 +1396,7 @@ pipeTable = try $ do lines' <- many pipeTableRow let lines'' = map (take (length aligns) <$>) lines' let maxlength = maximum $ - map (\x -> length . stringify $ runF x def) (heads' : lines'') + map (\x -> T.length . stringify $ runF x def) (heads' : lines'') numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> @@ -1430,7 +1433,7 @@ pipeTableCell = return $ B.plain <$> result) <|> return mempty -pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1446,12 +1449,12 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. -scanForPipe :: PandocMonad m => ParserT [Char] st m () +scanForPipe :: PandocMonad m => ParserT Text st m () scanForPipe = do inp <- getInput - case break (\c -> c == '\n' || c == '|') inp of - (_,'|':_) -> return () - _ -> mzero + case T.break (\c -> c == '\n' || c == '|') inp of + (_, T.uncons -> Just ('|', _)) -> return () + _ -> mzero -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in @@ -1561,7 +1564,7 @@ escapedChar = do result <- escapedChar' case result of ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space - _ -> return $ return $ B.str [result] + _ -> return $ return $ B.str $ T.singleton result ltSign :: PandocMonad m => MarkdownParser m (F Inlines) ltSign = do @@ -1574,12 +1577,12 @@ exampleRef :: PandocMonad m => MarkdownParser m (F Inlines) exampleRef = try $ do guardEnabled Ext_example_lists char '@' - lab <- many1 (alphaNum <|> oneOf "-_") + lab <- many1Char (alphaNum <|> oneOf "-_") return $ do st <- askF return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + Just n -> B.str $ tshow n + Nothing -> B.str $ "@" <> lab symbol :: PandocMonad m => MarkdownParser m (F Inlines) symbol = do @@ -1587,16 +1590,16 @@ symbol = do <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ return $ B.str [result] + return $ return $ B.str $ T.singleton result -- parses inline code, between n `s and n `s code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- (trim . concat) <$> + result <- (trim . T.concat) <$> manyTill (notFollowedBy (inList >> listStart) >> - (many1 (noneOf "`\n") <|> many1 (char '`') <|> + (many1Char (noneOf "`\n") <|> many1Char (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " "))) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) @@ -1627,10 +1630,10 @@ enclosure c = do guardDisabled Ext_intraword_underscores <|> guard (c == '*') <|> (guard =<< notAfterString) - cs <- many1 (char c) + cs <- many1Char (char c) (return (B.str cs) <>) <$> whitespace <|> - case length cs of + case T.length cs of 3 -> three c 2 -> two c mempty 1 -> one c mempty @@ -1653,7 +1656,7 @@ three c = do (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents)) <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents)) <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents)) - <|> return (return (B.str [c,c,c]) <> contents) + <|> return (return (B.str $ T.pack [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. @@ -1662,7 +1665,7 @@ two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) (ender c 2 >> updateLastStrPos >> return (B.strong <$> (prefix' <> contents))) - <|> return (return (B.str [c,c]) <> (prefix' <> contents)) + <|> return (return (B.str $ T.pack [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. @@ -1673,7 +1676,7 @@ one c prefix' = do notFollowedBy (ender c 1) >> two c mempty) ) (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents))) - <|> return (return (B.str [c]) <> (prefix' <> contents)) + <|> return (return (B.str $ T.singleton c) <> (prefix' <> contents)) strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) strongOrEmph = enclosure '*' <|> enclosure '_' @@ -1717,16 +1720,16 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: PandocMonad m => ParserT [Char] st m Char +nonEndline :: PandocMonad m => ParserT Text st m Char nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) str = do - result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) + result <- many1Char (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) updateLastStrPos (do guardEnabled Ext_smart abbrevs <- getOption readerAbbreviations - if not (null result) && last result == '.' && result `Set.member` abbrevs + if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs then try (do ils <- whitespace -- ?? lookAhead alphaNum -- replace space after with nonbreaking space @@ -1766,36 +1769,36 @@ endline = try $ do -- -- a reference label for a link -reference :: PandocMonad m => MarkdownParser m (F Inlines, String) +reference :: PandocMonad m => MarkdownParser m (F Inlines, Text) reference = do guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^") guardDisabled Ext_citations <|> notFollowedBy' (string "[@") withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -parenthesizedChars :: PandocMonad m => MarkdownParser m [Char] +parenthesizedChars :: PandocMonad m => MarkdownParser m Text parenthesizedChars = do result <- charsInBalanced '(' ')' litChar - return $ '(' : result ++ ")" + return $ "(" <> result <> ")" -- source for a link, with optional title -source :: PandocMonad m => MarkdownParser m (String, String) +source :: PandocMonad m => MarkdownParser m (Text, Text) source = do char '(' skipSpaces let urlChunk = try parenthesizedChars - <|> (notFollowedBy (oneOf " )") >> count 1 litChar) - <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')")) - let sourceURL = (unwords . words . concat) <$> many urlChunk + <|> (notFollowedBy (oneOf " )") >> countChar 1 litChar) + <|> try (many1Char spaceChar <* notFollowedBy (oneOf "\"')")) + let sourceURL = (T.unwords . T.words . T.concat) <$> many urlChunk let betweenAngles = try $ - char '<' >> manyTill litChar (char '>') + char '<' >> manyTillChar litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" $ try $ spnl >> linkTitle skipSpaces char ')' return (escapeURI $ trimr src, tit) -linkTitle :: PandocMonad m => MarkdownParser m String +linkTitle :: PandocMonad m => MarkdownParser m Text linkTitle = quotedTitle '"' <|> quotedTitle '\'' link :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1823,13 +1826,13 @@ isSmallCaps :: Attr -> Bool isSmallCaps ("",["smallcaps"],[]) = True isSmallCaps ("",[],kvs) = case lookup "style" kvs of - Just s -> map toLower (filter (`notElem` " \t;") s) == + Just s -> T.toLower (T.filter (`notElem` [' ', '\t', ';']) s) == "font-variant:small-caps" Nothing -> False isSmallCaps _ = False regLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) + => (Attr -> Text -> Text -> Inlines -> Inlines) -> F Inlines -> MarkdownParser m (F Inlines) regLink constructor lab = try $ do @@ -1840,8 +1843,8 @@ regLink constructor lab = try $ do -- a link like [this][ref] or [this][] or [this] referenceLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) - -> (F Inlines, String) + => (Attr -> Text -> Text -> Inlines -> Inlines) + -> (F Inlines, Text) -> MarkdownParser m (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1863,7 +1866,7 @@ referenceLink constructor (lab, raw) = do parsedRaw' <- parsedRaw fallback' <- fallback return $ B.str "[" <> fallback' <> B.str "]" <> - (if sp && not (null raw) then B.space else mempty) <> + (if sp && not (T.null raw) then B.space else mempty) <> parsedRaw' return $ do keys <- asksF stateKeys @@ -1878,19 +1881,19 @@ referenceLink constructor (lab, raw) = do else makeFallback Just ((src,tit), attr) -> constructor attr src tit <$> lab -dropBrackets :: String -> String -dropBrackets = reverse . dropRB . reverse . dropLB - where dropRB (']':xs) = xs - dropRB xs = xs - dropLB ('[':xs) = xs - dropLB xs = xs +dropBrackets :: Text -> Text +dropBrackets = dropRB . dropLB + where dropRB (T.unsnoc -> Just (xs,']')) = xs + dropRB xs = xs + dropLB (T.uncons -> Just ('[',xs)) = xs + dropLB xs = xs bareURL :: PandocMonad m => MarkdownParser m (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris getState >>= guard . stateAllowLinks (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress) - notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") + notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text)) return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig) autoLink :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1902,19 +1905,20 @@ autoLink = try $ do -- is finished, because the uri parser tries to avoid parsing -- final punctuation. for example: in `<http://hi---there>`, -- the URI parser will stop before the dashes. - extra <- fromEntities <$> manyTill nonspaceChar (char '>') + extra <- fromEntities <$> manyTillChar nonspaceChar (char '>') attr <- option ("", [cls], []) $ try $ guardEnabled Ext_link_attributes >> attributes - return $ return $ B.linkWith attr (src ++ escapeURI extra) "" - (B.str $ orig ++ extra) + return $ return $ B.linkWith attr (src <> escapeURI extra) "" + (B.str $ orig <> extra) image :: PandocMonad m => MarkdownParser m (F Inlines) image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor attr' src = case takeExtension src of - "" -> B.imageWith attr' (addExtension src defaultExt) + let constructor attr' src = case takeExtension (T.unpack src) of + "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src) + $ T.unpack defaultExt) _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) @@ -1926,7 +1930,7 @@ note = try $ do return $ do notes <- asksF stateNotes' case M.lookup ref notes of - Nothing -> return $ B.str $ "[^" ++ ref ++ "]" + Nothing -> return $ B.str $ "[^" <> ref <> "]" Just (_pos, contents) -> do st <- askF -- process the note in a context that doesn't resolve @@ -1949,29 +1953,29 @@ rawLaTeXInline' = try $ do s <- rawLaTeXInline return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String +rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) - <|> many1 letter - contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar) - (try $ string "\\stop" >> string completion) - return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion + <|> many1Char letter + contents <- manyTill (rawConTeXtEnvironment <|> countChar 1 anyChar) + (try $ string "\\stop" >> textStr completion) + return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion -inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String +inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text inBrackets parser = do char '[' - contents <- many parser + contents <- manyChar parser char ']' - return $ "[" ++ contents ++ "]" + return $ "[" <> contents <> "]" spanHtml :: PandocMonad m => MarkdownParser m (F Inlines) spanHtml = try $ do guardEnabled Ext_native_spans - (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) - contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) []) + contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text))) let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs + let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ if isSmallCaps (ident, classes, keyvals) then B.smallcaps <$> contents @@ -1980,20 +1984,20 @@ spanHtml = try $ do divHtml :: PandocMonad m => MarkdownParser m (F Blocks) divHtml = try $ do guardEnabled Ext_native_divs - (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) + (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) []) -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just "div" } bls <- option "" (blankline >> option "" blanklines) contents <- mconcat <$> - many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block) - closed <- option False (True <$ htmlTag (~== TagClose "div")) + many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block) + closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text))) if closed then do updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs + let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) <$> contents else -- avoid backtracing @@ -2005,7 +2009,7 @@ divFenced = try $ do string ":::" skipMany (char ':') skipMany spaceChar - attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar) + attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) skipMany spaceChar skipMany (char ':') blankline @@ -2047,7 +2051,7 @@ emoji :: PandocMonad m => MarkdownParser m (F Inlines) emoji = try $ do guardEnabled Ext_emoji char ':' - emojikey <- many1 (oneOf emojiChars) + emojikey <- many1Char (oneOf emojiChars) char ':' case emojiToInline emojikey of Just i -> return (return $ B.singleton i) @@ -2077,14 +2081,14 @@ textualCite = try $ do mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite case mbrest of Just (rest, raw) -> - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) + return $ (flip B.cite (B.text $ "@" <> key <> " " <> raw) . (first:)) <$> rest Nothing -> (do (cs, raw) <- withRaw $ bareloc first - let (spaces',raw') = span isSpace raw - spc | null spaces' = mempty - | otherwise = B.space + let (spaces',raw') = T.span isSpace raw + spc | T.null spaces' = mempty + | otherwise = B.space lab <- parseFromString' inlines $ dropBrackets raw' fallback <- referenceLink B.linkWith (lab,raw') return $ do @@ -2092,12 +2096,12 @@ textualCite = try $ do cs' <- cs return $ case B.toList fallback' of - Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback' - _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw)) + Link{}:_ -> B.cite [first] (B.str $ "@" <> key) <> spc <> fallback' + _ -> B.cite cs' (B.text $ "@" <> key <> " " <> raw)) <|> return (do st <- askF return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] $ B.str $ '@':key) + Just n -> B.str $ tshow n + _ -> B.cite [first] $ B.str $ "@" <> key) bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation]) bareloc c = try $ do diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 35bb8e3eb..07240e951 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE RelaxedPolyRec #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RelaxedPolyRec #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- | Module : Text.Pandoc.Readers.MediaWiki @@ -24,11 +25,12 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isDigit, isSpace) import qualified Data.Foldable as F -import Data.List (intercalate, intersperse, isPrefixOf) +import Data.List (intersperse) import Data.Maybe (fromMaybe, maybeToList) import Data.Sequence (ViewL (..), viewl, (<|)) import qualified Data.Set as Set -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B @@ -39,7 +41,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, - trim) + trim, splitTextBy, tshow) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) @@ -57,7 +59,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (unpack (crFilter s) ++ "\n") + (crFilter s <> "\n") case parsed of Right result -> return result Left e -> throwError e @@ -66,12 +68,12 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int , mwNextLinkNumber :: Int , mwCategoryLinks :: [Inlines] - , mwIdentifierList :: Set.Set String + , mwIdentifierList :: Set.Set Text , mwLogMessages :: [LogMessage] , mwInTT :: Bool } -type MWParser m = ParserT [Char] MWState m +type MWParser m = ParserT Text MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions @@ -105,58 +107,58 @@ specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" -sym :: PandocMonad m => String -> MWParser m () -sym s = () <$ try (string s) +sym :: PandocMonad m => Text -> MWParser m () +sym s = () <$ try (string $ T.unpack s) -newBlockTags :: [String] +newBlockTags :: [Text] newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"] -isBlockTag' :: Tag String -> Bool +isBlockTag' :: Tag Text -> Bool isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag = isBlockTag tag -isInlineTag' :: Tag String -> Bool +isInlineTag' :: Tag Text -> Bool isInlineTag' (TagComment _) = True isInlineTag' t = not (isBlockTag' t) -eitherBlockOrInline :: [String] +eitherBlockOrInline :: [Text] eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", "map", "area", "object"] htmlComment :: PandocMonad m => MWParser m () htmlComment = () <$ htmlTag isCommentTag -inlinesInTags :: PandocMonad m => String -> MWParser m Inlines +inlinesInTags :: PandocMonad m => Text -> MWParser m Inlines inlinesInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else trimInlines . mconcat <$> manyTill inline (htmlTag (~== TagClose tag)) -blocksInTags :: PandocMonad m => String -> MWParser m Blocks +blocksInTags :: PandocMonad m => Text -> MWParser m Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) let closer = if tag == "li" - then htmlTag (~== TagClose "li") + then htmlTag (~== TagClose ("li" :: Text)) <|> lookAhead ( - htmlTag (~== TagOpen "li" []) - <|> htmlTag (~== TagClose "ol") - <|> htmlTag (~== TagClose "ul")) + htmlTag (~== TagOpen ("li" :: Text) []) + <|> htmlTag (~== TagClose ("ol" :: Text)) + <|> htmlTag (~== TagClose ("ul" :: Text))) else htmlTag (~== TagClose tag) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else mconcat <$> manyTill block closer -charsInTags :: PandocMonad m => String -> MWParser m [Char] -charsInTags tag = try $ do +textInTags :: PandocMonad m => Text -> MWParser m Text +textInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return "" - else manyTill anyChar (htmlTag (~== TagClose tag)) + else T.pack <$> manyTill anyChar (htmlTag (~== TagClose tag)) -- -- main parser @@ -192,7 +194,7 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res para :: PandocMonad m => MWParser m Blocks @@ -234,16 +236,16 @@ table = do else (replicate cols mempty, hdr:rows') return $ B.table caption cellspecs headers rows -parseAttrs :: PandocMonad m => MWParser m [(String,String)] +parseAttrs :: PandocMonad m => MWParser m [(Text,Text)] parseAttrs = many1 parseAttr -parseAttr :: PandocMonad m => MWParser m (String, String) +parseAttr :: PandocMonad m => MWParser m (Text, Text) parseAttr = try $ do skipMany spaceChar - k <- many1 letter + k <- many1Char letter char '=' - v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) - <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|') + v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"')) + <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) tableStart :: PandocMonad m => MWParser m () @@ -293,8 +295,8 @@ tableCell = try $ do notFollowedBy (char '|') skipMany spaceChar pos' <- getPosition - ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> - ((snd <$> withRaw table) <|> count 1 anyChar)) + ls <- T.concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> + ((snd <$> withRaw table) <|> countChar 1 anyChar)) bs <- parseFromString (do setPosition pos' mconcat <$> many block) ls let align = case lookup "align" attrs of @@ -307,48 +309,49 @@ tableCell = try $ do Nothing -> 0.0 return ((align, width), bs) -parseWidth :: String -> Maybe Double +parseWidth :: Text -> Maybe Double parseWidth s = - case reverse s of - ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds) - _ -> Nothing + case T.unsnoc s of + Just (ds, '%') | T.all isDigit ds -> safeRead $ "0." <> ds + _ -> Nothing -template :: PandocMonad m => MWParser m String +template :: PandocMonad m => MWParser m Text template = try $ do string "{{" notFollowedBy (char '{') lookAhead $ letter <|> digit <|> char ':' - let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar + let chunk = template <|> variable <|> many1Char (noneOf "{}") <|> countChar 1 anyChar contents <- manyTill chunk (try $ string "}}") - return $ "{{" ++ concat contents ++ "}}" + return $ "{{" <> T.concat contents <> "}}" blockTag :: PandocMonad m => MWParser m Blocks blockTag = do (tag, _) <- lookAhead $ htmlTag isBlockTag' case tag of TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote" - TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre" + TagOpen "pre" _ -> B.codeBlock . trimCode <$> textInTags "pre" TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs TagOpen "source" attrs -> syntaxhighlight "source" attrs TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$> - charsInTags "haskell" + textInTags "haskell" TagOpen "gallery" _ -> blocksInTags "gallery" TagOpen "p" _ -> mempty <$ htmlTag (~== tag) TagClose "p" -> mempty <$ htmlTag (~== tag) _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag) -trimCode :: String -> String -trimCode ('\n':xs) = stripTrailingNewlines xs -trimCode xs = stripTrailingNewlines xs +trimCode :: Text -> Text +trimCode t = case T.uncons t of + Just ('\n', xs) -> stripTrailingNewlines xs + _ -> stripTrailingNewlines t -syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks +syntaxhighlight :: PandocMonad m => Text -> [Attribute Text] -> MWParser m Blocks syntaxhighlight tag attrs = try $ do let mblang = lookup "lang" attrs let mbstart = lookup "start" attrs let mbline = lookup "line" attrs let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart - contents <- charsInTags tag + contents <- textInTags tag return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents hrule :: PandocMonad m => MWParser m Blocks @@ -362,17 +365,17 @@ preformatted = try $ do guardColumnOne char ' ' let endline' = B.linebreak <$ try (newline <* char ' ') - let whitespace' = B.str <$> many1 ('\160' <$ spaceChar) + let whitespace' = B.str <$> many1Char ('\160' <$ spaceChar) let spToNbsp ' ' = '\160' spToNbsp x = x let nowiki' = mconcat . intersperse B.linebreak . map B.str . - lines . fromEntities . map spToNbsp <$> try - (htmlTag (~== TagOpen "nowiki" []) *> - manyTill anyChar (htmlTag (~== TagClose "nowiki"))) + T.lines . fromEntities . T.map spToNbsp <$> try + (htmlTag (~== TagOpen ("nowiki" :: Text) []) *> + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text)))) let inline' = whitespace' <|> endline' <|> nowiki' <|> try (notFollowedBy newline *> inline) contents <- mconcat <$> many1 inline' - let spacesStr (Str xs) = all isSpace xs + let spacesStr (Str xs) = T.all isSpace xs spacesStr _ = False if F.all spacesStr contents then return mempty @@ -385,7 +388,7 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode strToCode x = x normalizeCode [] = [] normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 = - normalizeCode $ Code a1 (x ++ y) : zs + normalizeCode $ Code a1 (x <> y) : zs normalizeCode (x:xs) = x : normalizeCode xs header :: PandocMonad m => MWParser m Blocks @@ -400,22 +403,22 @@ header = try $ do -- See #4731: modifyIdentifier :: Attr -> Attr modifyIdentifier (ident,cl,kv) = (ident',cl,kv) - where ident' = map (\c -> if c == '-' then '_' else c) ident + where ident' = T.map (\c -> if c == '-' then '_' else c) ident bulletList :: PandocMonad m => MWParser m Blocks bulletList = B.bulletList <$> ( many1 (listItem '*') - <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <* - optional (htmlTag (~== TagClose "ul"))) ) + <|> (htmlTag (~== TagOpen ("ul" :: Text) []) *> spaces *> many (listItem '*' <|> li) <* + optional (htmlTag (~== TagClose ("ul" :: Text)))) ) orderedList :: PandocMonad m => MWParser m Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) <|> try - (do (tag,_) <- htmlTag (~== TagOpen "ol" []) + (do (tag,_) <- htmlTag (~== TagOpen ("ol" :: Text) []) spaces items <- many (listItem '#' <|> li) - optional (htmlTag (~== TagClose "ol")) + optional (htmlTag (~== TagClose ("ol" :: Text))) let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) @@ -452,7 +455,7 @@ anyListStart :: PandocMonad m => MWParser m Char anyListStart = guardColumnOne >> oneOf "*#:;" li :: PandocMonad m => MWParser m Blocks -li = lookAhead (htmlTag (~== TagOpen "li" [])) *> +li = lookAhead (htmlTag (~== TagOpen ("li" :: Text) [])) *> (firstParaToPlain <$> blocksInTags "li") <* spaces listItem :: PandocMonad m => Char -> MWParser m Blocks @@ -464,13 +467,13 @@ listItem c = try $ do else do skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ string extras *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) contents <- parseFromString (do setPosition pos' many1 $ listItem' c) - (unlines (first : rest)) + (T.unlines (first : rest)) case c of '*' -> return $ B.bulletList contents '#' -> return $ B.orderedList contents @@ -484,20 +487,20 @@ listItem c = try $ do -- }} -- * next list item -- which seems to be valid mediawiki. -listChunk :: PandocMonad m => MWParser m String -listChunk = template <|> count 1 anyChar +listChunk :: PandocMonad m => MWParser m Text +listChunk = template <|> countChar 1 anyChar listItem' :: PandocMonad m => Char -> MWParser m Blocks listItem' c = try $ do listStart c skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ char c *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) parseFromString (do setPosition pos' firstParaToPlain . mconcat <$> many1 block) - $ unlines $ first : rest + $ T.unlines $ first : rest firstParaToPlain :: Blocks -> Blocks firstParaToPlain contents = @@ -528,23 +531,23 @@ inline = whitespace <|> special str :: PandocMonad m => MWParser m Inlines -str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) +str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) math :: PandocMonad m => MWParser m Inlines -math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) - <|> (B.math . trim <$> charsInTags "math") - <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) - <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) +math = (B.displayMath . trim <$> try (many1 (char ':') >> textInTags "math")) + <|> (B.math . trim <$> textInTags "math") + <|> (B.displayMath . trim <$> try (dmStart *> manyTillChar anyChar dmEnd)) + <|> (B.math . trim <$> try (mStart *> manyTillChar (satisfy (/='\n')) mEnd)) where dmStart = string "\\[" dmEnd = try (string "\\]") mStart = string "\\(" mEnd = try (string "\\)") -variable :: PandocMonad m => MWParser m String +variable :: PandocMonad m => MWParser m Text variable = try $ do string "{{{" - contents <- manyTill anyChar (try $ string "}}}") - return $ "{{{" ++ contents ++ "}}}" + contents <- manyTillChar anyChar (try $ string "}}}") + return $ "{{{" <> contents <> "}}}" inlineTag :: PandocMonad m => MWParser m Inlines inlineTag = do @@ -553,11 +556,11 @@ inlineTag = do TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref" TagOpen "nowiki" _ -> try $ do (_,raw) <- htmlTag (~== tag) - if '/' `elem` raw + if T.any (== '/') raw then return mempty else B.text . fromEntities <$> - manyTill anyChar (htmlTag (~== TagClose "nowiki")) - TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text))) + TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen ("br" :: Text) []) -- will get /> too *> optional blankline) TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike" TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" @@ -570,12 +573,12 @@ inlineTag = do result <- encode <$> inlinesInTags "tt" updateState $ \st -> st{ mwInTT = inTT } return result - TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" + TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> textInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) special :: PandocMonad m => MWParser m Inlines -special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *> - oneOf specialChars) +special = B.str <$> countChar 1 (notFollowedBy' (htmlTag isBlockTag') *> + oneOf specialChars) inlineHtml :: PandocMonad m => MWParser m Inlines inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' @@ -594,7 +597,7 @@ endline = () <$ try (newline <* notFollowedBy anyListStart) imageIdentifiers :: PandocMonad m => [MWParser m ()] -imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] +imageIdentifiers = [sym (identifier <> ":") | identifier <- identifiers] where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier", "Bild"] @@ -602,9 +605,9 @@ image :: PandocMonad m => MWParser m Inlines image = try $ do sym "[[" choice imageIdentifiers - fname <- addUnderscores <$> many1 (noneOf "|]") + fname <- addUnderscores <$> many1Char (noneOf "|]") _ <- many imageOption - dims <- try (char '|' *> sepBy (many digit) (char 'x') <* string "px") + dims <- try (char '|' *> sepBy (manyChar digit) (char 'x') <* string "px") <|> return [] _ <- many imageOption let kvs = case dims of @@ -614,9 +617,9 @@ image = try $ do let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption + return $ B.imageWith attr fname ("fig:" <> stringify caption) caption -imageOption :: PandocMonad m => MWParser m String +imageOption :: PandocMonad m => MWParser m Text imageOption = try $ char '|' *> opt where opt = try (oneOfStrings [ "border", "thumbnail", "frameless" @@ -624,30 +627,27 @@ imageOption = try $ char '|' *> opt , "center", "none", "baseline", "sub" , "super", "top", "text-top", "middle" , "bottom", "text-bottom" ]) - <|> try (string "frame") + <|> try (textStr "frame") <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) -collapseUnderscores :: String -> String -collapseUnderscores [] = [] -collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) -collapseUnderscores (x:xs) = x : collapseUnderscores xs - -addUnderscores :: String -> String -addUnderscores = collapseUnderscores . intercalate "_" . words +addUnderscores :: Text -> Text +addUnderscores = T.intercalate "_" . splitTextBy sep + where + sep c = isSpace c || c == '_' internalLink :: PandocMonad m => MWParser m Inlines internalLink = try $ do sym "[[" - pagename <- unwords . words <$> many (noneOf "|]") + pagename <- T.unwords . T.words <$> manyChar (noneOf "|]") label <- option (B.text pagename) $ char '|' *> ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) -- the "pipe trick" -- [[Help:Contents|] -> "Contents" - <|> return (B.text $ drop 1 $ dropWhile (/=':') pagename) ) + <|> return (B.text $ T.drop 1 $ T.dropWhile (/=':') pagename) ) sym "]]" - linktrail <- B.text <$> many letter + linktrail <- B.text <$> manyChar letter let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail) - if "Category:" `isPrefixOf` pagename + if "Category:" `T.isPrefixOf` pagename then do updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st } return mempty @@ -662,7 +662,7 @@ externalLink = try $ do <|> do char ']' num <- mwNextLinkNumber <$> getState updateState $ \st -> st{ mwNextLinkNumber = num + 1 } - return $ B.str $ show num + return $ B.str $ tshow num return $ B.link src "" lab url :: PandocMonad m => MWParser m Inlines diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index b8cbe2f26..4ade61294 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Muse Copyright : Copyright (C) 2017-2019 Alexander Krotov @@ -24,12 +25,12 @@ import Control.Monad.Reader import Control.Monad.Except (throwError) import Data.Bifunctor import Data.Default -import Data.List (intercalate, transpose, uncons) -import Data.List.Split (splitOn) +import Data.List (transpose, uncons) import qualified Data.Map as M import qualified Data.Set as Set import Data.Maybe (fromMaybe, isNothing, maybeToList) -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) @@ -38,7 +39,7 @@ import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (F) -import Text.Pandoc.Shared (crFilter, trimr, underlineSpan) +import Text.Pandoc.Shared (crFilter, trimr, underlineSpan, tshow) -- | Read Muse from an input string and return a Pandoc document. readMuse :: PandocMonad m @@ -49,18 +50,18 @@ readMuse opts s = do let input = crFilter s res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input case res of - Left e -> throwError $ PandocParsecError (unpack input) e + Left e -> throwError $ PandocParsecError input e Right d -> return d type F = Future MuseState data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata , museOptions :: ReaderOptions - , museIdentifierList :: Set.Set String + , museIdentifierList :: Set.Set Text , museLastSpacePos :: Maybe SourcePos -- ^ Position after last space or newline parsed , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , museLogMessages :: [LogMessage] - , museNotes :: M.Map String (SourcePos, F Blocks) + , museNotes :: M.Map Text (SourcePos, F Blocks) } instance Default MuseState where @@ -116,22 +117,27 @@ parseMuse = do -- * Utility functions -- | Trim up to one newline from the beginning of the string. -lchop :: String -> String -lchop ('\n':xs) = xs -lchop s = s +lchop :: Text -> Text +lchop s = case T.uncons s of + Just ('\n', xs) -> xs + _ -> s -- | Trim up to one newline from the end of the string. -rchop :: String -> String -rchop = reverse . lchop . reverse +rchop :: Text -> Text +rchop s = case T.unsnoc s of + Just (xs, '\n') -> xs + _ -> s -unindent :: String -> String -unindent = rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop +unindent :: Text -> Text +unindent = rchop . T.intercalate "\n" . dropSpacePrefix . T.splitOn "\n" . lchop -dropSpacePrefix :: [String] -> [String] -dropSpacePrefix lns = drop maxIndent <$> lns +dropSpacePrefix :: [Text] -> [Text] +dropSpacePrefix lns = T.drop maxIndent <$> lns where isSpaceChar c = c == ' ' || c == '\t' - maxIndent = length $ takeWhile (isSpaceChar . head) $ takeWhile same $ transpose lns - same = and . (zipWith (==) <*> drop 1) + maxIndent = length $ takeWhile (isSpaceChar . T.head) $ takeWhile same $ T.transpose lns + same t = case T.uncons t of + Just (c, cs) -> T.all (== c) cs + Nothing -> True atStart :: PandocMonad m => MuseParser m () atStart = do @@ -160,29 +166,29 @@ getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition -- ** HTML parsers -openTag :: PandocMonad m => String -> MuseParser m [(String, String)] +openTag :: PandocMonad m => Text -> MuseParser m [(Text, Text)] openTag tag = try $ - char '<' *> string tag *> manyTill attr (char '>') + char '<' *> textStr tag *> manyTill attr (char '>') where attr = try $ (,) <$ many1 spaceChar - <*> many1 (noneOf "=\n") + <*> many1Char (noneOf "=\n") <* string "=\"" - <*> manyTill (noneOf "\"") (char '"') + <*> manyTillChar (noneOf "\"") (char '"') -closeTag :: PandocMonad m => String -> MuseParser m () -closeTag tag = try $ string "</" *> string tag *> void (char '>') +closeTag :: PandocMonad m => Text -> MuseParser m () +closeTag tag = try $ string "</" *> textStr tag *> void (char '>') -- | Convert HTML attributes to Pandoc 'Attr' -htmlAttrToPandoc :: [(String, String)] -> Attr +htmlAttrToPandoc :: [(Text, Text)] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs + classes = maybe [] T.words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"] parseHtmlContent :: PandocMonad m - => String -- ^ Tag name + => Text -- ^ Tag name -> MuseParser m (Attr, F Blocks) parseHtmlContent tag = try $ getIndent >>= \indent -> (,) <$> fmap htmlAttrToPandoc (openTag tag) @@ -193,16 +199,16 @@ parseHtmlContent tag = try $ getIndent >>= \indent -> (,) -- ** Directive parsers -- While not documented, Emacs Muse allows "-" in directive name -parseDirectiveKey :: PandocMonad m => MuseParser m String -parseDirectiveKey = char '#' *> many (letter <|> char '-') +parseDirectiveKey :: PandocMonad m => MuseParser m Text +parseDirectiveKey = char '#' *> manyChar (letter <|> char '-') -parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseEmacsDirective :: PandocMonad m => MuseParser m (Text, F Inlines) parseEmacsDirective = (,) <$> parseDirectiveKey <* spaceChar <*> (trimInlinesF . mconcat <$> manyTill inline' eol) -parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseAmuseDirective :: PandocMonad m => MuseParser m (Text, F Inlines) parseAmuseDirective = (,) <$> parseDirectiveKey <* many1 spaceChar @@ -289,7 +295,7 @@ listItemContentsUntil col pre end = p parseBlock :: PandocMonad m => MuseParser m (F Blocks) parseBlock = do res <- blockElements <|> para - trace (take 60 $ show $ B.toList $ runF res def) + trace (T.take 60 $ tshow $ B.toList $ runF res def) return res where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements))) @@ -337,7 +343,7 @@ pagebreak = try $ pure (B.divWith ("", [], [("style", "page-break-before: always <* string "* * * * *" <* manyTill spaceChar eol -headingStart :: PandocMonad m => MuseParser m (String, Int) +headingStart :: PandocMonad m => MuseParser m (Text, Int) headingStart = try $ (,) <$> option "" (try (parseAnchor <* manyTill spaceChar eol)) <* firstColumn @@ -371,14 +377,14 @@ example :: PandocMonad m => MuseParser m (F Blocks) example = try $ pure . B.codeBlock <$ string "{{{" <* many spaceChar - <*> (unindent <$> manyTill anyChar (string "}}}")) + <*> (unindent <$> manyTillChar anyChar (string "}}}")) -- | Parse an @\<example>@ tag. exampleTag :: PandocMonad m => MuseParser m (F Blocks) exampleTag = try $ fmap pure $ B.codeBlockWith <$ many spaceChar <*> (htmlAttrToPandoc <$> openTag "example") - <*> (unindent <$> manyTill anyChar (closeTag "example")) + <*> (unindent <$> manyTillChar anyChar (closeTag "example")) <* manyTill spaceChar eol -- | Parse a @\<literal>@ tag as a raw block. @@ -388,7 +394,7 @@ literalTag = try $ fmap pure $ B.rawBlock <$ many spaceChar <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML <* manyTill spaceChar eol - <*> (unindent <$> manyTill anyChar (closeTag "literal")) + <*> (unindent <$> manyTillChar anyChar (closeTag "literal")) <* manyTill spaceChar eol -- | Parse @\<center>@ tag. @@ -428,7 +434,7 @@ playTag = do verseLine :: PandocMonad m => MuseParser m (F Inlines) verseLine = (<>) - <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' '))) + <$> fmap pure (option mempty (B.str <$> many1Char ('\160' <$ char ' '))) <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol) -- | Parse @\<verse>@ tag. @@ -466,17 +472,17 @@ paraUntil end = do noteMarker' :: PandocMonad m => Char -> Char - -> MuseParser m String -noteMarker' l r = try $ (\x y -> l:x:y ++ [r]) + -> MuseParser m Text +noteMarker' l r = try $ (\x y -> T.pack $ l:x:y ++ [r]) <$ char l <*> oneOf "123456789" <*> manyTill digit (char r) -noteMarker :: PandocMonad m => MuseParser m String +noteMarker :: PandocMonad m => MuseParser m Text noteMarker = noteMarker' '[' ']' <|> noteMarker' '{' '}' addNote :: PandocMonad m - => String + => Text -> SourcePos -> F Blocks -> MuseParser m () @@ -674,15 +680,15 @@ museGridTableRow :: PandocMonad m -> MuseParser m (F [Blocks]) museGridTableRow indent indices = try $ do lns <- many1 $ try (indentWith indent *> museGridTableRawLine indices) - let cols = map (unlines . map trimr) $ transpose lns + let cols = map (T.unlines . map trimr) $ transpose lns indentWith indent *> museGridTableHeader sequence <$> mapM (parseFromString' parseBlocks) cols museGridTableRawLine :: PandocMonad m => [Int] - -> MuseParser m [String] + -> MuseParser m [Text] museGridTableRawLine indices = - char '|' *> forM indices (\n -> count n anyChar <* char '|') <* manyTill spaceChar eol + char '|' *> forM indices (\n -> countChar n anyChar <* char '|') <* manyTill spaceChar eol museGridTable :: PandocMonad m => MuseParser m (F Blocks) museGridTable = try $ do @@ -767,12 +773,12 @@ inline = endline <|> inline' endline :: PandocMonad m => MuseParser m (F Inlines) endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline <* updateLastSpacePos -parseAnchor :: PandocMonad m => MuseParser m String -parseAnchor = try $ (:) +parseAnchor :: PandocMonad m => MuseParser m Text +parseAnchor = try $ T.cons <$ firstColumn <* char '#' <*> letter - <*> many (letter <|> digit <|> char '-') + <*> manyChar (letter <|> digit <|> char '-') anchor :: PandocMonad m => MuseParser m (F Inlines) anchor = try $ do @@ -813,7 +819,7 @@ emphasisBetween p = try $ trimInlinesF . mconcat -- | Parse an inline tag, such as @\<em>@ and @\<strong>@. inlineTag :: PandocMonad m - => String -- ^ Tag name + => Text -- ^ Tag name -> MuseParser m (F Inlines) inlineTag tag = try $ mconcat <$ openTag tag @@ -862,12 +868,12 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del" verbatimTag :: PandocMonad m => MuseParser m (F Inlines) verbatimTag = return . B.text <$ openTag "verbatim" - <*> manyTill anyChar (closeTag "verbatim") + <*> manyTillChar anyChar (closeTag "verbatim") -- | Parse @\<class>@ tag. classTag :: PandocMonad m => MuseParser m (F Inlines) classTag = do - classes <- maybe [] words . lookup "name" <$> openTag "class" + classes <- maybe [] T.words . lookup "name" <$> openTag "class" fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class") -- | Parse @\<\<\<RTL>>>@ text. @@ -886,43 +892,43 @@ nbsp = try $ pure (B.str "\160") <$ string "~~" -- | Parse code markup, indicated by @\'=\'@ characters. code :: PandocMonad m => MuseParser m (F Inlines) -code = try $ fmap pure $ B.code . uncurry (++) +code = try $ fmap pure $ B.code . uncurry (<>) <$ atStart <* char '=' <* notFollowedBy (spaceChar <|> newline) - <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=') + <*> manyUntilChar (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap T.singleton $ noneOf " \t\n\r=" <* char '=') <* notFollowedBy alphaNum -- | Parse @\<code>@ tag. codeTag :: PandocMonad m => MuseParser m (F Inlines) codeTag = fmap pure $ B.codeWith <$> (htmlAttrToPandoc <$> openTag "code") - <*> manyTill anyChar (closeTag "code") + <*> manyTillChar anyChar (closeTag "code") -- | Parse @\<math>@ tag. -- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@ mathTag :: PandocMonad m => MuseParser m (F Inlines) mathTag = return . B.math <$ openTag "math" - <*> manyTill anyChar (closeTag "math") + <*> manyTillChar anyChar (closeTag "math") -- | Parse inline @\<literal>@ tag as a raw inline. inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) inlineLiteralTag = try $ fmap pure $ B.rawInline <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML - <*> manyTill anyChar (closeTag "literal") + <*> manyTillChar anyChar (closeTag "literal") str :: PandocMonad m => MuseParser m (F Inlines) -str = return . B.str <$> many1 alphaNum <* updateLastStrPos +str = return . B.str <$> many1Char alphaNum <* updateLastStrPos -- | Consume asterisks that were not used as emphasis opening. -- This prevents series of asterisks from being split into -- literal asterisk and emphasis opening. asterisks :: PandocMonad m => MuseParser m (F Inlines) -asterisks = pure . B.str <$> many1 (char '*') +asterisks = pure . B.str <$> many1Char (char '*') symbol :: PandocMonad m => MuseParser m (F Inlines) -symbol = pure . B.str . pure <$> nonspaceChar +symbol = pure . B.str . T.singleton <$> nonspaceChar -- | Parse a link or image. linkOrImage :: PandocMonad m => MuseParser m (F Inlines) @@ -934,12 +940,12 @@ linkContent = trimInlinesF . mconcat <*> manyTill inline (char ']') -- | Parse a link starting with (possibly null) prefix -link :: PandocMonad m => String -> MuseParser m (F Inlines) +link :: PandocMonad m => Text -> MuseParser m (F Inlines) link prefix = try $ do inLink <- asks museInLink guard $ not inLink - string $ "[[" ++ prefix - url <- manyTill anyChar $ char ']' + textStr $ "[[" <> prefix + url <- manyTillChar anyChar $ char ']' content <- option (pure $ B.str url) (local (\s -> s { museInLink = True }) linkContent) char ']' return $ B.link url "" <$> content @@ -947,27 +953,27 @@ link prefix = try $ do image :: PandocMonad m => MuseParser m (F Inlines) image = try $ do string "[[" - (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']') + (url, (ext, width, align)) <- manyUntilChar (noneOf "]") (imageExtensionAndOptions <* char ']') content <- option mempty linkContent char ']' let widthAttr = case align of - Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")] - _ -> maybeToList (("width",) . (++ "%") <$> width) + Just 'f' -> [("width", fromMaybe "100" width <> "%"), ("height", "75%")] + _ -> maybeToList (("width",) . (<> "%") <$> width) let alignClass = case align of Just 'r' -> ["align-right"] Just 'l' -> ["align-left"] Just 'f' -> [] _ -> [] - return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content + return $ B.imageWith ("", alignClass, widthAttr) (url <> ext) mempty <$> content where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] - imageExtension = choice (try . string <$> imageExtensions) + imageExtension = choice (try . textStr <$> imageExtensions) imageExtensionAndOptions = do ext <- imageExtension (width, align) <- option (Nothing, Nothing) imageAttrs return (ext, width, align) imageAttrs = (,) <$ many1 spaceChar - <*> optionMaybe (many1 digit) + <*> optionMaybe (many1Char digit) <* many spaceChar <*> optionMaybe (oneOf "rlf") diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 9e3c118d8..34d3c5e8f 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Native Copyright : Copyright (C) 2011-2019 John MacFarlane @@ -19,7 +20,7 @@ import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Shared (safeRead) import Control.Monad.Except (throwError) -import Data.Text (Text, unpack) +import Data.Text (Text) import Text.Pandoc.Class import Text.Pandoc.Error @@ -38,18 +39,18 @@ readNative :: PandocMonad m -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of + case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of Right doc -> return doc Left _ -> throwError $ PandocParseError "couldn't read native" readBlocks :: Text -> Either PandocError [Block] -readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s)) +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) readBlock :: Text -> Either PandocError Block -readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s)) +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) readInlines :: Text -> Either PandocError [Inline] -readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s)) +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) readInline :: Text -> Either PandocError Inline -readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s)) +readInline s = maybe (Left . PandocParseError $ "Could not read: " <> s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 2c3b0367f..5330b0238 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.OPML Copyright : Copyright (C) 2013-2019 John MacFarlane @@ -18,7 +19,8 @@ import Data.Char (toUpper) import Data.Default import Data.Generics import Data.Maybe (fromMaybe) -import Data.Text (Text, pack, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad) @@ -50,7 +52,7 @@ readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML opts inp = do (bs, st') <- runStateT (mapM parseBlock $ normalizeTree $ - parseXML (unpack (crFilter inp))) def{ opmlOptions = opts } + parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts } return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ @@ -76,23 +78,26 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr elt = - fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + +textContent :: Element -> Text +textContent = T.pack . strContent -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a -- exceptT = either throwError return -asHtml :: PandocMonad m => String -> OPML m Inlines +asHtml :: PandocMonad m => Text -> OPML m Inlines asHtml s = do opts <- gets opmlOptions - Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } (pack s) + Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } s return $ blocksToInlines' bs -asMarkdown :: PandocMonad m => String -> OPML m Blocks +asMarkdown :: PandocMonad m => Text -> OPML m Blocks asMarkdown s = do opts <- gets opmlOptions - Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } (pack s) + Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } s return $ fromList bs getBlocks :: PandocMonad m => Element -> OPML m Blocks @@ -102,11 +107,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks parseBlock (Elem e) = case qName (elName e) of "ownerName" -> mempty <$ modify (\st -> - st{opmlDocAuthors = [text $ strContent e]}) + st{opmlDocAuthors = [text $ textContent e]}) "dateModified" -> mempty <$ modify (\st -> - st{opmlDocDate = text $ strContent e}) + st{opmlDocDate = text $ textContent e}) "title" -> mempty <$ modify (\st -> - st{opmlDocTitle = text $ strContent e}) + st{opmlDocTitle = text $ textContent e}) "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e @@ -115,7 +120,7 @@ parseBlock (Elem e) = modify $ \st -> st{ opmlSectionLevel = n } bs <- getBlocks e modify $ \st -> st{ opmlSectionLevel = n - 1 } - let headerText' = case map toUpper (attrValue "type" e) of + let headerText' = case T.toUpper (attrValue "type" e) of "LINK" -> link (attrValue "url" e) "" headerText _ -> headerText diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index dfa019932..f9b78d5bf 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Reader.Odt Copyright : Copyright (C) 2015 Martin Linnemann diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index d8e5ba272..ff8cdc5fa 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -1,11 +1,12 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE Arrows #-} -{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE Arrows #-} +{-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Odt.ContentReader Copyright : Copyright (C) 2015 Martin Linnemann @@ -29,8 +30,9 @@ import Control.Arrow import qualified Data.ByteString.Lazy as B import Data.Foldable (fold) -import Data.List (find, intercalate, stripPrefix) +import Data.List (find, stripPrefix) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import Data.Semigroup (First(..), Option(..)) @@ -59,7 +61,7 @@ import qualified Data.Set as Set -- State -------------------------------------------------------------------------------- -type Anchor = String +type Anchor = T.Text type Media = [(FilePath, B.ByteString)] data ReaderState @@ -204,21 +206,21 @@ updateMediaWithResource = keepingTheValue ( ) >>^ fst -lookupResource :: OdtReaderSafe String (FilePath, B.ByteString) +lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString) lookupResource = proc target -> do state <- getExtraState -< () case lookup target (getMediaEnv state) of Just bs -> returnV (target, bs) -<< () Nothing -> returnV ("", B.empty) -< () -type AnchorPrefix = String +type AnchorPrefix = T.Text -- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a -- unique identifier but without assuming that the id should be for a header. -- Second argument is a list of already used identifiers. uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor uniqueIdentFrom baseIdent usedIdents = - let numIdent n = baseIdent ++ "-" ++ show n + let numIdent n = baseIdent <> "-" <> T.pack (show n) in if baseIdent `elem` usedIdents then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of Just x -> numIdent x @@ -305,7 +307,7 @@ withNewStyle a = proc x -> do isCodeStyle _ = False inlineCode :: Inlines -> Inlines - inlineCode = code . intercalate "" . map stringify . toList + inlineCode = code . T.concat . map stringify . toList type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily) type InlineModifier = Inlines -> Inlines @@ -535,7 +537,6 @@ matchChildContent :: (Monoid result) -> OdtReaderSafe _x result matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback - -------------------------------------------- -- Matchers -------------------------------------------- @@ -556,8 +557,8 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover ) >>?% mappend -- - extractText :: XML.Content -> Fallible String - extractText (XML.Text cData) = succeedWith (XML.cdData cData) + extractText :: XML.Content -> Fallible T.Text + extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData) extractText _ = failEmpty read_text_seq :: InlineMatcher @@ -675,8 +676,8 @@ read_list_item = matchingElement NsText "list-item" read_link :: InlineMatcher read_link = matchingElement NsText "a" $ liftA3 link - ( findAttrWithDefault NsXLink "href" "" ) - ( findAttrWithDefault NsOffice "title" "" ) + ( findAttrTextWithDefault NsXLink "href" "" ) + ( findAttrTextWithDefault NsOffice "title" "" ) ( matchChildContent [ read_span , read_note , read_citation @@ -709,12 +710,12 @@ read_citation :: InlineMatcher read_citation = matchingElement NsText "bibliography-mark" $ liftA2 cite ( liftA2 makeCitation - ( findAttrWithDefault NsText "identifier" "" ) + ( findAttrTextWithDefault NsText "identifier" "" ) ( readAttrWithDefault NsText "number" 0 ) ) ( matchChildContent [] read_plain_text ) where - makeCitation :: String -> Int -> [Citation] + makeCitation :: T.Text -> Int -> [Citation] makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0] @@ -779,17 +780,17 @@ read_frame_img = let exts = extensionsFromList [Ext_auto_identifiers] resource <- lookupResource -< src' _ <- updateMediaWithResource -< resource - w <- findAttr' NsSVG "width" -< () - h <- findAttr' NsSVG "height" -< () + w <- findAttrText' NsSVG "width" -< () + h <- findAttrText' NsSVG "height" -< () titleNodes <- matchChildContent' [ read_frame_title ] -< () alt <- matchChildContent [] read_plain_text -< () arr (firstMatch . uncurry4 imageWith) -< - (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) + (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt) read_frame_title :: InlineMatcher read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) -image_attributes :: Maybe String -> Maybe String -> Attr +image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr image_attributes x y = ( "", [], (dim "width" x) ++ (dim "height" y)) where @@ -806,7 +807,7 @@ read_frame_mathml = src' -> do let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml" (_, mathml) <- lookupResource -< path - case readMathML (UTF8.toString $ B.toStrict mathml) of + case readMathML (UTF8.toText $ B.toStrict mathml) of Left _ -> returnV mempty -< () Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps @@ -817,9 +818,9 @@ read_frame_text_box = proc box -> do read_img_with_caption :: [Block] -> FirstMatch Inlines read_img_with_caption (Para [Image attr alt (src,title)] : _) = - firstMatch $ singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption + firstMatch $ singleton (Image attr alt (src, "fig:" <> title)) -- no text, default caption read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) = - firstMatch $ singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows + firstMatch $ singleton (Image attr txt (src, "fig:" <> title) ) -- override caption with the text that follows read_img_with_caption ( Para (_ : xs) : ys) = read_img_with_caption (Para xs : ys) read_img_with_caption _ = @@ -829,12 +830,12 @@ read_img_with_caption _ = -- Internal links ---------------------- -_ANCHOR_PREFIX_ :: String +_ANCHOR_PREFIX_ :: T.Text _ANCHOR_PREFIX_ = "anchor" -- readAnchorAttr :: OdtReader _x Anchor -readAnchorAttr = findAttr NsText "name" +readAnchorAttr = findAttrText NsText "name" -- | Beware: may fail findAnchorName :: OdtReader AnchorPrefix Anchor @@ -875,7 +876,7 @@ read_reference_start = matchingElement NsText "reference-mark-start" -- | Beware: may fail findAnchorRef :: OdtReader _x Anchor -findAnchorRef = ( findAttr NsText "ref-name" +findAnchorRef = ( findAttrText NsText "ref-name" >>?^ (_ANCHOR_PREFIX_,) ) >>?! getPrettyAnchor @@ -890,7 +891,7 @@ maybeInAnchorRef = proc inlines -> do Left _ -> returnA -< inlines where toAnchorRef :: Anchor -> Inlines -> Inlines - toAnchorRef anchor = link ('#':anchor) "" -- no title + toAnchorRef anchor = link ("#" <> anchor) "" -- no title -- read_bookmark_ref :: InlineMatcher diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index ccbaf6fc4..59d1b8abd 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -38,8 +38,11 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , lookupAttr' , lookupDefaultingAttr , findAttr' +, findAttrText' , findAttr +, findAttrText , findAttrWithDefault +, findAttrTextWithDefault , readAttr , readAttr' , readAttrWithDefault @@ -59,6 +62,7 @@ import Control.Arrow import Data.Either ( rights ) import qualified Data.Map as M +import qualified Data.Text as T import Data.Default import Data.Maybe @@ -79,6 +83,7 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible type ElementName = String type AttributeName = String type AttributeValue = String +type TextAttributeValue = T.Text -- type NameSpacePrefix = String @@ -466,6 +471,16 @@ findAttr' nsID attrName = qualifyName nsID attrName &&& getCurrentElement >>% XML.findAttr +-- | Return value as a (Maybe Text) +findAttrText' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe TextAttributeValue) +findAttrText' nsID attrName + = qualifyName nsID attrName + &&& getCurrentElement + >>% XML.findAttr + >>^ fmap T.pack + -- | Return value as string or fail findAttr :: (NameSpaceID nsID) => nsID -> AttributeName @@ -473,6 +488,15 @@ findAttr :: (NameSpaceID nsID) findAttr nsID attrName = findAttr' nsID attrName >>> maybeToChoice +-- | Return value as text or fail +findAttrText :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x TextAttributeValue +findAttrText nsID attrName + = findAttr' nsID attrName + >>^ fmap T.pack + >>> maybeToChoice + -- | Return value as string or return provided default value findAttrWithDefault :: (NameSpaceID nsID) => nsID -> AttributeName @@ -482,6 +506,15 @@ findAttrWithDefault nsID attrName deflt = findAttr' nsID attrName >>^ fromMaybe deflt +-- | Return value as string or return provided default value +findAttrTextWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> TextAttributeValue + -> XMLConverter nsID extraState x TextAttributeValue +findAttrTextWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ maybe deflt T.pack + -- | Read and return value or fail readAttr :: (NameSpaceID nsID, Read attrValue) => nsID -> AttributeName diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 79e8d7aea..99fa05880 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -548,11 +548,11 @@ readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) readListLevelStyle levelType = readAttr NsText "level" >>?! keepingTheValue ( liftA5 toListLevelStyle - ( returnV levelType ) - ( findAttr' NsStyle "num-prefix" ) - ( findAttr' NsStyle "num-suffix" ) - ( getAttr NsStyle "num-format" ) - ( findAttr' NsText "start-value" ) + ( returnV levelType ) + ( findAttr' NsStyle "num-prefix" ) + ( findAttr' NsStyle "num-suffix" ) + ( getAttr NsStyle "num-format" ) + ( findAttrText' NsText "start-value" ) ) where toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2c88c7776..99ece152c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -26,7 +27,6 @@ import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) import Data.Text (Text) -import qualified Data.Text as T -- | Parse org-mode string and return a Pandoc document. readOrg :: PandocMonad m @@ -36,7 +36,7 @@ readOrg :: PandocMonad m readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left _ -> throwError $ PandocParseError "problem parsing org" diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index 58db4f46c..b4f3cc0d8 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.BlockStarts Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -25,6 +26,8 @@ module Text.Pandoc.Readers.Org.BlockStarts import Prelude import Control.Monad (void) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Readers.Org.Parsing -- | Horizontal Line (five -- dashes or more) @@ -49,15 +52,15 @@ gridTableStart :: Monad m => OrgParser m () gridTableStart = try $ skipSpaces <* char '+' <* char '-' -latexEnvStart :: Monad m => OrgParser m String +latexEnvStart :: Monad m => OrgParser m Text latexEnvStart = try $ skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" <* blankline where - latexEnvName :: Monad m => OrgParser m String - latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*") + latexEnvName :: Monad m => OrgParser m Text + latexEnvName = try $ mappend <$> many1Char alphaNum <*> option "" (textStr "*") bulletListStart :: Monad m => OrgParser m Int bulletListStart = try $ do @@ -68,7 +71,7 @@ bulletListStart = try $ do return (ind + 1) genericListStart :: Monad m - => OrgParser m String + => OrgParser m Text -> OrgParser m Int genericListStart listMarker = try $ do ind <- length <$> many spaceChar @@ -82,11 +85,11 @@ eol = void (char '\n') orderedListStart :: Monad m => OrgParser m Int orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode - where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + where orderedListMarker = T.snoc <$> many1Char digit <*> oneOf ".)" -drawerStart :: Monad m => OrgParser m String +drawerStart :: Monad m => OrgParser m Text drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline - where drawerName = char ':' *> manyTill nonspaceChar (char ':') + where drawerName = char ':' *> manyTillChar nonspaceChar (char ':') metaLineStart :: Monad m => OrgParser m () metaLineStart = try $ skipSpaces <* string "#+" @@ -99,12 +102,12 @@ commentLineStart = try $ exampleLineStart :: Monad m => OrgParser m () exampleLineStart = () <$ try (skipSpaces *> string ": ") -noteMarker :: Monad m => OrgParser m String +noteMarker :: Monad m => OrgParser m Text noteMarker = try $ do char '[' - choice [ many1Till digit (char ']') - , (++) <$> string "fn:" - <*> many1Till (noneOf "\n\r\t ") (char ']') + choice [ many1TillChar digit (char ']') + , (<>) <$> textStr "fn:" + <*> many1TillChar (noneOf "\n\r\t ") (char ']') ] -- | Succeeds if the parser is at the end of a block. diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index cba876f06..de51dec3d 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -23,7 +24,7 @@ import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Blocks, Inlines) @@ -33,11 +34,13 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (compactify, compactifyDL, safeRead) import Control.Monad (foldM, guard, mzero, void) -import Data.Char (isSpace, toLower, toUpper) +import Data.Char (isSpace) import Data.Default (Default) -import Data.List (foldl', isPrefixOf) +import Data.List (foldl') import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Text (Text) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Walk as Walk @@ -90,10 +93,10 @@ horizontalRule = return B.horizontalRule <$ try hline -- | Attributes that may be added to figures (like a name or caption). data BlockAttributes = BlockAttributes - { blockAttrName :: Maybe String - , blockAttrLabel :: Maybe String + { blockAttrName :: Maybe Text + , blockAttrLabel :: Maybe Text , blockAttrCaption :: Maybe (F Inlines) - , blockAttrKeyValues :: [(String, String)] + , blockAttrKeyValues :: [(Text, Text)] } -- | Convert BlockAttributes into pandoc Attr @@ -103,14 +106,14 @@ attrFromBlockAttributes BlockAttributes{..} = ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of Nothing -> [] - Just clsStr -> words clsStr + Just clsStr -> T.words clsStr kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute :: Monad m => OrgParser m (Text, Text) stringyMetaAttribute = try $ do metaLineStart - attrName <- map toUpper <$> many1Till nonspaceChar (char ':') + attrName <- T.toUpper <$> many1TillChar nonspaceChar (char ':') skipSpaces attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) @@ -129,8 +132,8 @@ blockAttributes = try $ do let label = lookup "LABEL" kv caption' <- case caption of Nothing -> return Nothing - Just s -> Just <$> parseFromString inlines (s ++ "\n") - kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs + Just s -> Just <$> parseFromString inlines (s <> "\n") + kvAttrs' <- parseFromString keyValues . (<> "\n") $ fromMaybe mempty kvAttrs return BlockAttributes { blockAttrName = name , blockAttrLabel = label @@ -138,31 +141,31 @@ blockAttributes = try $ do , blockAttrKeyValues = kvAttrs' } where - isBlockAttr :: String -> Bool + isBlockAttr :: Text -> Bool isBlockAttr = flip elem [ "NAME", "LABEL", "CAPTION" , "ATTR_HTML", "ATTR_LATEX" , "RESULTS" ] - appendValues :: String -> Maybe String -> (String, String) -> Maybe String + appendValues :: Text -> Maybe Text -> (Text, Text) -> Maybe Text appendValues attrName accValue (key, value) = if key /= attrName then accValue else case accValue of - Just acc -> Just $ acc ++ ' ':value + Just acc -> Just $ acc <> " " <> value Nothing -> Just value -- | Parse key-value pairs for HTML attributes -keyValues :: Monad m => OrgParser m [(String, String)] +keyValues :: Monad m => OrgParser m [(Text, Text)] keyValues = try $ manyTill ((,) <$> key <*> value) newline where - key :: Monad m => OrgParser m String - key = try $ skipSpaces *> char ':' *> many1 nonspaceChar + key :: Monad m => OrgParser m Text + key = try $ skipSpaces *> char ':' *> many1Char nonspaceChar - value :: Monad m => OrgParser m String - value = skipSpaces *> manyTill anyChar endOfValue + value :: Monad m => OrgParser m Text + value = skipSpaces *> manyTillChar anyChar endOfValue endOfValue :: Monad m => OrgParser m () endOfValue = @@ -180,7 +183,7 @@ orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case map toLower blkType of + case T.toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -194,13 +197,13 @@ orgBlock = try $ do let (ident, classes, kv) = attrFromBlockAttributes blockAttrs in fmap $ B.divWith (ident, classes ++ [blkType], kv) where - blockHeaderStart :: Monad m => OrgParser m String + blockHeaderStart :: Monad m => OrgParser m Text blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord - lowercase :: String -> String - lowercase = map toLower + lowercase :: Text -> Text + lowercase = T.toLower -exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +exampleBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) exampleBlock blockAttrs _label = do skipSpaces (classes, kv) <- switchesAsAttributes @@ -210,54 +213,54 @@ exampleBlock blockAttrs _label = do let codeBlck = B.codeBlockWith (id', "example":classes, kv) content return . return $ codeBlck -rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) +rawBlockLines :: Monad m => (Text -> F Blocks) -> Text -> OrgParser m (F Blocks) rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) -parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) +parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> Text -> OrgParser m (F Blocks) parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do raw <- rawBlockContent blockType - parseFromString blocks (raw ++ "\n") + parseFromString blocks (raw <> "\n") -- | Read the raw string content of a block -rawBlockContent :: Monad m => String -> OrgParser m String +rawBlockContent :: Monad m => Text -> OrgParser m Text rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop trimP <- orgStateTrimLeadBlkIndent <$> getState - let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs - (unlines + let stripIndent strs = if trimP then map (T.drop (shortestIndent strs)) strs else strs + (T.unlines . stripIndent . map (tabsToSpaces tabLen . commaEscaped) $ blkLines) <$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True }) where - rawLine :: Monad m => OrgParser m String + rawLine :: Monad m => OrgParser m Text rawLine = try $ ("" <$ blankline) <|> anyLine blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) - shortestIndent :: [String] -> Int - shortestIndent = foldr (min . length . takeWhile isSpace) maxBound - . filter (not . null) - - tabsToSpaces :: Int -> String -> String - tabsToSpaces _ [] = [] - tabsToSpaces tabLen cs'@(c:cs) = - case c of - ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs - _ -> cs' - - commaEscaped :: String -> String - commaEscaped (',':cs@('*':_)) = cs - commaEscaped (',':cs@('#':'+':_)) = cs - commaEscaped (' ':cs) = ' ':commaEscaped cs - commaEscaped ('\t':cs) = '\t':commaEscaped cs - commaEscaped cs = cs + shortestIndent :: [Text] -> Int + shortestIndent = foldr (min . T.length . T.takeWhile isSpace) maxBound + . filter (not . T.null) + + tabsToSpaces :: Int -> Text -> Text + tabsToSpaces tabStop t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + tabNum = T.length $ T.filter (== '\n') ind + spaceNum = T.length ind - tabNum + in T.replicate (spaceNum + tabStop * tabNum) " " <> suff + + commaEscaped t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + in case T.uncons suff of + Just (',', cs) + | "*" <- T.take 1 cs -> ind <> cs + | "#+" <- T.take 2 cs -> ind <> cs + _ -> t -- | Read but ignore all remaining block headers. ignHeaders :: Monad m => OrgParser m () @@ -265,34 +268,34 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine) -- | Read a block containing code intended for export in specific backends -- only. -exportBlock :: Monad m => String -> OrgParser m (F Blocks) +exportBlock :: Monad m => Text -> OrgParser m (F Blocks) exportBlock blockType = try $ do exportType <- skipSpaces *> orgArgWord <* ignHeaders contents <- rawBlockContent blockType - returnF (B.rawBlock (map toLower exportType) contents) + returnF (B.rawBlock (T.toLower exportType) contents) -verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) +verseBlock :: PandocMonad m => Text -> OrgParser m (F Blocks) verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType fmap B.lineBlock . sequence - <$> mapM parseVerseLine (lines content) + <$> mapM parseVerseLine (T.lines content) where -- replace initial spaces with nonbreaking spaces to preserve -- indentation, parse the rest as normal inline - parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) + parseVerseLine :: PandocMonad m => Text -> OrgParser m (F Inlines) parseVerseLine cs = do - let (initialSpaces, indentedLine) = span isSpace cs - let nbspIndent = if null initialSpaces + let (initialSpaces, indentedLine) = T.span isSpace cs + let nbspIndent = if T.null initialSpaces then mempty - else B.str $ map (const '\160') initialSpaces - line <- parseFromString inlines (indentedLine ++ "\n") + else B.str $ T.map (const '\160') initialSpaces + line <- parseFromString inlines (indentedLine <> "\n") return (trimInlinesF $ pure nbspIndent <> line) -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" -- argument in the block header. -codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -314,7 +317,7 @@ codeBlock blockAttrs blockType = do labelledBlock :: F Inlines -> F Blocks labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) - exportsResults :: [(String, String)] -> Bool + exportsResults :: [(Text, Text)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -- | Parse the result of an evaluated babel code block. @@ -329,7 +332,7 @@ babelResultsBlock = try $ do resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments -codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) +codeHeaderArgs :: Monad m => OrgParser m ([Text], [(Text, Text)]) codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord (switchClasses, switchKv) <- switchesAsAttributes @@ -338,14 +341,14 @@ codeHeaderArgs = try $ do , originalLang language <> switchKv <> parameters ) -switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) +switchesAsAttributes :: Monad m => OrgParser m ([Text], [(Text, Text)]) switchesAsAttributes = try $ do switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) return $ foldr addToAttr ([], []) switches where - addToAttr :: (Char, Maybe String, SwitchPolarity) - -> ([String], [(String, String)]) - -> ([String], [(String, String)]) + addToAttr :: (Char, Maybe Text, SwitchPolarity) + -> ([Text], [(Text, Text)]) + -> ([Text], [(Text, Text)]) addToAttr ('n', lineNum, pol) (cls, kv) = let kv' = case lineNum of Just num -> ("startFrom", num):kv @@ -365,15 +368,15 @@ switchPolarity :: Monad m => OrgParser m SwitchPolarity switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+') -- | Parses a source block switch option. -switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +switch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) switch = try $ lineNumberSwitch <|> labelSwitch <|> whitespaceSwitch <|> simpleSwitch where simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter labelSwitch = genericSwitch 'l' $ - char '"' *> many1Till nonspaceChar (char '"') + char '"' *> many1TillChar nonspaceChar (char '"') -whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) whitespaceSwitch = do string "-i" updateState $ \s -> s { orgStateTrimLeadBlkIndent = False } @@ -382,8 +385,8 @@ whitespaceSwitch = do -- | Generic source block switch-option parser. genericSwitch :: Monad m => Char - -> OrgParser m String - -> OrgParser m (Char, Maybe String, SwitchPolarity) + -> OrgParser m Text + -> OrgParser m (Char, Maybe Text, SwitchPolarity) genericSwitch c p = try $ do polarity <- switchPolarity <* char c <* skipSpaces arg <- optionMaybe p @@ -391,17 +394,17 @@ genericSwitch c p = try $ do -- | Reads a line number switch option. The line number switch can be used with -- example and source blocks. -lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) -lineNumberSwitch = genericSwitch 'n' (many digit) +lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) +lineNumberSwitch = genericSwitch 'n' (manyChar digit) -blockOption :: Monad m => OrgParser m (String, String) +blockOption :: Monad m => OrgParser m (Text, Text) blockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgParamValue return (argKey, paramValue) -orgParamValue :: Monad m => OrgParser m String -orgParamValue = try $ +orgParamValue :: Monad m => OrgParser m Text +orgParamValue = try $ fmap T.pack $ skipSpaces *> notFollowedBy orgArgKey *> noneOf "\n\r" `many1Till` endOfValue @@ -420,7 +423,7 @@ orgParamValue = try $ -- export setting. genericDrawer :: PandocMonad m => OrgParser m (F Blocks) genericDrawer = try $ do - name <- map toUpper <$> drawerStart + name <- T.toUpper <$> drawerStart content <- manyTill drawerLine (try drawerEnd) state <- getState -- Include drawer if it is explicitly included in or not explicitly excluded @@ -432,16 +435,16 @@ genericDrawer = try $ do Right names | name `notElem` names -> return mempty _ -> drawerDiv name <$> parseLines content where - parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) - parseLines = parseFromString blocks . (++ "\n") . unlines + parseLines :: PandocMonad m => [Text] -> OrgParser m (F Blocks) + parseLines = parseFromString blocks . (<> "\n") . T.unlines - drawerDiv :: String -> F Blocks -> F Blocks + drawerDiv :: Text -> F Blocks -> F Blocks drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) -drawerLine :: Monad m => OrgParser m String +drawerLine :: Monad m => OrgParser m Text drawerLine = anyLine -drawerEnd :: Monad m => OrgParser m String +drawerEnd :: Monad m => OrgParser m Text drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline @@ -456,17 +459,17 @@ figure :: PandocMonad m => OrgParser m (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph - case cleanLinkString src of + case cleanLinkText src of Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where - selfTarget :: PandocMonad m => OrgParser m String + selfTarget :: PandocMonad m => OrgParser m Text selfTarget = try $ char '[' *> linkTarget <* char ']' - imageBlock :: Bool -> BlockAttributes -> String -> F Blocks + imageBlock :: Bool -> BlockAttributes -> Text -> F Blocks imageBlock isFigure figAttrs imgSrc = let figName = fromMaybe mempty $ blockAttrName figAttrs @@ -478,11 +481,11 @@ figure = try $ do in B.para . B.imageWith attr imgSrc figTitle <$> figCaption - withFigPrefix :: String -> String + withFigPrefix :: Text -> Text withFigPrefix cs = - if "fig:" `isPrefixOf` cs + if "fig:" `T.isPrefixOf` cs then cs - else "fig:" ++ cs + else "fig:" <> cs -- | Succeeds if looking at the end of the current paragraph endOfParagraph :: Monad m => OrgParser m () @@ -495,12 +498,12 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- | Example code marked up by a leading colon. example :: Monad m => OrgParser m (F Blocks) -example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine +example = try $ returnF . exampleCode =<< T.unlines <$> many1 exampleLine where - exampleLine :: Monad m => OrgParser m String + exampleLine :: Monad m => OrgParser m Text exampleLine = try $ exampleLineStart *> anyLine -exampleCode :: String -> Blocks +exampleCode :: Text -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) @@ -516,7 +519,7 @@ include :: PandocMonad m => OrgParser m (F Blocks) include = try $ do metaLineStart <* stringAnyCase "include:" <* skipSpaces filename <- includeTarget - includeArgs <- many (try $ skipSpaces *> many1 alphaNum) + includeArgs <- many (try $ skipSpaces *> many1Char alphaNum) params <- keyValues blocksParser <- case includeArgs of ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw @@ -535,10 +538,10 @@ include = try $ do char '"' manyTill (noneOf "\n\r\t") (char '"') - parseRaw :: PandocMonad m => OrgParser m String - parseRaw = many anyChar + parseRaw :: PandocMonad m => OrgParser m Text + parseRaw = manyChar anyChar - blockFilter :: [(String, String)] -> [Block] -> [Block] + blockFilter :: [(Text, Text)] -> [Block] -> [Block] blockFilter params blks = let minlvl = lookup "minlevel" params in case (minlvl >>= safeRead :: Maybe Int) of @@ -660,7 +663,7 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info" <$> (skipSpaces *> char '<' *> optionMaybe tableAlignFromChar) - <*> (optionMaybe (many1 digit >>= safeRead) + <*> (optionMaybe (many1Char digit >>= safeRead) <* char '>' <* emptyCell) @@ -739,10 +742,10 @@ latexFragment = try $ do , "\\end{", e, "}\n" ] -latexEnd :: Monad m => String -> OrgParser m () +latexEnd :: Monad m => Text -> OrgParser m () latexEnd envName = try $ () <$ skipSpaces - <* string ("\\end{" ++ envName ++ "}") + <* textStr ("\\end{" <> envName <> "}") <* blankline @@ -813,12 +816,12 @@ definitionListItem :: PandocMonad m -> OrgParser m (F (Inlines, [Blocks])) definitionListItem parseIndentedMarker = try $ do markerLength <- parseIndentedMarker - term <- manyTill (noneOf "\n\r") (try definitionMarker) + term <- manyTillChar (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) - cont <- concat <$> many (listContinuation markerLength) + cont <- T.concat <$> many (listContinuation markerLength) term' <- parseFromString inlines term - contents' <- parseFromString blocks $ line1 ++ blank ++ cont + contents' <- parseFromString blocks $ line1 <> blank <> cont return $ (,) <$> term' <*> fmap (:[]) contents' where definitionMarker = @@ -832,16 +835,16 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do markerLength <- try parseIndentedMarker firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString blocks $ firstLine ++ blank ++ rest + rest <- T.concat <$> many (listContinuation markerLength) + parseFromString blocks $ firstLine <> blank <> rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. -listContinuation :: PandocMonad m => Int -> OrgParser m String +listContinuation :: PandocMonad m => Int -> OrgParser m Text listContinuation markerLength = try $ do notFollowedBy' blankline - mappend <$> (concat <$> many1 (listContinuation' markerLength)) - <*> many blankline + mappend <$> (T.concat <$> many1 (listContinuation' markerLength)) + <*> manyChar blankline where listContinuation' indentation = blockLines indentation <|> listLine indentation @@ -853,6 +856,6 @@ listContinuation markerLength = try $ do >> blockAttributes >>= (\blockAttrs -> case attrFromBlockAttributes blockAttrs of - ("", [], []) -> count 1 anyChar + ("", [], []) -> countChar 1 anyChar _ -> indentWith indentation)) >> (snd <$> withRaw orgBlock) diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index c96087be7..09a501b68 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.DocumentTree Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -17,9 +18,9 @@ module Text.Pandoc.Readers.Org.DocumentTree import Prelude import Control.Arrow ((***)) import Control.Monad (guard, void) -import Data.Char (toLower, toUpper) import Data.List (intersperse) import Data.Maybe (mapMaybe) +import Data.Text (Text) import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition @@ -28,6 +29,7 @@ import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import qualified Data.Set as Set +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B -- @@ -59,28 +61,28 @@ documentTree blocks inline = do } -- | Create a tag containing the given string. -toTag :: String -> Tag +toTag :: Text -> Tag toTag = Tag -- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } +newtype PropertyKey = PropertyKey { fromKey :: Text } deriving (Show, Eq, Ord) -- | Create a property key containing the given string. Org mode keys are -- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower +toPropertyKey :: Text -> PropertyKey +toPropertyKey = PropertyKey . T.toLower -- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } +newtype PropertyValue = PropertyValue { fromValue :: Text } -- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue +toPropertyValue :: Text -> PropertyValue toPropertyValue = PropertyValue -- | Check whether the property value is non-nil (i.e. truish). isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] +isNonNil p = T.toLower (fromValue p) `notElem` ["()", "{}", "nil"] -- | Key/value pairs from a PROPERTIES drawer type Properties = [(PropertyKey, PropertyValue)] @@ -273,7 +275,7 @@ headlineToHeader hdln = do todoKeyword :: Monad m => OrgParser m TodoMarker todoKeyword = try $ do taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) + let kwParser tdm = try (tdm <$ textStr (todoMarkerName tdm) <* spaceChar <* updateLastPreCharPos) choice (map kwParser taskStates) @@ -281,26 +283,26 @@ todoKeyword = try $ do todoKeywordToInlines :: TodoMarker -> Inlines todoKeywordToInlines tdm = let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm + todoState = T.toLower . T.pack . show $ todoMarkerState tdm classes = [todoState, todoText] in B.spanWith (mempty, classes, mempty) (B.str todoText) propertiesToAttr :: Properties -> Attr propertiesToAttr properties = let - toStringPair = fromKey *** fromValue + toTextPair = fromKey *** fromValue customIdKey = toPropertyKey "custom_id" classKey = toPropertyKey "class" unnumberedKey = toPropertyKey "unnumbered" specialProperties = [customIdKey, classKey, unnumberedKey] id' = maybe mempty fromValue . lookup customIdKey $ properties cls = maybe mempty fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) + kvs' = map toTextPair . filter ((`notElem` specialProperties) . fst) $ properties isUnnumbered = maybe False isNonNil . lookup unnumberedKey $ properties in - (id', words cls ++ ["unnumbered" | isUnnumbered], kvs') + (id', T.words cls ++ ["unnumbered" | isUnnumbered], kvs') tagsToInlines :: [Tag] -> Inlines tagsToInlines [] = mempty @@ -336,15 +338,15 @@ planningToBlock planning = do <> B.emph (B.str time) -- | An Org timestamp, including repetition marks. TODO: improve -type Timestamp = String +type Timestamp = Text timestamp :: Monad m => OrgParser m Timestamp timestamp = try $ do openChar <- oneOf "<[" let isActive = openChar == '<' let closeChar = if isActive then '>' else ']' - content <- many1Till anyChar (char closeChar) - return (openChar : content ++ [closeChar]) + content <- many1TillChar anyChar (char closeChar) + return $ T.cons openChar $ content `T.snoc` closeChar -- | Planning information for a subtree/headline. data PlanningInfo = PlanningInfo @@ -374,7 +376,7 @@ planningInfo = try $ do propertiesDrawer :: Monad m => OrgParser m Properties propertiesDrawer = try $ do drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" + guard $ T.toUpper drawerType == "PROPERTIES" manyTill property (try endOfDrawer) where property :: Monad m => OrgParser m (PropertyKey, PropertyValue) @@ -382,12 +384,12 @@ propertiesDrawer = try $ do key :: Monad m => OrgParser m PropertyKey key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + skipSpaces *> char ':' *> many1TillChar nonspaceChar (char ':') value :: Monad m => OrgParser m PropertyValue value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + skipSpaces *> manyTillChar anyChar (try $ skipSpaces *> newline) - endOfDrawer :: Monad m => OrgParser m String + endOfDrawer :: Monad m => OrgParser m Text endOfDrawer = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index f783eaa0f..f1f089273 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ExportSettings Copyright : © 2016–2019 Albert Krewinkel @@ -21,6 +22,7 @@ import Text.Pandoc.Readers.Org.Parsing import Control.Monad (mzero, void) import Data.Char (toLower) import Data.Maybe (listToMaybe) +import Data.Text (Text) -- | Read and handle space separated org-mode export settings. exportSettings :: PandocMonad m => OrgParser m () @@ -70,11 +72,11 @@ exportSetting = choice genericExportSetting :: Monad m => OrgParser m a - -> String + -> Text -> ExportSettingSetter a -> OrgParser m () genericExportSetting optionParser settingIdentifier setter = try $ do - _ <- string settingIdentifier *> char ':' + _ <- textStr settingIdentifier *> char ':' value <- optionParser updateState $ modifyExportSettings value where @@ -82,11 +84,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do st { orgStateExportSettings = setter val . orgStateExportSettings $ st } -- | A boolean option, either nil (False) or non-nil (True). -booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m () +booleanSetting :: Monad m => Text -> ExportSettingSetter Bool -> OrgParser m () booleanSetting = genericExportSetting elispBoolean -- | An integer-valued option. -integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m () +integerSetting :: Monad m => Text -> ExportSettingSetter Int -> OrgParser m () integerSetting = genericExportSetting parseInt where parseInt = try $ @@ -95,7 +97,7 @@ integerSetting = genericExportSetting parseInt -- | Either the string "headline" or an elisp boolean and treated as an -- @ArchivedTreesOption@. archivedTreeSetting :: Monad m - => String + => Text -> ExportSettingSetter ArchivedTreesOption -> OrgParser m () archivedTreeSetting = @@ -115,42 +117,42 @@ archivedTreeSetting = -- | A list or a complement list (i.e. a list starting with `not`). complementableListSetting :: Monad m - => String - -> ExportSettingSetter (Either [String] [String]) + => Text + -> ExportSettingSetter (Either [Text] [Text]) -> OrgParser m () complementableListSetting = genericExportSetting $ choice - [ Left <$> complementStringList + [ Left <$> complementTextList , Right <$> stringList , (\b -> if b then Left [] else Right []) <$> elispBoolean ] where -- Read a plain list of strings. - stringList :: Monad m => OrgParser m [String] + stringList :: Monad m => OrgParser m [Text] stringList = try $ char '(' - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' -- Read an emacs lisp list specifying a complement set. - complementStringList :: Monad m => OrgParser m [String] - complementStringList = try $ + complementTextList :: Monad m => OrgParser m [Text] + complementTextList = try $ string "(not " - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' - elispString :: Monad m => OrgParser m String - elispString = try $ + elispText :: Monad m => OrgParser m Text + elispText = try $ char '"' - *> manyTill alphaNum (char '"') + *> manyTillChar alphaNum (char '"') -- | Read but ignore the export setting. -ignoredSetting :: Monad m => String -> OrgParser m () -ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) +ignoredSetting :: Monad m => Text -> OrgParser m () +ignoredSetting s = try (() <$ textStr s <* char ':' <* many1 nonspaceChar) -- | Read any setting string, but ignore it and emit a warning. ignoreAndWarn :: PandocMonad m => OrgParser m () ignoreAndWarn = try $ do - opt <- many1 nonspaceChar + opt <- many1Char nonspaceChar report (UnknownOrgExportOption opt) return () diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index cae590c5f..da638f717 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -20,7 +20,7 @@ import Prelude import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Inlines) @@ -38,12 +38,14 @@ import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) import Data.List (intersperse) import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T import Data.Maybe (fromMaybe) -- -- Functions acting on the parser state -- -recordAnchorId :: PandocMonad m => String -> OrgParser m () +recordAnchorId :: PandocMonad m => Text -> OrgParser m () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : orgStateAnchorIds s } @@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines) linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural @@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: PandocMonad m => OrgParser m String +orgRefCiteKey :: PandocMonad m => OrgParser m Text orgRefCiteKey = let citeKeySpecialChars = "-_:\\./," :: String isCiteKeySpecialChar c = c `elem` citeKeySpecialChars @@ -329,7 +331,7 @@ orgRefCiteKey = endOfCitation = try $ do many $ satisfy isCiteKeySpecialChar satisfy $ not . isCiteKeyChar - in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation -- | Supported citation types. Only a small subset of org-ref types is @@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote inlineNote :: PandocMonad m => OrgParser m (F Inlines) inlineNote = try $ do string "[fn:" - ref <- many alphaNum + ref <- manyChar alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - unless (null ref) $ - addToNotesTable ("fn:" ++ ref, note) + unless (T.null ref) $ + addToNotesTable ("fn:" <> ref, note) return $ B.note <$> note referencedNote :: PandocMonad m => OrgParser m (F Inlines) @@ -397,7 +399,7 @@ referencedNote = try $ do return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return . B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" <> ref <> "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } @@ -420,7 +422,7 @@ explicitOrImageLink = try $ do return $ do src <- srcF title <- titleF - case cleanLinkString descr of + case cleanLinkText descr of Just imgSrc | isImageFilename imgSrc -> return . B.link src "" $ B.image imgSrc mempty mempty _ -> @@ -429,10 +431,10 @@ explicitOrImageLink = try $ do selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) selflinkOrImage = try $ do target <- char '[' *> linkTarget <* char ']' - case cleanLinkString target of - Nothing -> case target of - '#':_ -> returnF $ B.link target "" (B.str target) - _ -> return $ internalLink target (B.str target) + case cleanLinkText target of + Nothing -> case T.uncons target of + Just ('#', _) -> returnF $ B.link target "" (B.str target) + _ -> return $ internalLink target (B.str target) Just nonDocTgt -> if isImageFilename nonDocTgt then returnF $ B.image nonDocTgt "" "" else returnF $ B.link nonDocTgt "" (B.str target) @@ -449,35 +451,35 @@ angleLink = try $ do char '>' return link -linkTarget :: PandocMonad m => OrgParser m String -linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]") +linkTarget :: PandocMonad m => OrgParser m Text +linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]") -possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String +possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser m (F String) +applyCustomLinkFormat :: Text -> OrgParser m (F Text) applyCustomLinkFormat link = do - let (linkType, rest) = break (== ':') link + let (linkType, rest) = T.break (== ':') link return $ do formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter + return $ maybe link ($ T.drop 1 rest) formatter -- | Take a link and return a function which produces new inlines when given -- description inlines. -linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF :: Text -> Inlines -> F Inlines linkToInlinesF linkStr = - case linkStr of - "" -> pure . B.link mempty "" -- wiki link (empty by convention) - ('#':_) -> pure . B.link linkStr "" -- document-local fraction - _ -> case cleanLinkString linkStr of - Just extTgt -> return . B.link extTgt "" - Nothing -> internalLink linkStr -- other internal link - -internalLink :: String -> Inlines -> F Inlines + case T.uncons linkStr of + Nothing -> pure . B.link mempty "" -- wiki link (empty by convention) + Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction + _ -> case cleanLinkText linkStr of + Just extTgt -> return . B.link extTgt "" + Nothing -> internalLink linkStr -- other internal link + +internalLink :: Text -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds if anchorB - then return $ B.link ('#':link) "" title + then return $ B.link ("#" <> link) "" title else return $ B.emph title -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with @@ -493,15 +495,15 @@ anchor = try $ do returnF $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") + *> many1Char (noneOf "\t\n\r<>\"' ") <* string ">>" <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors -- the org function @org-export-solidify-link-text@. -solidify :: String -> String -solidify = map replaceSpecialChar +solidify :: Text -> Text +solidify = T.map replaceSpecialChar where replaceSpecialChar c | isAlphaNum c = c | c `elem` ("_.-:" :: String) = c @@ -511,25 +513,25 @@ solidify = map replaceSpecialChar inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) inlineCodeBlock = try $ do string "src_" - lang <- many1 orgArgWordChar + lang <- many1Char orgArgWordChar opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption - inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r") + inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang] let attrKeyVal = originalLang lang <> opts let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode returnF $ if exportsCode opts then codeInlineBlck else mempty where - inlineBlockOption :: PandocMonad m => OrgParser m (String, String) + inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text) inlineBlockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgInlineParamValue return (argKey, paramValue) - orgInlineParamValue :: PandocMonad m => OrgParser m String + orgInlineParamValue :: PandocMonad m => OrgParser m Text orgInlineParamValue = try $ skipSpaces *> notFollowedBy (char ':') - *> many1 (noneOf "\t\n\r ]") + *> many1Char (noneOf "\t\n\r ]") <* skipSpaces @@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) math :: PandocMonad m => OrgParser m (F Inlines) math = return . B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' + , mathTextBetween '$' , rawMathBetween "\\(" "\\)" ] @@ -604,7 +606,7 @@ updatePositions c = do return c symbol :: PandocMonad m => OrgParser m (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions) emphasisBetween :: PandocMonad m => Char @@ -619,7 +621,7 @@ emphasisBetween c = try $ do verbatimBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text verbatimBetween c = try $ emphasisStart c *> many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) @@ -627,33 +629,33 @@ verbatimBetween c = try $ verbatimChar = noneOf "\n\r" >>= updatePositions -- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: PandocMonad m +mathTextBetween :: PandocMonad m => Char - -> OrgParser m String -mathStringBetween c = try $ do + -> OrgParser m Text +mathTextBetween c = try $ do mathStart c body <- many1TillNOrLessNewlines mathAllowedNewlines (noneOf (c:"\n\r")) (lookAhead $ mathEnd c) final <- mathEnd c - return $ body ++ [final] + return $ T.snoc body final -- | Parse a single character between @c@ using math rules math1CharBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars char c eof <|> () <$ lookAhead (oneOf mathPostChars) - return [res] + return $ T.singleton res rawMathBetween :: PandocMonad m - => String - -> String - -> OrgParser m String -rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) + => Text + -> Text + -> OrgParser m Text +rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e) -- | Parses the start (opening character) of emphasis emphasisStart :: PandocMonad m => Char -> OrgParser m Char @@ -702,10 +704,10 @@ enclosedInlines start end = try $ enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b - -> OrgParser m String + -> OrgParser m Text enclosedRaw start end = try $ start *> (onSingleLine <|> spanningTwoLines) - where onSingleLine = try $ many1Till (noneOf "\n\r") end + where onSingleLine = try $ many1TillChar (noneOf "\n\r") end spanningTwoLines = try $ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine @@ -714,7 +716,7 @@ enclosedRaw start end = try $ many1TillNOrLessNewlines :: PandocMonad m => Int -> OrgParser m Char -> OrgParser m a - -> OrgParser m String + -> OrgParser m Text many1TillNOrLessNewlines n p end = try $ nMoreLines (Just n) mempty >>= oneOrMore where @@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $ rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) finalLine = try $ manyTill p end minus1 k = k - 1 - oneOrMore cs = cs <$ guard (not $ null cs) + oneOrMore cs = T.pack cs <$ guard (not $ null cs) -- Org allows customization of the way it reads emphasis. We use the defaults -- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` @@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") - , simpleSubOrSuperString + , simpleSubOrSuperText ] >>= parseFromString (mconcat <$> many inline) - where enclosing (left, right) s = left : s ++ [right] + where enclosing (left, right) s = T.cons left $ T.snoc s right -simpleSubOrSuperString :: PandocMonad m => OrgParser m String -simpleSubOrSuperString = try $ do +simpleSubOrSuperText :: PandocMonad m => OrgParser m Text +simpleSubOrSuperText = try $ do state <- getState guard . exportSubSuperscripts . orgStateExportSettings $ state - choice [ string "*" - , mappend <$> option [] ((:[]) <$> oneOf "+-") - <*> many1 alphaNum + choice [ textStr "*" + , mappend <$> option "" (T.singleton <$> oneOf "+-") + <*> many1Char alphaNum ] inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) @@ -793,28 +795,28 @@ inlineLaTeX = try $ do maybe mzero returnF $ parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils where - parseAsMath :: String -> Maybe Inlines + parseAsMath :: Text -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs - parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) + parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines) parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs - parseAsMathMLSym :: String -> Maybe Inlines + parseAsMathMLSym :: Text -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) -- drop initial backslash and any trailing "{}" - where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 + where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1 state :: ParserState state = def{ stateOptions = def{ readerExtensions = enableExtension Ext_raw_tex (readerExtensions def) } } - texMathToPandoc :: String -> Maybe [Inline] + texMathToPandoc :: Text -> Maybe [Inline] texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just -inlineLaTeXCommand :: PandocMonad m => OrgParser m String +inlineLaTeXCommand :: PandocMonad m => OrgParser m Text inlineLaTeXCommand = try $ do rest <- getInput st <- getState @@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. - let cmdNoSpc = dropWhileEnd isSpace cs - let len = length cmdNoSpc + let cmdNoSpc = T.dropWhileEnd isSpace cs + let len = T.length cmdNoSpc count len anyChar return cmdNoSpc _ -> mzero --- Taken from Data.OldList. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] - exportSnippet :: PandocMonad m => OrgParser m (F Inlines) exportSnippet = try $ do string "@@" - format <- many1Till (alphaNum <|> char '-') (char ':') - snippet <- manyTill anyChar (try $ string "@@") + format <- many1TillChar (alphaNum <|> char '-') (char ':') + snippet <- manyTillChar anyChar (try $ string "@@") returnF $ B.rawInline format snippet macro :: PandocMonad m => OrgParser m (F Inlines) @@ -845,7 +843,7 @@ macro = try $ do recursionDepth <- orgStateMacroDepth <$> getState guard $ recursionDepth < 15 string "{{{" - name <- many alphaNum + name <- manyChar alphaNum args <- ([] <$ string "}}}") <|> char '(' *> argument `sepBy` char ',' <* eoa expander <- lookupMacro name <$> getState @@ -857,7 +855,7 @@ macro = try $ do updateState $ \s -> s { orgStateMacroDepth = recursionDepth } return res where - argument = many $ notFollowedBy eoa *> noneOf "," + argument = manyChar $ notFollowedBy eoa *> noneOf "," eoa = string ")}}}" smart :: PandocMonad m => OrgParser m (F Inlines) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 0a388403e..811a5b974 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Meta Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -30,11 +31,12 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) import Control.Monad (mzero, void, when) -import Data.Char (toLower) import Data.List (intersperse) import Data.Maybe (fromMaybe) import qualified Data.Map as M import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T import Network.HTTP (urlEncode) -- | Returns the current meta, respecting export options. @@ -47,7 +49,7 @@ metaExport = do . (if exportWithEmail settings then id else removeMeta "email") <$> orgStateMeta st -removeMeta :: String -> Meta -> Meta +removeMeta :: Text -> Meta -> Meta removeMeta key meta' = let metaMap = unMeta meta' in Meta $ M.delete key metaMap @@ -60,18 +62,18 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do - key <- map toLower <$> metaKey + key <- T.toLower <$> metaKey (key', value) <- metaValue key let addMetaValue st = st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } when (key' /= "results") $ updateState addMetaValue -metaKey :: Monad m => OrgParser m String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces +metaKey :: Monad m => OrgParser m Text +metaKey = T.toLower <$> many1Char (noneOf ": \n\r") + <* char ':' + <* skipSpaces -metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue) +metaValue :: PandocMonad m => Text -> OrgParser m (Text, F MetaValue) metaValue key = let inclKey = "header-includes" in case key of @@ -88,7 +90,7 @@ metaValue key = -- Org-mode expects class options to contain the surrounding brackets, -- pandoc does not. "latex_class_options" -> ("classoption",) <$> - metaModifiedString (filter (`notElem` "[]")) + metaModifiedString (T.filter (`notElem` ("[]" :: String))) "html_head" -> (inclKey,) <$> accumulatingList inclKey (metaExportSnippet "html") _ -> (key,) <$> metaString @@ -98,25 +100,25 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do - itemStrs <- many1 (noneOf ",\n") `sepBy1` char ',' + itemStrs <- many1Char (noneOf ",\n") `sepBy1` char ',' newline - items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs + items <- mapM (parseFromString inlinesTillNewline . (<> "\n")) itemStrs let toMetaInlines = MetaInlines . B.toList return $ MetaList . map toMetaInlines <$> sequence items metaString :: Monad m => OrgParser m (F MetaValue) metaString = metaModifiedString id -metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue) +metaModifiedString :: Monad m => (Text -> Text) -> OrgParser m (F MetaValue) metaModifiedString f = return . MetaString . f <$> anyLine -- | Read an format specific meta definition -metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue) +metaExportSnippet :: Monad m => Text -> OrgParser m (F MetaValue) metaExportSnippet format = return . MetaInlines . B.toList . B.rawInline format <$> anyLine -- | Accumulate the result of the @parser@ in a list under @key@. -accumulatingList :: Monad m => String +accumulatingList :: Monad m => Text -> OrgParser m (F MetaValue) -> OrgParser m (F MetaValue) accumulatingList key p = do @@ -147,33 +149,33 @@ optionLine = try $ do "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar _ -> mzero -addLinkFormat :: Monad m => String - -> (String -> String) +addLinkFormat :: Monad m => Text + -> (Text -> Text) -> OrgParser m () addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: Monad m => OrgParser m (String, String -> String) +parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text) parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat return (linkType, linkSubst) -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. -parseFormat :: Monad m => OrgParser m (String -> String) +parseFormat :: Monad m => OrgParser m (Text -> Text) parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend where -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) + replacePlain = try $ (\x -> T.concat . flip intersperse x) <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack) <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest + justAppend = try $ (<>) <$> rest - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + rest = manyTillChar anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTillChar (noneOf "\n\r") (try $ string ('%':c:"")) tagList :: Monad m => OrgParser m [Tag] tagList = do @@ -231,41 +233,41 @@ todoSequence = try $ do (x:xs) -> return $ keywordsToSequence (reverse xs) [x] where - todoKeywords :: Monad m => OrgParser m [String] + todoKeywords :: Monad m => OrgParser m [Text] todoKeywords = try $ - let keyword = many1 nonspaceChar <* skipSpaces + let keyword = many1Char nonspaceChar <* skipSpaces endOfKeywords = todoDoneSep <|> void newline in manyTill keyword (lookAhead endOfKeywords) todoDoneSep :: Monad m => OrgParser m () todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 - keywordsToSequence :: [String] -> [String] -> TodoSequence + keywordsToSequence :: [Text] -> [Text] -> TodoSequence keywordsToSequence todo done = let todoMarkers = map (TodoMarker Todo) todo doneMarkers = map (TodoMarker Done) done in todoMarkers ++ doneMarkers -macroDefinition :: Monad m => OrgParser m (String, [String] -> String) +macroDefinition :: Monad m => OrgParser m (Text, [Text] -> Text) macroDefinition = try $ do - macroName <- many1 nonspaceChar <* skipSpaces + macroName <- many1Char nonspaceChar <* skipSpaces firstPart <- expansionPart (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart) let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder return (macroName, expander) where placeholder :: Monad m => OrgParser m Int - placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1 digit + placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1Char digit - expansionPart :: Monad m => OrgParser m String - expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r") + expansionPart :: Monad m => OrgParser m Text + expansionPart = try $ manyChar (notFollowedBy placeholder *> noneOf "\n\r") alternate :: [a] -> [a] -> [a] alternate [] ys = ys alternate xs [] = xs alternate (x:xs) (y:ys) = x : y : alternate xs ys - reorder :: [Int] -> [String] -> [String] + reorder :: [Int] -> [Text] -> [Text] reorder perm xs = let element n = take 1 $ drop (n - 1) xs in concatMap element perm diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index d6dde8b22..cf5583b76 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ParserState Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -63,16 +64,16 @@ import Text.Pandoc.Readers.LaTeX.Types (Macro) type F = Future OrgParserState -- | An inline note / footnote containing the note key and its (inline) value. -type OrgNoteRecord = (String, F Blocks) +type OrgNoteRecord = (Text, F Blocks) -- | Table of footnotes type OrgNoteTable = [OrgNoteRecord] -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. -type OrgLinkFormatters = M.Map String (String -> String) +type OrgLinkFormatters = M.Map Text (Text -> Text) -- | Macro expander function -type MacroExpander = [String] -> String +type MacroExpander = [Text] -> Text -- | Tag -newtype Tag = Tag { fromTag :: String } +newtype Tag = Tag { fromTag :: Text } deriving (Show, Eq, Ord) -- | The states in which a todo item can be @@ -82,7 +83,7 @@ data TodoState = Todo | Done -- | A ToDo keyword like @TODO@ or @DONE@. data TodoMarker = TodoMarker { todoMarkerState :: TodoState - , todoMarkerName :: String + , todoMarkerName :: Text } deriving (Show, Eq) @@ -91,7 +92,7 @@ type TodoSequence = [TodoMarker] -- | Org-mode parser state data OrgParserState = OrgParserState - { orgStateAnchorIds :: [String] + { orgStateAnchorIds :: [Text] , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before -- emphasis; spaces and newlines are @@ -102,13 +103,13 @@ data OrgParserState = OrgParserState , orgStateExcludeTags :: Set.Set Tag , orgStateExcludeTagsChanged :: Bool , orgStateExportSettings :: ExportSettings - , orgStateIdentifiers :: Set.Set String - , orgStateIncludeFiles :: [String] + , orgStateIdentifiers :: Set.Set Text + , orgStateIncludeFiles :: [Text] , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMacros :: M.Map String MacroExpander + , orgStateMacros :: M.Map Text MacroExpander , orgStateMacroDepth :: Int , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable @@ -212,10 +213,10 @@ activeTodoSequences st = activeTodoMarkers :: OrgParserState -> TodoSequence activeTodoMarkers = concat . activeTodoSequences -lookupMacro :: String -> OrgParserState -> Maybe MacroExpander +lookupMacro :: Text -> OrgParserState -> Maybe MacroExpander lookupMacro macroName = M.lookup macroName . orgStateMacros -registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState +registerMacro :: (Text, MacroExpander) -> OrgParserState -> OrgParserState registerMacro (name, expander) st = let curMacros = orgStateMacros st in st{ orgStateMacros = M.insert name expander curMacros } @@ -236,7 +237,7 @@ data ArchivedTreesOption = -- These settings can be changed via OPTIONS statements. data ExportSettings = ExportSettings { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees - , exportDrawers :: Either [String] [String] + , exportDrawers :: Either [Text] [Text] -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 24aa0779d..718925120 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -32,7 +32,13 @@ module Text.Pandoc.Readers.Org.Parsing , orgTagWordChar -- * Re-exports from Text.Pandoc.Parser , ParserContext (..) + , textStr + , countChar + , manyChar + , many1Char + , manyTillChar , many1Till + , many1TillChar , notFollowedBy' , spaceChar , nonspaceChar @@ -98,6 +104,7 @@ module Text.Pandoc.Readers.Org.Parsing ) where import Prelude +import Data.Text (Text) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline, @@ -108,14 +115,14 @@ import Control.Monad (guard) import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m) +type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities -- -- | Parse any line of text -anyLine :: Monad m => OrgParser m String +anyLine :: Monad m => OrgParser m Text anyLine = P.anyLine <* updateLastPreCharPos @@ -123,7 +130,7 @@ anyLine = -- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character -- allowed before emphasised text. -parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a +parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a parseFromString parser str' = do updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } result <- P.parseFromString parser str' @@ -142,7 +149,7 @@ newline = <* updateLastForbiddenCharPos -- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: Monad m => OrgParser m [Char] +blanklines :: Monad m => OrgParser m Text blanklines = P.blanklines <* updateLastPreCharPos @@ -192,21 +199,21 @@ updateLastPreCharPos = getPosition >>= \p -> -- -- | Read the key of a plist style key-value list. -orgArgKey :: Monad m => OrgParser m String +orgArgKey :: Monad m => OrgParser m Text orgArgKey = try $ skipSpaces *> char ':' - *> many1 orgArgWordChar + *> many1Char orgArgWordChar -- | Read the value of a plist style key-value list. -orgArgWord :: Monad m => OrgParser m String -orgArgWord = many1 orgArgWordChar +orgArgWord :: Monad m => OrgParser m Text +orgArgWord = many1Char orgArgWordChar -- | Chars treated as part of a word in plists. orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar = alphaNum <|> oneOf "-_" -orgTagWord :: Monad m => OrgParser m String -orgTagWord = many1 orgTagWordChar +orgTagWord :: Monad m => OrgParser m Text +orgTagWord = many1Char orgTagWordChar orgTagWordChar :: Monad m => OrgParser m Char orgTagWordChar = alphaNum <|> oneOf "@%#_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 34f958373..be0a2068e 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -10,7 +10,7 @@ Utility functions used in other Pandoc Org modules. -} module Text.Pandoc.Readers.Org.Shared - ( cleanLinkString + ( cleanLinkText , isImageFilename , originalLang , translateLang @@ -19,44 +19,44 @@ module Text.Pandoc.Readers.Org.Shared import Prelude import Data.Char (isAlphaNum) -import Data.List (isPrefixOf) +import Data.Text (Text) +import qualified Data.Text as T import System.FilePath (isValid, takeExtension) - +import Text.Pandoc.Shared (elemText) -- | Check whether the given string looks like the path to of URL of an image. -isImageFilename :: String -> Bool -isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri) +isImageFilename :: Text -> Bool +isImageFilename fp = hasImageExtension && (isValid (T.unpack fp) || isKnownProtocolUri) where - hasImageExtension = takeExtension fp `elem` imageExtensions - isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols + hasImageExtension = takeExtension (T.unpack fp) `elem` imageExtensions + isKnownProtocolUri = any (\x -> (x <> "://") `T.isPrefixOf` fp) protocols imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ] protocols = [ "file", "http", "https" ] -- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if -- the string does not appear to be a link. -cleanLinkString :: String -> Maybe String -cleanLinkString s = - case s of - '/':_ -> Just $ "file://" ++ s -- absolute path - '.':'/':_ -> Just s -- relative path - '.':'.':'/':_ -> Just s -- relative path - -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s' - _ -> if isUrl s then Just s else Nothing - where - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) +cleanLinkText :: Text -> Maybe Text +cleanLinkText s + | Just _ <- T.stripPrefix "/" s = Just $ "file://" <> s -- absolute path + | Just _ <- T.stripPrefix "./" s = Just s -- relative path + | Just _ <- T.stripPrefix "../" s = Just s -- relative path + -- Relative path or URL (file schema) + | Just s' <- T.stripPrefix "file:" s = Just $ if "//" `T.isPrefixOf` s' then s else s' + | otherwise = if isUrl s then Just s else Nothing + where + isUrl :: Text -> Bool + isUrl cs = + let (scheme, path) = T.break (== ':') cs + in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme + && not (T.null path) -- | Creates an key-value pair marking the original language name specified for -- a piece of source code. -- | Creates an key-value attributes marking the original language name -- specified for a piece of source code. -originalLang :: String -> [(String, String)] +originalLang :: Text -> [(Text, Text)] originalLang lang = let transLang = translateLang lang in if transLang == lang @@ -66,7 +66,7 @@ originalLang lang = -- | Translate from Org-mode's programming language identifiers to those used -- by Pandoc. This is useful to allow for proper syntax highlighting in -- Pandoc output. -translateLang :: String -> String +translateLang :: Text -> Text translateLang cs = case cs of "C" -> "c" @@ -79,5 +79,5 @@ translateLang cs = "sqlite" -> "sql" _ -> cs -exportsCode :: [(String, String)] -> Bool +exportsCode :: [(Text, Text)] -> Bool exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7e29caf28..d2fba4449 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.RST Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -19,9 +20,8 @@ import Control.Arrow (second) import Control.Monad (forM_, guard, liftM, mplus, mzero, when) import Control.Monad.Except (throwError) import Control.Monad.Identity (Identity (..)) -import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum) -import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, - nub, sort, transpose) +import Data.Char (isHexDigit, isSpace, toUpper, isAlphaNum) +import Data.List (deleteFirstsBy, elemIndex, nub, sort, transpose) import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Sequence (ViewR (..), viewr) @@ -47,16 +47,16 @@ import Text.Printf (printf) -- | Parse reStructuredText string and return Pandoc document. readRST :: PandocMonad m => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ Text to parse (assuming @'\n'@ line endings) -> m Pandoc readRST opts s = do parsed <- readWithM parseRST def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e -type RSTParser m = ParserT [Char] ParserState m +type RSTParser m = ParserT Text ParserState m -- -- Constants and data structure definitions @@ -113,7 +113,7 @@ titleTransform (bs, meta) = metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta metaFromDefList ds meta = adjustAuthors $ foldr f meta ds - where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) + where f (k,v) = setMeta (T.toLower $ stringify k) (mconcat $ map fromList v) adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" @@ -136,13 +136,13 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds isSp LineBreak = True isSp _ = False splitOnSemi = splitBy (==Str ";") - factorSemi (Str []) = [] - factorSemi (Str s) = case break (==';') s of - (xs,[]) -> [Str xs] - (xs,';':ys) -> Str xs : Str ";" : - factorSemi (Str ys) - (xs,ys) -> Str xs : - factorSemi (Str ys) + factorSemi (Str "") = [] + factorSemi (Str s) = case T.break (==';') s of + (xs,"") -> [Str xs] + (xs,T.uncons -> Just (';',ys)) -> Str xs : Str ";" : + factorSemi (Str ys) + (xs,ys) -> Str xs : + factorSemi (Str ys) factorSemi x = [x] parseRST :: PandocMonad m => RSTParser m Pandoc @@ -151,7 +151,7 @@ parseRST = do startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- concat <$> + docMinusKeys <- T.concat <$> manyTill (referenceKey <|> anchorDef <|> noteBlock <|> citationBlock <|> (snd <$> withRaw comment) <|> @@ -180,7 +180,7 @@ parseRST = do return $ Pandoc meta' (blocks' ++ refBlock) parseCitation :: PandocMonad m - => (String, String) -> RSTParser m (Inlines, [Blocks]) + => (Text, Text) -> RSTParser m (Inlines, [Blocks]) parseCitation (ref, raw) = do contents <- parseFromString' parseBlocks raw return (B.spanWith (ref, ["citation-label"], []) (B.str ref), @@ -215,23 +215,23 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: Monad m => Int -> RSTParser m (String, String) +rawFieldListItem :: Monad m => Int -> RSTParser m (Text, Text) rawFieldListItem minIndent = try $ do indent <- length <$> many (char ' ') guard $ indent >= minIndent char ':' - name <- many1Till (noneOf "\n") (char ':') + name <- many1TillChar (noneOf "\n") (char ':') (() <$ lookAhead newline) <|> skipMany1 spaceChar first <- anyLine rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock - let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" + let raw = (if T.null first then "" else (first <> "\n")) <> rest <> "\n" return (name, raw) fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent - term <- parseInlineFromString name + term <- parseInlineFromText name contents <- parseFromString' parseBlocks raw optional blanklines return (term, [contents]) @@ -251,12 +251,12 @@ fieldList = try $ do lineBlock :: PandocMonad m => RSTParser m Blocks lineBlock = try $ do lines' <- lineBlockLines - lines'' <- mapM parseInlineFromString lines' + lines'' <- mapM parseInlineFromText lines' return $ B.lineBlock lines'' -lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks +lineBlockDirective :: PandocMonad m => Text -> RSTParser m Blocks lineBlockDirective body = do - lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body + lines' <- mapM parseInlineFromText $ T.lines $ stripTrailingNewlines body return $ B.lineBlock lines' -- @@ -271,9 +271,9 @@ para = try $ do newline blanklines case viewr (B.unMany result) of - ys :> Str xs | "::" `isSuffixOf` xs -> do + ys :> Str xs | "::" `T.isSuffixOf` xs -> do raw <- option mempty codeBlockBody - return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs)) + return $ B.para (B.Many ys <> B.str (T.take (T.length xs - 1) xs)) <> raw _ -> return (B.para result) @@ -349,7 +349,7 @@ singleHeader' = try $ do -- hrule block -- -hrule :: Monad m => ParserT [Char] st m Blocks +hrule :: Monad m => ParserT Text st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -364,7 +364,7 @@ hrule = try $ do -- read a line indented by a given string indentedLine :: (HasReaderOptions st, Monad m) - => Int -> ParserT [Char] st m [Char] + => Int -> ParserT Text st m Text indentedLine indents = try $ do lookAhead spaceChar gobbleAtMostSpaces indents @@ -373,29 +373,29 @@ indentedLine indents = try $ do -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. indentedBlock :: (HasReaderOptions st, Monad m) - => ParserT [Char] st m [Char] + => ParserT Text st m Text indentedBlock = try $ do indents <- length <$> lookAhead (many1 spaceChar) lns <- many1 $ try $ do b <- option "" blanklines l <- indentedLine indents - return (b ++ l) + return (b <> l) optional blanklines - return $ unlines lns + return $ T.unlines lns -quotedBlock :: Monad m => ParserT [Char] st m [Char] +quotedBlock :: Monad m => ParserT Text st m Text quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines - return $ unlines lns + return $ T.unlines lns -codeBlockStart :: Monad m => ParserT [Char] st m Char +codeBlockStart :: Monad m => ParserT Text st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks +codeBlock :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks +codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> (indentedBlock <|> quotedBlock) @@ -407,24 +407,24 @@ lhsCodeBlock = try $ do lns <- latexCodeBlock <|> birdCodeBlock blanklines return $ B.codeBlockWith ("", ["haskell","literate"], []) - $ intercalate "\n" lns + $ T.intercalate "\n" lns -latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]] +latexCodeBlock :: Monad m => ParserT Text st m [Text] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]] +birdCodeBlock :: Monad m => ParserT Text st m [Text] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it - if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns + if all (\ln -> T.null ln || T.take 1 ln == " ") lns + then map (T.drop 1) lns else lns -birdTrackLine :: Monad m => ParserT [Char] st m [Char] +birdTrackLine :: Monad m => ParserT Text st m Text birdTrackLine = char '>' >> anyLine -- @@ -435,7 +435,7 @@ blockQuote :: PandocMonad m => RSTParser m Blocks blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ raw <> "\n\n" return $ B.blockQuote contents {- @@ -445,12 +445,12 @@ encoding -} includeDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks includeDirective top fields body = do let f = trim top - guard $ not (null f) - guard $ null (trim body) + guard $ not (T.null f) + guard $ T.null (trim body) -- options let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead @@ -458,11 +458,11 @@ includeDirective top fields body = do oldInput <- getInput containers <- stateContainers <$> getState when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show oldPos + throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos updateState $ \s -> s{ stateContainers = f : stateContainers s } - mbContents <- readFileFromDirs ["."] f + mbContents <- readFileFromDirs ["."] $ T.unpack f contentLines <- case mbContents of - Just s -> return $ lines s + Just s -> return $ T.lines s Nothing -> do logMessage $ CouldNotLoadIncludeFile f oldPos return [] @@ -478,23 +478,23 @@ includeDirective top fields body = do let contentLines' = drop (startLine' - 1) $ take (endLine' - 1) contentLines let contentLines'' = (case trim <$> lookup "end-before" fields of - Just patt -> takeWhile (not . (patt `isInfixOf`)) + Just patt -> takeWhile (not . (patt `T.isInfixOf`)) Nothing -> id) . (case trim <$> lookup "start-after" fields of Just patt -> drop 1 . - dropWhile (not . (patt `isInfixOf`)) + dropWhile (not . (patt `T.isInfixOf`)) Nothing -> id) $ contentLines' - let contents' = unlines contentLines'' ++ "\n" + let contents' = T.unlines contentLines'' <> "\n" case lookup "code" fields of Just lang -> do let numberLines = lookup "number-lines" fields - let classes = maybe [] words (lookup "class" fields) + let classes = maybe [] T.words (lookup "class" fields) let ident = maybe "" trimr $ lookup "name" fields codeblock ident classes numberLines (trimr lang) contents' False Nothing -> case lookup "literal" fields of Just _ -> return $ B.rawBlock "rst" contents' Nothing -> do - setPosition $ newPos f 1 1 + setPosition $ newPos (T.unpack f) 1 1 setInput contents' bs <- optional blanklines >> (mconcat <$> many block) @@ -519,14 +519,14 @@ definitionListItem = try $ do term <- trimInlines . mconcat <$> many1Till inline endline raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ raw ++ "\n" + contents <- parseFromString' parseBlocks $ raw <> "\n" return (term, [contents]) definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Monad m => ParserT [Char] st m Int +bulletListStart :: Monad m => ParserT Text st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -543,7 +543,7 @@ orderedListStart style delim = try $ do return $ markerLen + length white -- parse a line of a list item -listLine :: Monad m => Int -> RSTParser m [Char] +listLine :: Monad m => Int -> RSTParser m Text listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -551,21 +551,21 @@ listLine markerLength = try $ do -- parse raw text for one list item, excluding start marker and continuations rawListItem :: Monad m => RSTParser m Int - -> RSTParser m (Int, [Char]) + -> RSTParser m (Int, Text) rawListItem start = try $ do markerLength <- start firstLine <- anyLineNewline restLines <- many (listLine markerLength) - return (markerLength, firstLine ++ concat restLines) + return (markerLength, firstLine <> T.concat restLines) -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Monad m => Int -> RSTParser m [Char] +listContinuation :: Monad m => Int -> RSTParser m Text listContinuation markerLength = try $ do - blanks <- many1 blankline + blanks <- many1Char blankline result <- many1 (listLine markerLength) - return $ blanks ++ concat result + return $ blanks <> T.concat result listItem :: PandocMonad m => RSTParser m Int @@ -581,7 +581,7 @@ listItem start = try $ do let oldContext = stateParserContext state setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n" + parsed <- parseFromString' parseBlocks $ T.concat (first:rest) <> "\n" updateState (\st -> st {stateParserContext = oldContext}) return $ case B.toList parsed of [Para xs] -> @@ -617,9 +617,9 @@ comment = try $ do optional indentedBlock return mempty -directiveLabel :: Monad m => RSTParser m String -directiveLabel = map toLower - <$> many1Till (letter <|> char '-') (try $ string "::") +directiveLabel :: Monad m => RSTParser m Text +directiveLabel = T.toLower + <$> many1TillChar (letter <|> char '-') (try $ string "::") directive :: PandocMonad m => RSTParser m Blocks directive = try $ do @@ -631,7 +631,7 @@ directive' = do skipMany1 spaceChar label <- directiveLabel skipMany spaceChar - top <- many $ satisfy (/='\n') + top <- manyChar $ satisfy (/='\n') <|> try (char '\n' <* notFollowedBy' (rawFieldListItem 1) <* many1 (char ' ') <* @@ -644,35 +644,33 @@ directive' = do else many $ rawFieldListItem fieldIndent body <- option "" $ try $ blanklines >> indentedBlock optional blanklines - let body' = body ++ "\n\n" + let body' = body <> "\n\n" name = trim $ fromMaybe "" (lookup "name" fields) - classes = words $ maybe "" trim (lookup "class" fields) + classes = T.words $ maybe "" trim (lookup "class" fields) keyvals = [(k, trim v) | (k, v) <- fields, k /= "name", k /= "class"] imgAttr cl = (name, classes ++ alignClasses, widthAttr ++ heightAttr) where - alignClasses = words $ maybe "" trim (lookup cl fields) ++ - maybe "" (\x -> "align-" ++ trim x) + alignClasses = T.words $ maybe "" trim (lookup cl fields) <> + maybe "" (\x -> "align-" <> trim x) (lookup "align" fields) scale = case trim <$> lookup "scale" fields of - Just v -> case reverse v of - '%':vv -> - case safeRead (reverse vv) of - Just (percent :: Double) - -> percent / 100.0 - Nothing -> 1.0 - _ -> - case safeRead v of - Just (s :: Double) -> s - Nothing -> 1.0 - Nothing -> 1.0 + Just v -> case T.unsnoc v of + Just (vv, '%') -> case safeRead vv of + Just (percent :: Double) + -> percent / 100.0 + Nothing -> 1.0 + _ -> case safeRead v of + Just (s :: Double) -> s + Nothing -> 1.0 + Nothing -> 1.0 widthAttr = maybe [] (\x -> [("width", - show $ scaleDimension scale x)]) + tshow $ scaleDimension scale x)]) $ lookup "width" fields >>= - (lengthToDim . filter (not . isSpace)) + (lengthToDim . T.filter (not . isSpace)) heightAttr = maybe [] (\x -> [("height", - show $ scaleDimension scale x)]) + tshow $ scaleDimension scale x)]) $ lookup "height" fields >>= - (lengthToDim . filter (not . isSpace)) + (lengthToDim . T.filter (not . isSpace)) case label of "include" -> includeDirective top fields body' "table" -> tableDirective top fields body' @@ -682,36 +680,37 @@ directive' = do "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "role" -> addNewRole top $ map (second trim) fields "container" -> B.divWith - (name, "container" : words top ++ classes, []) <$> + (name, "container" : T.words top ++ classes, []) <$> parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim top) + parseInlineFromText (trim top) "unicode" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim $ unicodeTransform top) + parseInlineFromText (trim $ unicodeTransform top) "compound" -> parseFromString' parseBlocks body' "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body' "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body' "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body' - "rubric" -> B.para . B.strong <$> parseInlineFromString top + "rubric" -> B.para . B.strong <$> parseInlineFromText top _ | label `elem` ["attention","caution","danger","error","hint", "important","note","tip","warning","admonition"] -> - do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + do bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body' let lab = case label of - "admonition" -> mempty - (l:ls) -> B.divWith ("",["title"],[]) - (B.para (B.str (toUpper l : ls))) - [] -> mempty + "admonition" -> mempty + (T.uncons -> Just (l, ls)) + -> B.divWith ("",["title"],[]) + (B.para (B.str $ T.cons (toUpper l) ls)) + _ -> mempty return $ B.divWith (name,label:classes,keyvals) (lab <> bod) "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields - tit <- B.para . B.strong <$> parseInlineFromString - (trim top ++ if null subtit + tit <- B.para . B.strong <$> parseInlineFromText + (trim top <> if T.null subtit then "" - else (": " ++ subtit)) + else (": " <> subtit)) bod <- parseFromString' parseBlocks body' return $ B.divWith (name,"sidebar":classes,keyvals) $ tit <> bod "topic" -> - do tit <- B.para . B.strong <$> parseInlineFromString top + do tit <- B.para . B.strong <$> parseInlineFromText top bod <- parseFromString' parseBlocks body' return $ B.divWith (name,"topic":classes,keyvals) $ tit <> bod "default-role" -> mempty <$ updateState (\s -> @@ -726,7 +725,7 @@ directive' = do let attribs = (name, ["aafig"], map (second trimr) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath - $ toChunks $ top ++ "\n\n" ++ body + $ toChunks $ top <> "\n\n" <> body "figure" -> do (caption, legend) <- parseFromString' extractCaption body' let src = escapeURI $ trim top @@ -742,7 +741,7 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = (name, words (trim top), map (second trimr) fields) + let attrs = (name, T.words (trim top), map (second trimr) fields) -- directive content or the first immediately following element children <- case body of "" -> block @@ -750,12 +749,12 @@ directive' = do return $ B.divWith attrs children other -> do pos <- getPosition - logMessage $ SkippedContent (".. " ++ other) pos - bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + logMessage $ SkippedContent (".. " <> other) pos + bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body' return $ B.divWith (name, other:classes, keyvals) bod tableDirective :: PandocMonad m - => String -> [(String, String)] -> String -> RSTParser m Blocks + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks tableDirective top fields body = do bs <- parseFromString' parseBlocks body case B.toList bs of @@ -770,7 +769,7 @@ tableDirective top fields body = do Just "grid" -> widths' Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) - $ splitBy (`elem` (" ," :: String)) specs + $ splitTextBy (`elem` (" ," :: String)) specs Nothing -> widths' -- align is not applicable since we can't represent whole table align return $ B.singleton $ Table (B.toList title) @@ -783,7 +782,7 @@ tableDirective top fields body = do -- since Pandoc doesn't support a table with multiple header rows. -- We don't need to parse :align: as it represents the whole table align. listTableDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks listTableDirective top fields body = do bs <- parseFromString' parseBlocks body @@ -799,7 +798,7 @@ listTableDirective top fields body = do widths = case trim <$> lookup "widths" fields of Just "auto" -> replicate numOfCols 0 Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $ - splitBy (`elem` (" ," :: String)) specs + splitTextBy (`elem` (" ," :: String)) specs _ -> replicate numOfCols 0 return $ B.table title (zip (replicate numOfCols AlignDefault) widths) @@ -812,7 +811,7 @@ listTableDirective top fields body = do normWidths ws = map (/ max 1 (sum ws)) ws csvTableDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks csvTableDirective top fields rawcsv = do let explicitHeader = trim <$> lookup "header" fields @@ -820,14 +819,17 @@ csvTableDirective top fields rawcsv = do csvDelim = case trim <$> lookup "delim" fields of Just "tab" -> '\t' Just "space" -> ' ' - Just [c] -> c + Just (T.unpack -> [c]) + -> c _ -> ',' , csvQuote = case trim <$> lookup "quote" fields of - Just [c] -> c - _ -> '"' + Just (T.unpack -> [c]) + -> c + _ -> '"' , csvEscape = case trim <$> lookup "escape" fields of - Just [c] -> Just c - _ -> Nothing + Just (T.unpack -> [c]) + -> Just c + _ -> Nothing , csvKeepSpace = case trim <$> lookup "keepspace" fields of Just "true" -> True _ -> False @@ -840,16 +842,16 @@ csvTableDirective top fields rawcsv = do lookup "file" fields `mplus` lookup "url" fields of Just u -> do (bs, _) <- fetchItem u - return $ UTF8.toString bs + return $ UTF8.toText bs Nothing -> return rawcsv - let res = parseCSV opts (T.pack $ case explicitHeader of - Just h -> h ++ "\n" ++ rawcsv' - Nothing -> rawcsv') + let res = parseCSV opts (case explicitHeader of + Just h -> h <> "\n" <> rawcsv' + Nothing -> rawcsv') case res of Left e -> throwError $ PandocParsecError "csv table" e Right rawrows -> do - let parseCell = parseFromString' (plain <|> return mempty) . T.unpack + let parseCell = parseFromString' (plain <|> return mempty) let parseRow = mapM parseCell rows <- mapM parseRow rawrows let (headerRow,bodyRows,numOfCols) = @@ -865,7 +867,7 @@ csvTableDirective top fields rawcsv = do Just "auto" -> replicate numOfCols 0 Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) - $ splitBy (`elem` (" ," :: String)) specs + $ splitTextBy (`elem` (" ," :: String)) specs _ -> replicate numOfCols 0 return $ B.table title (zip (replicate numOfCols AlignDefault) widths) @@ -876,10 +878,10 @@ csvTableDirective top fields rawcsv = do -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: PandocMonad m - => String -> [(String, String)] -> RSTParser m Blocks -addNewRole roleString fields = do + => Text -> [(Text, Text)] -> RSTParser m Blocks +addNewRole roleText fields = do pos <- getPosition - (role, parentRole) <- parseFromString' inheritedRole roleString + (role, parentRole) <- parseFromString' inheritedRole roleText customRoles <- stateRstCustomRoles <$> getState let getBaseRole (r, f, a) roles = case M.lookup r roles of @@ -888,7 +890,7 @@ addNewRole roleString fields = do (baseRole, baseFmt, baseAttr) = getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt - annotate :: [String] -> [String] + annotate :: [Text] -> [Text] annotate = maybe id (:) $ if baseRole == "code" then lookup "language" fields @@ -904,7 +906,7 @@ addNewRole roleString fields = do pos "format" -> when (baseRole /= "raw") $ logMessage $ SkippedContent ":format: [because parent of role is not :raw:]" pos - _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos + _ -> logMessage $ SkippedContent (":" <> key <> ":") pos when (parentRole == "raw" && countKeys "format" > 1) $ logMessage $ SkippedContent ":format: [after first in definition of role]" @@ -930,30 +932,29 @@ addNewRole roleString fields = do -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u -- or as XML-style hexadecimal character entities, e.g. ᨫ -- or text, which is used as-is. Comments start with .. -unicodeTransform :: String -> String -unicodeTransform t = - case t of - ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment - ('0':'x':xs) -> go "0x" xs - ('x':xs) -> go "x" xs - ('\\':'x':xs) -> go "\\x" xs - ('U':'+':xs) -> go "U+" xs - ('u':xs) -> go "u" xs - ('\\':'u':xs) -> go "\\u" xs - ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs) - -- drop semicolon - (\(c,s) -> c : unicodeTransform (drop 1 s)) - $ extractUnicodeChar xs - (x:xs) -> x : unicodeTransform xs - [] -> [] - where go pref zs = maybe (pref ++ unicodeTransform zs) - (\(c,s) -> c : unicodeTransform s) - $ extractUnicodeChar zs - -extractUnicodeChar :: String -> Maybe (Char, String) +unicodeTransform :: Text -> Text +unicodeTransform t + | Just xs <- T.stripPrefix ".." t = unicodeTransform $ T.dropWhile (/= '\n') xs -- comment + | Just xs <- T.stripPrefix "0x" t = go "0x" xs + | Just xs <- T.stripPrefix "x" t = go "x" xs + | Just xs <- T.stripPrefix "\\x" t = go "\\x" xs + | Just xs <- T.stripPrefix "U+" t = go "U+" xs + | Just xs <- T.stripPrefix "u" t = go "u" xs + | Just xs <- T.stripPrefix "\\u" t = go "\\u" xs + | Just xs <- T.stripPrefix "&#x" t = maybe ("&#x" <> unicodeTransform xs) + -- drop semicolon + (\(c,s) -> T.cons c $ unicodeTransform $ T.drop 1 s) + $ extractUnicodeChar xs + | Just (x, xs) <- T.uncons t = T.cons x $ unicodeTransform xs + | otherwise = "" + where go pref zs = maybe (pref <> unicodeTransform zs) + (\(c,s) -> T.cons c $ unicodeTransform s) + $ extractUnicodeChar zs + +extractUnicodeChar :: Text -> Maybe (Char, Text) extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc - where (ds,rest) = span isHexDigit s - mbc = safeRead ('\'':'\\':'x':ds ++ "'") + where (ds,rest) = T.span isHexDigit s + mbc = safeRead ("'\\x" <> ds <> "'") extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks) extractCaption = do @@ -963,16 +964,16 @@ extractCaption = do -- divide string by blanklines, and surround with -- \begin{aligned}...\end{aligned} if needed. -toChunks :: String -> [String] -toChunks = dropWhile null - . map (addAligned . trim . unlines) - . splitBy (all (`elem` (" \t" :: String))) . lines +toChunks :: Text -> [Text] +toChunks = dropWhile T.null + . map (addAligned . trim . T.unlines) + . splitBy (T.all (`elem` (" \t" :: String))) . T.lines -- we put this in an aligned environment if it contains \\, see #4254 - where addAligned s = if "\\\\" `isInfixOf` s - then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}" + where addAligned s = if "\\\\" `T.isInfixOf` s + then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}" else s -codeblock :: String -> [String] -> Maybe String -> String -> String -> Bool +codeblock :: Text -> [Text] -> Maybe Text -> Text -> Text -> Bool -> RSTParser m Blocks codeblock ident classes numberLines lang body rmTrailingNewlines = return $ B.codeBlockWith attribs $ stripTrailingNewlines' body @@ -984,7 +985,7 @@ codeblock ident classes numberLines lang body rmTrailingNewlines = : maybe [] (const ["numberLines"]) numberLines ++ classes kvs = maybe [] (\n -> case trimr n of - [] -> [] + "" -> [] xs -> [("startFrom", xs)]) numberLines @@ -992,25 +993,25 @@ codeblock ident classes numberLines lang body rmTrailingNewlines = --- note block --- -noteBlock :: Monad m => RSTParser m [Char] +noteBlock :: Monad m => RSTParser m Text noteBlock = try $ do (ref, raw, replacement) <- noteBlock' noteMarker updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s } -- return blanks so line count isn't affected return replacement -citationBlock :: Monad m => RSTParser m [Char] +citationBlock :: Monad m => RSTParser m Text citationBlock = try $ do (ref, raw, replacement) <- noteBlock' citationMarker updateState $ \s -> s { stateCitations = M.insert ref raw (stateCitations s), - stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[])) + stateKeys = M.insert (toKey ref) (("#" <> ref,""), ("",["citation"],[])) (stateKeys s) } -- return blanks so line count isn't affected return replacement noteBlock' :: Monad m - => RSTParser m String -> RSTParser m (String, String, String) + => RSTParser m Text -> RSTParser m (Text, Text, Text) noteBlock' marker = try $ do startPos <- getPosition string ".." @@ -1021,24 +1022,24 @@ noteBlock' marker = try $ do blanks <- option "" blanklines rest <- option "" indentedBlock endPos <- getPosition - let raw = first ++ "\n" ++ blanks ++ rest ++ "\n" - let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n' + let raw = first <> "\n" <> blanks <> rest <> "\n" + let replacement = T.replicate (sourceLine endPos - sourceLine startPos) "\n" return (ref, raw, replacement) -citationMarker :: Monad m => RSTParser m [Char] +citationMarker :: Monad m => RSTParser m Text citationMarker = do char '[' res <- simpleReferenceName char ']' return res -noteMarker :: Monad m => RSTParser m [Char] +noteMarker :: Monad m => RSTParser m Text noteMarker = do char '[' - res <- many1 digit + res <- many1Char digit <|> - try (char '#' >> liftM ('#':) simpleReferenceName) - <|> count 1 (oneOf "#*") + try (char '#' >> liftM ("#" <>) simpleReferenceName) + <|> countChar 1 (oneOf "#*") char ']' return res @@ -1046,47 +1047,48 @@ noteMarker = do -- reference key -- -quotedReferenceName :: PandocMonad m => RSTParser m String +quotedReferenceName :: PandocMonad m => RSTParser m Text quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - manyTill anyChar (char '`') + manyTillChar anyChar (char '`') -- Simple reference names are single words consisting of alphanumerics -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName :: Monad m => ParserT [Char] st m String +simpleReferenceName :: Monad m => ParserT Text st m Text simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum <|> try (oneOf "-_:+." <* lookAhead alphaNum) - return (x:xs) + return $ T.pack (x:xs) -referenceName :: PandocMonad m => RSTParser m String +referenceName :: PandocMonad m => RSTParser m Text referenceName = quotedReferenceName <|> simpleReferenceName -referenceKey :: PandocMonad m => RSTParser m [Char] +referenceKey :: PandocMonad m => RSTParser m Text referenceKey = do startPos <- getPosition choice [substKey, anonymousKey, regularKey] optional blanklines endPos <- getPosition -- return enough blanks to replace key - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -targetURI :: Monad m => ParserT [Char] st m [Char] +targetURI :: Monad m => ParserT Text st m Text targetURI = do skipSpaces optional $ try $ newline >> notFollowedBy blankline contents <- trim <$> - many1 (satisfy (/='\n') + many1Char (satisfy (/='\n') <|> try (newline >> many1 spaceChar >> noneOf " \t\n")) blanklines - case reverse contents of - -- strip backticks - '_':'`':xs -> return (dropWhile (=='`') (reverse xs) ++ "_") - '_':_ -> return contents - _ -> return (escapeURI contents) + return $ stripBackticks contents + where + stripBackticks t + | Just xs <- T.stripSuffix "`_" t = T.dropWhile (=='`') xs <> "_" + | Just _ <- T.stripSuffix "_" t = t + | otherwise = escapeURI t substKey :: PandocMonad m => RSTParser m () substKey = try $ do @@ -1112,21 +1114,21 @@ anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition - let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) + let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos)) updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -referenceNames :: PandocMonad m => RSTParser m [String] +referenceNames :: PandocMonad m => RSTParser m [Text] referenceNames = do let rn = try $ do string ".. _" ref <- quotedReferenceName - <|> many ( noneOf ":\n" - <|> try (char '\n' <* - string " " <* - notFollowedBy blankline) - <|> try (char ':' <* lookAhead alphaNum) - ) + <|> manyChar ( noneOf ":\n" + <|> try (char '\n' <* + string " " <* + notFollowedBy blankline) + <|> try (char ':' <* lookAhead alphaNum) + ) char ':' return ref first <- rn @@ -1140,18 +1142,18 @@ regularKey = try $ do -- .. _goodbye: url.com refs <- referenceNames src <- targetURI - guard $ not (null src) + guard $ not (T.null src) let keys = map toKey refs forM_ keys $ \key -> updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -anchorDef :: PandocMonad m => RSTParser m [Char] +anchorDef :: PandocMonad m => RSTParser m Text anchorDef = try $ do (refs, raw) <- withRaw $ try (referenceNames <* blanklines) forM_ refs $ \rawkey -> updateState $ \s -> s { stateKeys = - M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } + M.insert (toKey rawkey) (("#" <> rawkey,""), nullAttr) $ stateKeys s } -- keep this for 2nd round of parsing, where we'll add the divs (anchor) return raw @@ -1174,12 +1176,12 @@ anchor = try $ do -- because it hides them from promoteHeader, see #4240 _ -> return $ foldr addDiv b refs -headerBlock :: PandocMonad m => RSTParser m [Char] +headerBlock :: PandocMonad m => RSTParser m Text headerBlock = do ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader') (ident,_,_) <- registerHeader nullAttr txt let key = toKey (stringify txt) - updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr) + updateState $ \s -> s { stateKeys = M.insert key (("#" <> ident,""), nullAttr) $ stateKeys s } return raw @@ -1201,13 +1203,13 @@ headerBlock = do -- Grid tables TODO: -- - column spans -dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int) +dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator @@ -1215,17 +1217,17 @@ simpleTableSep :: Monad m => Char -> RSTParser m Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: Monad m => RSTParser m [Char] +simpleTableFooter :: Monad m => RSTParser m Text simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLine :: Monad m => [Int] -> RSTParser m [Text] simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine -simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [Text] simpleTableRawLineWithEmptyCell indices = try $ do cs <- simpleTableRawLine indices - let isEmptyCell = all (\c -> c == ' ' || c == '\t') + let isEmptyCell = T.all (\c -> c == ' ' || c == '\t') guard $ any isEmptyCell cs return cs @@ -1235,15 +1237,15 @@ simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices conLines <- many $ simpleTableRawLineWithEmptyCell indices - let cols = map unlines . transpose $ firstLine : conLines ++ - [replicate (length indices) "" - | not (null conLines)] + let cols = map T.unlines . transpose $ firstLine : conLines ++ + [replicate (length indices) "" + | not (null conLines)] mapM (parseFromString' parseBlocks) cols -simpleTableSplitLine :: [Int] -> String -> [String] +simpleTableSplitLine :: [Int] -> Text -> [Text] simpleTableSplitLine indices line = map trimr - $ tail $ splitByIndices (init indices) line + $ tail $ splitTextByIndices (init indices) line simpleTableHeader :: PandocMonad m => Bool -- ^ Headerless table @@ -1322,35 +1324,35 @@ inlineContent = choice [ whitespace , escapedChar , symbol ] <?> "inline content" -parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines -parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) +parseInlineFromText :: PandocMonad m => Text -> RSTParser m Inlines +parseInlineFromText = parseFromString' (trimInlines . mconcat <$> many inline) hyphens :: Monad m => RSTParser m Inlines hyphens = do - result <- many1 (char '-') + result <- many1Char (char '-') optional endline -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Monad m => ParserT [Char] st m Inlines +escapedChar :: Monad m => ParserT Text st m Inlines escapedChar = do c <- escaped anyChar return $ if c == ' ' || c == '\n' || c == '\r' -- '\ ' is null in RST then mempty - else B.str [c] + else B.str $ T.singleton c symbol :: Monad m => RSTParser m Inlines symbol = do result <- oneOf specialChars - return $ B.str [result] + return $ B.str $ T.singleton result -- parses inline code, between codeStart and codeEnd code :: Monad m => RSTParser m Inlines code = try $ do string "``" - result <- manyTill anyChar (try (string "``")) + result <- manyTillChar anyChar (try (string "``")) return $ B.code - $ trim $ unwords $ lines result + $ trim $ T.unwords $ T.lines result -- succeeds only if we're not right after a str (ie. in middle of word) atStart :: Monad m => RSTParser m a -> RSTParser m a @@ -1382,7 +1384,7 @@ interpretedRole = try $ do renderRole contents Nothing role nullAttr renderRole :: PandocMonad m - => String -> Maybe String -> String -> Attr -> RSTParser m Inlines + => Text -> Maybe Text -> Text -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of "sup" -> return $ B.superscript $ treatAsText contents "superscript" -> return $ B.superscript $ treatAsText contents @@ -1412,36 +1414,36 @@ renderRole contents fmt role attr = case role of contents where titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref - rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) - where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" - pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) - where padNo = replicate (4 - length pepNo) '0' ++ pepNo - pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + rfcLink rfcNo = B.link rfcUrl ("RFC " <> rfcNo) $ B.str ("RFC " <> rfcNo) + where rfcUrl = "http://www.faqs.org/rfcs/rfc" <> rfcNo <> ".html" + pepLink pepNo = B.link pepUrl ("PEP " <> pepNo) $ B.str ("PEP " <> pepNo) + where padNo = T.replicate (4 - T.length pepNo) "0" <> pepNo + pepUrl = "http://www.python.org/dev/peps/pep-" <> padNo <> "/" treatAsText = B.text . handleEscapes - handleEscapes [] = [] - handleEscapes ('\\':' ':cs) = handleEscapes cs - handleEscapes ('\\':c:cs) = c : handleEscapes cs - handleEscapes (c:cs) = c : handleEscapes cs + handleEscapes = T.concat . removeSpace . T.splitOn "\\" + where headSpace t = fromMaybe t $ T.stripPrefix " " t + removeSpace (x:xs) = x : map headSpace xs + removeSpace [] = [] -roleName :: PandocMonad m => RSTParser m String -roleName = many1 (letter <|> char '-') +roleName :: PandocMonad m => RSTParser m Text +roleName = many1Char (letter <|> char '-') -roleMarker :: PandocMonad m => RSTParser m String +roleMarker :: PandocMonad m => RSTParser m Text roleMarker = char ':' *> roleName <* char ':' -roleBefore :: PandocMonad m => RSTParser m (String,String) +roleBefore :: PandocMonad m => RSTParser m (Text,Text) roleBefore = try $ do role <- roleMarker contents <- unmarkedInterpretedText return (role,contents) -roleAfter :: PandocMonad m => RSTParser m (String,String) +roleAfter :: PandocMonad m => RSTParser m (Text,Text) roleAfter = try $ do contents <- unmarkedInterpretedText role <- roleMarker <|> (stateRstDefaultRole <$> getState) return (role,contents) -unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] +unmarkedInterpretedText :: PandocMonad m => RSTParser m Text unmarkedInterpretedText = try $ do atStart (char '`') contents <- mconcat <$> (many1 @@ -1453,7 +1455,7 @@ unmarkedInterpretedText = try $ do lookAhead (satisfy isAlphaNum)) )) char '`' - return contents + return $ T.pack contents whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" @@ -1461,7 +1463,7 @@ whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" str :: Monad m => RSTParser m Inlines str = do let strChar = noneOf ("\t\n " ++ specialChars) - result <- many1 strChar + result <- many1Char strChar updateLastStrPos return $ B.str result @@ -1489,7 +1491,7 @@ explicitLink = try $ do notFollowedBy (char '`') -- `` marks start of inline code label' <- trimInlines . mconcat <$> manyTill (notFollowedBy (char '`') >> inlineContent) (char '<') - src <- trim <$> manyTill (noneOf ">\n") (char '>') + src <- trim <$> manyTillChar (noneOf ">\n") (char '>') skipSpaces string "`_" optional $ char '_' -- anonymous form @@ -1501,22 +1503,22 @@ explicitLink = try $ do if isURI src then return ((src, ""), nullAttr) else - case reverse src of - '_':xs -> lookupKey [] (toKey (reverse xs)) - _ -> return ((src, ""), nullAttr) + case T.unsnoc src of + Just (xs, '_') -> lookupKey [] (toKey xs) + _ -> return ((src, ""), nullAttr) return $ B.linkWith attr (escapeURI src') tit label'' -citationName :: PandocMonad m => RSTParser m String +citationName :: PandocMonad m => RSTParser m Text citationName = do raw <- citationMarker - return $ "[" ++ raw ++ "]" + return $ "[" <> raw <> "]" referenceLink :: PandocMonad m => RSTParser m Inlines referenceLink = try $ do ref <- (referenceName <|> citationName) <* char '_' let label' = B.text ref - let isAnonKey (Key ('_':_)) = True - isAnonKey _ = False + let isAnonKey (Key (T.uncons -> Just ('_',_))) = True + isAnonKey _ = False state <- getState let keyTable = stateKeys state key <- option (toKey ref) $ @@ -1533,7 +1535,7 @@ referenceLink = try $ do -- We keep a list of oldkeys so we can detect lookup loops. lookupKey :: PandocMonad m - => [Key] -> Key -> RSTParser m ((String, String), Attr) + => [Key] -> Key -> RSTParser m ((Text, Text), Attr) lookupKey oldkeys key = do pos <- getPosition state <- getState @@ -1544,8 +1546,8 @@ lookupKey oldkeys key = do logMessage $ ReferenceNotFound key' pos return (("",""),nullAttr) -- check for keys of the form link_, which need to be resolved: - Just ((u@(c:_),""),_) | last u == '_', c /= '#' -> do - let rawkey = init u + Just ((u, ""),_) | T.length u > 1, T.last u == '_', T.head u /= '#' -> do + let rawkey = T.init u let newkey = toKey rawkey if newkey `elem` oldkeys then do @@ -1576,7 +1578,7 @@ subst = try $ do case M.lookup key substTable of Nothing -> do pos <- getPosition - logMessage $ ReferenceNotFound (show key) pos + logMessage $ ReferenceNotFound (tshow key) pos return mempty Just target -> return target diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 6519587c6..73122cc14 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -1,5 +1,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Roff Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane @@ -21,7 +23,7 @@ module Text.Pandoc.Readers.Roff , TableRow , RoffToken(..) , RoffTokens(..) - , linePartsToString + , linePartsToText , lexRoff ) where @@ -40,7 +42,7 @@ import qualified Data.Text as T import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Shared (safeRead, substitute) +import Text.Pandoc.Shared (safeRead) import Text.Parsec hiding (tokenPrim) import Text.Pandoc.RoffChar (characterCodes, combiningAccents) import qualified Data.Sequence as Seq @@ -60,28 +62,28 @@ data FontSpec = FontSpec{ fontBold :: Bool defaultFontSpec :: FontSpec defaultFontSpec = FontSpec False False False -data LinePart = RoffStr String +data LinePart = RoffStr T.Text | Font FontSpec | MacroArg Int deriving Show type Arg = [LinePart] -type TableOption = (String, String) +type TableOption = (T.Text, T.Text) data CellFormat = CellFormat { columnType :: Char , pipePrefix :: Bool , pipeSuffix :: Bool - , columnSuffixes :: [String] + , columnSuffixes :: [T.Text] } deriving (Show, Eq, Ord) type TableRow = ([CellFormat], [RoffTokens]) data RoffToken = TextLine [LinePart] | EmptyLine - | ControlLine String [Arg] SourcePos + | ControlLine T.Text [Arg] SourcePos | Tbl [TableOption] [TableRow] SourcePos deriving Show @@ -95,7 +97,7 @@ data RoffMode = NormalMode | CopyMode deriving Show -data RoffState = RoffState { customMacros :: M.Map String RoffTokens +data RoffState = RoffState { customMacros :: M.Map T.Text RoffTokens , prevFont :: FontSpec , currentFont :: FontSpec , tableTabChar :: Char @@ -121,10 +123,10 @@ instance Default RoffState where , afterConditional = False } -type RoffLexer m = ParserT [Char] RoffState m +type RoffLexer m = ParserT T.Text RoffState m -- --- Lexer: String -> RoffToken +-- Lexer: T.Text -> RoffToken -- eofline :: Stream s m Char => ParsecT s u m () @@ -133,11 +135,11 @@ eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}") spacetab :: Stream s m Char => ParsecT s u m Char spacetab = char ' ' <|> char '\t' -characterCodeMap :: M.Map String Char +characterCodeMap :: M.Map T.Text Char characterCodeMap = M.fromList $ map (\(x,y) -> (y,x)) characterCodes -combiningAccentsMap :: M.Map String Char +combiningAccentsMap :: M.Map T.Text Char combiningAccentsMap = M.fromList $ map (\(x,y) -> (y,x)) combiningAccents @@ -151,43 +153,40 @@ escapeGlyph = do c <- lookAhead (oneOf ['[','(']) escapeArg >>= resolveGlyph c -resolveGlyph :: PandocMonad m => Char -> String -> RoffLexer m [LinePart] +resolveGlyph :: PandocMonad m => Char -> T.Text -> RoffLexer m [LinePart] resolveGlyph delimChar glyph = do - let cs = substitute "_u" " u" glyph -- unicode glyphs separated by _ - (case words cs of + let cs = T.replace "_u" " u" glyph -- unicode glyphs separated by _ + (case T.words cs of [] -> mzero [s] -> case M.lookup s characterCodeMap `mplus` readUnicodeChar s of Nothing -> mzero - Just c -> return [RoffStr [c]] + Just c -> return [RoffStr $ T.singleton c] (s:ss) -> do basechar <- case M.lookup s characterCodeMap `mplus` readUnicodeChar s of Nothing -> - case s of + case T.unpack s of [ch] | isAscii ch && isAlphaNum ch -> return ch _ -> mzero Just c -> return c - let addAccents [] xs = return $ T.unpack . - Normalize.normalize Normalize.NFC . - T.pack $ reverse xs + let addAccents [] xs = return $ Normalize.normalize Normalize.NFC $ + T.reverse xs addAccents (a:as) xs = case M.lookup a combiningAccentsMap `mplus` readUnicodeChar a of - Just x -> addAccents as (x:xs) + Just x -> addAccents as $ T.cons x xs Nothing -> mzero - addAccents ss [basechar] >>= \xs -> return [RoffStr xs]) + addAccents ss (T.singleton basechar) >>= \xs -> return [RoffStr xs]) <|> case delimChar of - '[' -> escUnknown ("\\[" ++ glyph ++ "]") - '(' -> escUnknown ("\\(" ++ glyph) - '\'' -> escUnknown ("\\C'" ++ glyph ++ "'") + '[' -> escUnknown ("\\[" <> glyph <> "]") + '(' -> escUnknown ("\\(" <> glyph) + '\'' -> escUnknown ("\\C'" <> glyph <> "'") _ -> Prelude.fail "resolveGlyph: unknown glyph delimiter" -readUnicodeChar :: String -> Maybe Char -readUnicodeChar ('u':cs@(_:_:_:_:_)) = - case safeRead ('0':'x':cs) of - Just i -> Just (chr i) - Nothing -> Nothing -readUnicodeChar _ = Nothing +readUnicodeChar :: T.Text -> Maybe Char +readUnicodeChar t = case T.uncons t of + Just ('u', cs) | T.length cs > 3 -> chr <$> safeRead ("0x" <> cs) + _ -> Nothing escapeNormal :: PandocMonad m => RoffLexer m [LinePart] escapeNormal = do @@ -218,14 +217,14 @@ escapeNormal = do NormalMode -> return [RoffStr "\\"] 'H' -> escIgnore 'H' [quoteArg] 'L' -> escIgnore 'L' [quoteArg] - 'M' -> escIgnore 'M' [escapeArg, count 1 (satisfy (/='\n'))] + 'M' -> escIgnore 'M' [escapeArg, countChar 1 (satisfy (/='\n'))] 'N' -> escIgnore 'N' [quoteArg] - 'O' -> escIgnore 'O' [count 1 (oneOf ['0','1'])] + 'O' -> escIgnore 'O' [countChar 1 (oneOf ['0','1'])] 'R' -> escIgnore 'R' [quoteArg] 'S' -> escIgnore 'S' [quoteArg] - 'V' -> escIgnore 'V' [escapeArg, count 1 alphaNum] + 'V' -> escIgnore 'V' [escapeArg, countChar 1 alphaNum] 'X' -> escIgnore 'X' [quoteArg] - 'Y' -> escIgnore 'Y' [escapeArg, count 1 (satisfy (/='\n'))] + 'Y' -> escIgnore 'Y' [escapeArg, countChar 1 (satisfy (/='\n'))] 'Z' -> escIgnore 'Z' [quoteArg] '\'' -> return [RoffStr "`"] '\n' -> return mempty -- line continuation @@ -238,12 +237,12 @@ escapeNormal = do 'd' -> escIgnore 'd' [] -- forward down 1/2em 'e' -> return [RoffStr "\\"] 'f' -> escFont - 'g' -> escIgnore 'g' [escapeArg, count 1 (satisfy (/='\n'))] + 'g' -> escIgnore 'g' [escapeArg, countChar 1 (satisfy (/='\n'))] 'h' -> escIgnore 'h' [quoteArg] - 'k' -> escIgnore 'k' [escapeArg, count 1 (satisfy (/='\n'))] + 'k' -> escIgnore 'k' [escapeArg, countChar 1 (satisfy (/='\n'))] 'l' -> escIgnore 'l' [quoteArg] - 'm' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))] - 'n' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))] + 'm' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))] + 'n' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))] 'o' -> escIgnore 'o' [quoteArg] 'p' -> escIgnore 'p' [] 'r' -> escIgnore 'r' [] @@ -253,7 +252,7 @@ escapeNormal = do 'v' -> escIgnore 'v' [quoteArg] 'w' -> escIgnore 'w' [quoteArg] 'x' -> escIgnore 'x' [quoteArg] - 'z' -> escIgnore 'z' [count 1 anyChar] + 'z' -> escIgnore 'z' [countChar 1 anyChar] '|' -> return [RoffStr "\x2006"] --1/6 em space '~' -> return [RoffStr "\160"] -- nonbreaking space '\\' -> do @@ -262,40 +261,40 @@ escapeNormal = do CopyMode -> char '\\' NormalMode -> return '\\' return [RoffStr "\\"] - _ -> return [RoffStr [c]] + _ -> return [RoffStr $ T.singleton c] -- man 7 groff: "If a backslash is followed by a character that -- does not constitute a defined escape sequence, the backslash -- is silently ignored and the character maps to itself." escIgnore :: PandocMonad m => Char - -> [RoffLexer m String] + -> [RoffLexer m T.Text] -> RoffLexer m [LinePart] escIgnore c argparsers = do pos <- getPosition arg <- snd <$> withRaw (choice argparsers) <|> return "" - report $ SkippedContent ('\\':c:arg) pos + report $ SkippedContent ("\\" <> T.cons c arg) pos return mempty -escUnknown :: PandocMonad m => String -> RoffLexer m [LinePart] +escUnknown :: PandocMonad m => T.Text -> RoffLexer m [LinePart] escUnknown s = do pos <- getPosition report $ SkippedContent s pos return [RoffStr "\xFFFD"] -signedNumber :: PandocMonad m => RoffLexer m String +signedNumber :: PandocMonad m => RoffLexer m T.Text signedNumber = try $ do sign <- option "" ("-" <$ char '-' <|> "" <$ char '+') - ds <- many1 digit - return (sign ++ ds) + ds <- many1Char digit + return (sign <> ds) -- Parses: [..] or (.. -escapeArg :: PandocMonad m => RoffLexer m String +escapeArg :: PandocMonad m => RoffLexer m T.Text escapeArg = choice [ char '[' *> optional expandString *> - manyTill (noneOf ['\n',']']) (char ']') + manyTillChar (noneOf ['\n',']']) (char ']') , char '(' *> optional expandString *> - count 2 (satisfy (/='\n')) + countChar 2 (satisfy (/='\n')) ] expandString :: PandocMonad m => RoffLexer m () @@ -303,21 +302,21 @@ expandString = try $ do pos <- getPosition char '\\' char '*' - cs <- escapeArg <|> count 1 anyChar - s <- linePartsToString <$> resolveString cs pos - getInput >>= setInput . (s ++) + cs <- escapeArg <|> countChar 1 anyChar + s <- linePartsToText <$> resolveText cs pos + getInput >>= setInput . (s <>) return () -- Parses: '..' -quoteArg :: PandocMonad m => RoffLexer m String -quoteArg = char '\'' *> manyTill (noneOf ['\n','\'']) (char '\'') +quoteArg :: PandocMonad m => RoffLexer m T.Text +quoteArg = char '\'' *> manyTillChar (noneOf ['\n','\'']) (char '\'') escFont :: PandocMonad m => RoffLexer m [LinePart] escFont = do - font <- escapeArg <|> count 1 alphaNum - font' <- if null font || font == "P" + font <- escapeArg <|> countChar 1 alphaNum + font' <- if T.null font || font == "P" then prevFont <$> getState - else return $ foldr processFontLetter defaultFontSpec font + else return $ foldr processFontLetter defaultFontSpec $ T.unpack font modifyState $ \st -> st{ prevFont = currentFont st , currentFont = font' } return [Font font'] @@ -345,7 +344,7 @@ lexMacro = do guard $ sourceColumn pos == 1 || afterConditional st char '.' <|> char '\'' skipMany spacetab - macroName <- many (satisfy isAlphaNum) + macroName <- manyChar (satisfy isAlphaNum) case macroName of "nop" -> return mempty "ie" -> lexConditional "ie" @@ -374,8 +373,8 @@ lexTable pos = do spaces opts <- try tableOptions <|> [] <$ optional (char ';') case lookup "tab" opts of - Just (c:_) -> modifyState $ \st -> st{ tableTabChar = c } - _ -> modifyState $ \st -> st{ tableTabChar = '\t' } + Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c } + _ -> modifyState $ \st -> st{ tableTabChar = '\t' } spaces skipMany lexComment spaces @@ -388,7 +387,7 @@ lexTable pos = do string ".TE" skipMany spacetab eofline - return $ singleTok $ Tbl opts (rows ++ concat morerows) pos + return $ singleTok $ Tbl opts (rows <> concat morerows) pos lexTableRows :: PandocMonad m => RoffLexer m [TableRow] lexTableRows = do @@ -428,11 +427,11 @@ tableOptions = many1 tableOption <* spaces <* char ';' tableOption :: PandocMonad m => RoffLexer m TableOption tableOption = do - k <- many1 letter + k <- many1Char letter v <- option "" $ try $ do skipMany spacetab char '(' - manyTill anyChar (char ')') + manyTillChar anyChar (char ')') skipMany spacetab optional (char ',' >> skipMany spacetab) return (k,v) @@ -444,7 +443,7 @@ tableFormatSpec = do let speclines = first:rest spaces char '.' - return $ speclines ++ repeat (lastDef [] speclines) -- last line is default + return $ speclines <> repeat (lastDef [] speclines) -- last line is default tableFormatSpecLine :: PandocMonad m => RoffLexer m [CellFormat] tableFormatSpecLine = @@ -456,19 +455,19 @@ tableColFormat = do $ True <$ try (string "|" <* notFollowedBy spacetab) c <- oneOf ['a','A','c','C','l','L','n','N','r','R','s','S','^','_','-', '=','|'] - suffixes <- many $ try (skipMany spacetab *> count 1 digit) <|> + suffixes <- many $ try (skipMany spacetab *> countChar 1 digit) <|> (do x <- oneOf ['b','B','d','D','e','E','f','F','i','I','m','M', 'p','P','t','T','u','U','v','V','w','W','x','X', 'z','Z'] num <- case toLower x of 'w' -> many1 digit <|> (do char '(' xs <- manyTill anyChar (char ')') - return ("(" ++ xs ++ ")")) <|> + return ("(" <> xs <> ")")) <|> return "" 'f' -> count 1 alphaNum <* skipMany spacetab 'm' -> count 1 alphaNum <* skipMany spacetab _ -> return "" - return $ x : num) + return $ T.pack $ x : num) pipeSuffix' <- option False $ True <$ string "|" return $ CellFormat { columnType = c @@ -479,7 +478,7 @@ tableColFormat = do -- We don't fully handle the conditional. But we do -- include everything under '.ie n', which occurs commonly -- in man pages. -lexConditional :: PandocMonad m => String -> RoffLexer m RoffTokens +lexConditional :: PandocMonad m => T.Text -> RoffLexer m RoffTokens lexConditional mname = do pos <- getPosition skipMany spacetab @@ -498,7 +497,7 @@ lexConditional mname = do case mbtest of Nothing -> do putState st -- reset state, so we don't record macros in skipped section - report $ SkippedContent ('.':mname) pos + report $ SkippedContent (T.cons '.' mname) pos return mempty Just True -> return ifPart Just False -> do @@ -508,7 +507,7 @@ lexConditional mname = do expression :: PandocMonad m => RoffLexer m (Maybe Bool) expression = do raw <- charsInBalanced '(' ')' (satisfy (/= '\n')) - <|> many1 nonspaceChar + <|> many1Char nonspaceChar returnValue $ case raw of "1" -> Just True @@ -533,17 +532,17 @@ lexIncludeFile args = do pos <- getPosition case args of (f:_) -> do - let fp = linePartsToString f + let fp = linePartsToText f dirs <- getResourcePath - result <- readFileFromDirs dirs fp + result <- readFileFromDirs dirs $ T.unpack fp case result of Nothing -> report $ CouldNotLoadIncludeFile fp pos - Just s -> getInput >>= setInput . (s ++) + Just s -> getInput >>= setInput . (s <>) return mempty [] -> return mempty resolveMacro :: PandocMonad m - => String -> [Arg] -> SourcePos -> RoffLexer m RoffTokens + => T.Text -> [Arg] -> SourcePos -> RoffLexer m RoffTokens resolveMacro macroName args pos = do macros <- customMacros <$> getState case M.lookup macroName macros of @@ -552,7 +551,7 @@ resolveMacro macroName args pos = do let fillLP (MacroArg i) zs = case drop (i - 1) args of [] -> zs - (ys:_) -> ys ++ zs + (ys:_) -> ys <> zs fillLP z zs = z : zs let fillMacroArg (TextLine lineparts) = TextLine (foldr fillLP [] lineparts) @@ -565,7 +564,7 @@ lexStringDef args = do -- string definition [] -> Prelude.fail "No argument to .ds" (x:ys) -> do let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys) - let stringName = linePartsToString x + let stringName = linePartsToText x modifyState $ \st -> st{ customMacros = M.insert stringName ts (customMacros st) } return mempty @@ -575,14 +574,14 @@ lexMacroDef args = do -- macro definition modifyState $ \st -> st{ roffMode = CopyMode } (macroName, stopMacro) <- case args of - (x : y : _) -> return (linePartsToString x, linePartsToString y) + (x : y : _) -> return (linePartsToText x, linePartsToText y) -- optional second arg - (x:_) -> return (linePartsToString x, ".") + (x:_) -> return (linePartsToText x, ".") [] -> Prelude.fail "No argument to .de" let stop = try $ do char '.' <|> char '\'' skipMany spacetab - string stopMacro + textStr stopMacro _ <- lexArgs return () ts <- mconcat <$> manyTill manToken stop @@ -628,7 +627,7 @@ lexArgs = do char '"' return [RoffStr "\""] -checkDefined :: PandocMonad m => String -> RoffLexer m [LinePart] +checkDefined :: PandocMonad m => T.Text -> RoffLexer m [LinePart] checkDefined name = do macros <- customMacros <$> getState case M.lookup name macros of @@ -638,19 +637,19 @@ checkDefined name = do escString :: PandocMonad m => RoffLexer m [LinePart] escString = try $ do pos <- getPosition - (do cs <- escapeArg <|> count 1 anyChar - resolveString cs pos) + (do cs <- escapeArg <|> countChar 1 anyChar + resolveText cs pos) <|> mempty <$ char 'S' -- strings and macros share namespace -resolveString :: PandocMonad m - => String -> SourcePos -> RoffLexer m [LinePart] -resolveString stringname pos = do +resolveText :: PandocMonad m + => T.Text -> SourcePos -> RoffLexer m [LinePart] +resolveText stringname pos = do RoffTokens ts <- resolveMacro stringname [] pos case Foldable.toList ts of [TextLine xs] -> return xs _ -> do - report $ SkippedContent ("unknown string " ++ stringname) pos + report $ SkippedContent ("unknown string " <> stringname) pos return mempty lexLine :: PandocMonad m => RoffLexer m RoffTokens @@ -688,16 +687,16 @@ macroArg = try $ do pos <- getPosition backslash char '$' - x <- escapeArg <|> count 1 digit + x <- escapeArg <|> countChar 1 digit case safeRead x of Just i -> return [MacroArg i] Nothing -> do - report $ SkippedContent ("illegal macro argument " ++ x) pos + report $ SkippedContent ("illegal macro argument " <> x) pos return [] regularText :: PandocMonad m => RoffLexer m [LinePart] regularText = do - s <- many1 $ noneOf "\n\r\t \\\"" + s <- many1Char $ noneOf "\n\r\t \\\"" return [RoffStr s] quoteChar :: PandocMonad m => RoffLexer m [LinePart] @@ -708,7 +707,7 @@ quoteChar = do spaceTabChar :: PandocMonad m => RoffLexer m [LinePart] spaceTabChar = do c <- spacetab - return [RoffStr [c]] + return [RoffStr $ T.singleton c] lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens lexEmptyLine = newline >> return (singleTok EmptyLine) @@ -716,8 +715,8 @@ lexEmptyLine = newline >> return (singleTok EmptyLine) manToken :: PandocMonad m => RoffLexer m RoffTokens manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine -linePartsToString :: [LinePart] -> String -linePartsToString = mconcat . map go +linePartsToText :: [LinePart] -> T.Text +linePartsToText = mconcat . map go where go (RoffStr s) = s go _ = mempty @@ -726,7 +725,7 @@ linePartsToString = mconcat . map go lexRoff :: PandocMonad m => SourcePos -> T.Text -> m RoffTokens lexRoff pos txt = do eithertokens <- readWithM (do setPosition pos - mconcat <$> many manToken) def (T.unpack txt) + mconcat <$> many manToken) def txt case eithertokens of Left e -> throwError e Right tokenz -> return tokenz diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 9796de4b9..d587bc41b 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RelaxedPolyRec #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- | @@ -31,7 +32,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Text.Pandoc.Shared (crFilter) +import Text.Pandoc.Shared (crFilter, tshow) import Text.Pandoc.XML (fromEntities) -- | Read twiki from an input string and return a Pandoc document. @@ -41,19 +42,19 @@ readTWiki :: PandocMonad m -> m Pandoc readTWiki opts s = do res <- readWithM parseTWiki def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case res of Left e -> throwError e Right d -> return d -type TWParser = ParserT [Char] ParserState +type TWParser = ParserT Text ParserState -- -- utility functions -- -tryMsg :: String -> TWParser m a -> TWParser m a -tryMsg msg p = try p <?> msg +tryMsg :: Text -> TWParser m a -> TWParser m a +tryMsg msg p = try p <?> T.unpack msg nested :: PandocMonad m => TWParser m a -> TWParser m a nested p = do @@ -64,25 +65,25 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -htmlElement :: PandocMonad m => String -> TWParser m (Attr, String) +htmlElement :: PandocMonad m => Text -> TWParser m (Attr, Text) htmlElement tag = tryMsg tag $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) - content <- manyTill anyChar (endtag <|> endofinput) + content <- T.pack <$> manyTill anyChar (endtag <|> endofinput) return (htmlAttrToPandoc attr, trim content) where endtag = void $ htmlTag (~== TagClose tag) endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof - trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + trim = T.dropAround (=='\n') -htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc :: [Attribute Text] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs + classes = maybe [] T.words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] parseHtmlContentWithAttrs :: PandocMonad m - => String -> TWParser m a -> TWParser m (Attr, [a]) + => Text -> TWParser m a -> TWParser m (Attr, [a]) parseHtmlContentWithAttrs tag parser = do (attr, content) <- htmlElement tag parsedContent <- try $ parseContent content @@ -91,7 +92,13 @@ parseHtmlContentWithAttrs tag parser = do parseContent = parseFromString' $ nested $ manyTill parser endOfContent endOfContent = try $ skipMany blankline >> skipSpaces >> eof -parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] +parseCharHtmlContentWithAttrs :: PandocMonad m + => Text -> TWParser m Char -> TWParser m (Attr, Text) +parseCharHtmlContentWithAttrs tag = fmap go . parseHtmlContentWithAttrs tag + where + go (x, y) = (x, T.pack y) + +parseHtmlContent :: PandocMonad m => Text -> TWParser m a -> TWParser m [a] parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p -- @@ -113,7 +120,7 @@ block = do <|> blockElements <|> para skipMany blankline - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blockElements :: PandocMonad m => TWParser m B.Blocks @@ -150,38 +157,38 @@ literal = rawBlock <$> htmlElement "literal" format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) content -list :: PandocMonad m => String -> TWParser m B.Blocks +list :: PandocMonad m => Text -> TWParser m B.Blocks list prefix = choice [ bulletList prefix , orderedList prefix , definitionList prefix] -definitionList :: PandocMonad m => String -> TWParser m B.Blocks +definitionList :: PandocMonad m => Text -> TWParser m B.Blocks definitionList prefix = tryMsg "definitionList" $ do - indent <- lookAhead $ string prefix *> many1 (string " ") <* string "$ " - elements <- many $ parseDefinitionListItem (prefix ++ concat indent) + indent <- lookAhead $ textStr prefix *> many1 (textStr " ") <* textStr "$ " + elements <- many $ parseDefinitionListItem (prefix <> T.concat indent) return $ B.definitionList elements where parseDefinitionListItem :: PandocMonad m - => String -> TWParser m (B.Inlines, [B.Blocks]) + => Text -> TWParser m (B.Inlines, [B.Blocks]) parseDefinitionListItem indent = do - string (indent ++ "$ ") >> skipSpaces + textStr (indent <> "$ ") >> skipSpaces term <- many1Till inline $ string ": " line <- listItemLine indent $ string "$ " return (mconcat term, [line]) -bulletList :: PandocMonad m => String -> TWParser m B.Blocks +bulletList :: PandocMonad m => Text -> TWParser m B.Blocks bulletList prefix = tryMsg "bulletList" $ parseList prefix (char '*') (char ' ') -orderedList :: PandocMonad m => String -> TWParser m B.Blocks +orderedList :: PandocMonad m => Text -> TWParser m B.Blocks orderedList prefix = tryMsg "orderedList" $ parseList prefix (oneOf "1iIaA") (string ". ") parseList :: PandocMonad m - => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks + => Text -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks parseList prefix marker delim = do - (indent, style) <- lookAhead $ string prefix *> listStyle <* delim - blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) + (indent, style) <- lookAhead $ textStr prefix *> listStyle <* delim + blocks <- many $ parseListItem (prefix <> indent) (char style <* delim) return $ case style of '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks @@ -191,24 +198,24 @@ parseList prefix marker delim = do _ -> B.bulletList blocks where listStyle = do - indent <- many1 $ string " " + indent <- many1 $ textStr " " style <- marker - return (concat indent, style) + return (T.concat indent, style) parseListItem :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks -parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker + => Text -> TWParser m a -> TWParser m B.Blocks +parseListItem prefix marker = textStr prefix >> marker >> listItemLine prefix marker listItemLine :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks + => Text -> TWParser m a -> TWParser m B.Blocks listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent) where lineContent = do content <- anyLine continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ maybe "" (" " ++) continuation - filterSpaces = reverse . dropWhile (== ' ') . reverse - listContinuation = notFollowedBy (string prefix >> marker) >> + return $ filterSpaces content <> "\n" <> maybe "" (" " <>) continuation + filterSpaces = T.dropWhileEnd (== ' ') + listContinuation = notFollowedBy (textStr prefix >> marker) >> string " " >> lineContent parseContent = parseFromString' $ many1 $ nestedList <|> parseInline parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList) @@ -352,29 +359,29 @@ macroWithParameters = try $ do char '%' return $ buildSpan name kvs $ B.str content -buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines +buildSpan :: Text -> [(Text, Text)] -> B.Inlines -> B.Inlines buildSpan className kvs = B.spanWith attrs where attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses) - additionalClasses = maybe [] words $ lookup "class" kvs + additionalClasses = maybe [] T.words $ lookup "class" kvs kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] -macroName :: PandocMonad m => TWParser m String +macroName :: PandocMonad m => TWParser m Text macroName = do first <- letter rest <- many $ alphaNum <|> char '_' - return (first:rest) + return $ T.pack $ first:rest -attributes :: PandocMonad m => TWParser m (String, [(String, String)]) -attributes = foldr (either mkContent mkKvs) ([], []) +attributes :: PandocMonad m => TWParser m (Text, [(Text, Text)]) +attributes = foldr (either mkContent mkKvs) ("", []) <$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}') where spnl = skipMany (spaceChar <|> newline) - mkContent c ([], kvs) = (c, kvs) - mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) + mkContent c ("", kvs) = (c, kvs) + mkContent c (rest, kvs) = (c <> " " <> rest, kvs) mkKvs kv (cont, rest) = (cont, kv : rest) -attribute :: PandocMonad m => TWParser m (Either String (String, String)) +attribute :: PandocMonad m => TWParser m (Either Text (Text, Text)) attribute = withKey <|> withoutKey where withKey = try $ do @@ -383,10 +390,10 @@ attribute = withKey <|> withoutKey curry Right key <$> parseValue False withoutKey = try $ Left <$> parseValue True parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces) - withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) + withQuotes = between (char '"') (char '"') (\_ -> countChar 1 $ noneOf ['"']) withoutQuotes allowSpaces - | allowSpaces = many1 $ noneOf "}" - | otherwise = many1 $ noneOf " }" + | allowSpaces = many1Char $ noneOf "}" + | otherwise = many1Char $ noneOf " }" nestedInlines :: (Show a, PandocMonad m) => TWParser m a -> TWParser m B.Inlines @@ -413,10 +420,10 @@ emphHtml :: PandocMonad m => TWParser m B.Inlines emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) nestedString :: (Show a, PandocMonad m) - => TWParser m a -> TWParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar + => TWParser m a -> TWParser m Text +nestedString end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ many1Char spaceChar <* notFollowedBy end boldCode :: PandocMonad m => TWParser m B.Inlines boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString @@ -429,7 +436,7 @@ code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString codeHtml :: PandocMonad m => TWParser m B.Inlines codeHtml = do - (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + (attrs, content) <- parseCharHtmlContentWithAttrs "code" anyChar return $ B.codeWith attrs $ fromEntities content autoLink :: PandocMonad m => TWParser m B.Inlines @@ -437,7 +444,7 @@ autoLink = try $ do state <- getState guard $ stateAllowLinks state (text, url) <- parseLink - guard $ checkLink (last url) + guard $ checkLink (T.last url) return $ makeLink (text, url) where parseLink = notFollowedBy nop >> (uri <|> emailAddress) @@ -447,17 +454,17 @@ autoLink = try $ do | otherwise = isAlphaNum c str :: PandocMonad m => TWParser m B.Inlines -str = B.str <$> (many1 alphaNum <|> count 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) nop :: PandocMonad m => TWParser m B.Inlines nop = try $ (void exclamation <|> void nopTag) >> followContent where exclamation = char '!' nopTag = stringAnyCase "<nop>" - followContent = B.str . fromEntities <$> many1 nonspaceChar + followContent = B.str . fromEntities <$> many1Char nonspaceChar symbol :: PandocMonad m => TWParser m B.Inlines -symbol = B.str <$> count 1 nonspaceChar +symbol = B.str <$> countChar 1 nonspaceChar smart :: PandocMonad m => TWParser m B.Inlines smart = do @@ -491,13 +498,13 @@ link = try $ do setState $ st{ stateAllowLinks = True } return $ B.link url title content -linkText :: PandocMonad m => TWParser m (String, String, B.Inlines) +linkText :: PandocMonad m => TWParser m (Text, Text, B.Inlines) linkText = do string "[[" - url <- many1Till anyChar (char ']') + url <- T.pack <$> many1Till anyChar (char ']') content <- option (B.str url) (mconcat <$> linkContent) char ']' return (url, "", content) where - linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent + linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent . T.pack parseLinkContent = parseFromString' $ many1 inline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index a638fdf40..5e7aaf910 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Textile Copyright : Copyright (C) 2010-2012 Paul Rivier @@ -38,7 +40,7 @@ import Prelude import Control.Monad (guard, liftM) import Control.Monad.Except (throwError) import Data.Char (digitToInt, isUpper) -import Data.List (intercalate, intersperse, transpose) +import Data.List (intersperse, transpose) import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup (Tag (..), fromAttrib) @@ -52,7 +54,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (crFilter, trim, underlineSpan) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow) -- | Parse a Textile text and return a Pandoc document. readTextile :: PandocMonad m @@ -61,21 +63,21 @@ readTextile :: PandocMonad m -> m Pandoc readTextile opts s = do parsed <- readWithM parseTextile def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e -- | Generate a Pandoc ADT from a textile document -parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc +parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc parseTextile = do many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys/notes were... let firstPassParser = noteBlock <|> lineClump - manyTill firstPassParser eof >>= setInput . concat + manyTill firstPassParser eof >>= setInput . T.concat setPosition startPos st' <- getState let reversedNotes = stateNotes st' @@ -84,29 +86,29 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc nullMeta (B.toList blocks) -- FIXME -noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char] -noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') +noteMarker :: PandocMonad m => ParserT Text ParserState m Text +noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.') -noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char] +noteBlock :: PandocMonad m => ParserT Text ParserState m Text noteBlock = try $ do startPos <- getPosition ref <- noteMarker optional blankline - contents <- unlines <$> many1Till anyLine (blanklines <|> noteBlock) + contents <- T.unlines <$> many1Till anyLine (blanklines <|> noteBlock) endPos <- getPosition - let newnote = (ref, contents ++ "\n") + let newnote = (ref, contents <> "\n") st <- getState let oldnotes = stateNotes st updateState $ \s -> s { stateNotes = newnote : oldnotes } -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -- | Parse document blocks -parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks +parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks] +blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -121,22 +123,22 @@ blockParsers = [ codeBlock ] -- | Any block in the order of definition of blockParsers -block :: PandocMonad m => ParserT [Char] ParserState m Blocks +block :: PandocMonad m => ParserT Text ParserState m Blocks block = do res <- choice blockParsers <?> "block" - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res -commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines return mempty -codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlockBc :: PandocMonad m => ParserT Text ParserState m Blocks codeBlockBc = try $ do string "bc." extended <- option False (True <$ char '.') @@ -150,31 +152,31 @@ codeBlockBc = try $ do rest <- many (notFollowedBy ender *> anyLine) return (f:rest) else manyTill anyLine blanklines - return $ B.codeBlock (trimTrailingNewlines (unlines contents)) + return $ B.codeBlock (trimTrailingNewlines (T.unlines contents)) -trimTrailingNewlines :: String -> String -trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse +trimTrailingNewlines :: Text -> Text +trimTrailingNewlines = T.dropWhileEnd (=='\n') -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlockPre :: PandocMonad m => ParserT Text ParserState m Blocks codeBlockPre = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) - result' <- manyTill anyChar (htmlTag (tagClose (=="pre"))) + result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre"))) -- drop leading newline if any - let result'' = case result' of - '\n':xs -> xs - _ -> result' + let result'' = case T.uncons result' of + Just ('\n', xs) -> xs + _ -> result' -- drop trailing newline if any - let result''' = case reverse result'' of - '\n':_ -> init result'' - _ -> result'' - let classes = words $ fromAttrib "class" t + let result''' = case T.unsnoc result'' of + Just (xs, '\n') -> xs + _ -> result'' + let classes = T.words $ fromAttrib "class" t let ident = fromAttrib "id" t let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: PandocMonad m => ParserT [Char] ParserState m Blocks +header :: PandocMonad m => ParserT Text ParserState m Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -186,14 +188,14 @@ header = try $ do return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks +blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace B.blockQuote <$> para -- Horizontal rule -hrule :: PandocMonad m => ParserT [Char] st m Blocks +hrule :: PandocMonad m => ParserT Text st m Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -208,39 +210,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks +anyList :: PandocMonad m => ParserT Text ParserState m Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks +genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|> @@ -250,25 +252,25 @@ genericListItemAtDepth c depth = try $ do return $ contents <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks +definitionList :: PandocMonad m => ParserT Text ParserState m Blocks definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. -listStart :: PandocMonad m => ParserT [Char] ParserState m () +listStart :: PandocMonad m => ParserT Text ParserState m () listStart = genericListStart '*' <|> () <$ genericListStart '#' <|> () <$ definitionListStart -genericListStart :: PandocMonad m => Char -> ParserT [Char] st m () +genericListStart :: PandocMonad m => Char -> ParserT Text st m () genericListStart c = () <$ try (many1 (char c) >> whitespace) -basicDLStart :: PandocMonad m => ParserT [Char] ParserState m () +basicDLStart :: PandocMonad m => ParserT Text ParserState m () basicDLStart = do char '-' whitespace notFollowedBy newline -definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines +definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines definitionListStart = try $ do basicDLStart trimInlines . mconcat <$> @@ -281,26 +283,26 @@ definitionListStart = try $ do -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks]) definitionListItem = try $ do term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) return (term, def') - where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] + where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] inlineDef = liftM (\d -> [B.plain d]) $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline - multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] + multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] multilineDef = try $ do optional whitespace >> newline - s <- many1Till anyChar (try (string "=:" >> newline)) - -- this ++ "\n\n" does not look very good - ds <- parseFromString' parseBlocks (s ++ "\n\n") + s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline)) + -- this <> "\n\n" does not look very good + ds <- parseFromString' parseBlocks (s <> "\n\n") return [ds] -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks rawHtmlBlock = try $ do skipMany spaceChar (_,b) <- htmlTag isBlockTag @@ -308,14 +310,14 @@ rawHtmlBlock = try $ do return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks +rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: PandocMonad m => ParserT [Char] ParserState m Blocks +para :: PandocMonad m => ParserT Text ParserState m Blocks para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -326,7 +328,7 @@ toAlignment '>' = AlignRight toAlignment '=' = AlignCenter toAlignment _ = AlignDefault -cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment) +cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment) cellAttributes = try $ do isHeader <- option False (True <$ char '_') -- we just ignore colspan and rowspan markers: @@ -339,18 +341,18 @@ cellAttributes = try $ do return (isHeader, alignment) -- | A table cell spans until a pipe | -tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks) +tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks) tableCell = try $ do char '|' (isHeader, alignment) <- option (False, AlignDefault) cellAttributes notFollowedBy blankline - raw <- trim <$> + raw <- trim . T.pack <$> many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) content <- mconcat <$> parseFromString' (many inline) raw return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells -tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)] +tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)] tableRow = try $ do -- skip optional row attributes optional $ try $ do @@ -360,7 +362,7 @@ tableRow = try $ do many1 tableCell <* char '|' <* blankline -- | A table with an optional header. -table :: PandocMonad m => ParserT [Char] ParserState m Blocks +table :: PandocMonad m => ParserT Text ParserState m Blocks table = try $ do -- ignore table attributes caption <- option mempty $ try $ do @@ -384,7 +386,7 @@ table = try $ do (map (map snd) rows) -- | Ignore markers for cols, thead, tfoot. -ignorableRow :: PandocMonad m => ParserT [Char] ParserState m () +ignorableRow :: PandocMonad m => ParserT Text ParserState m () ignorableRow = try $ do char '|' oneOf ":^-~" @@ -393,9 +395,9 @@ ignorableRow = try $ do _ <- anyLine return () -explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m () +explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m () explicitBlockStart name = try $ do - string name + string (T.unpack name) attributes char '.' optional whitespace @@ -404,9 +406,9 @@ explicitBlockStart name = try $ do -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: PandocMonad m - => String -- ^ block tag name - -> ParserT [Char] ParserState m Blocks -- ^ implicit block - -> ParserT [Char] ParserState m Blocks + => Text -- ^ block tag name + -> ParserT Text ParserState m Blocks -- ^ implicit block + -> ParserT Text ParserState m Blocks maybeExplicitBlock name blk = try $ do optional $ explicitBlockStart name blk @@ -419,11 +421,11 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: PandocMonad m => ParserT [Char] ParserState m Inlines +inline :: PandocMonad m => ParserT Text ParserState m Inlines inline = choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines] +inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines] inlineParsers = [ str , whitespace , endline @@ -437,13 +439,13 @@ inlineParsers = [ str , link , image , mark - , (B.str . (:[])) <$> characterReference + , (B.str . T.singleton) <$> characterReference , smartPunctuation inline , symbol ] -- | Inline markups -inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines +inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (string "**") B.strong , simpleInline (string "__") B.emph @@ -457,33 +459,33 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) ] -- | Trademark, registered, copyright -mark :: PandocMonad m => ParserT [Char] st m Inlines +mark :: PandocMonad m => ParserT Text st m Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: PandocMonad m => ParserT [Char] st m Inlines +reg :: PandocMonad m => ParserT Text st m Inlines reg = do oneOf "Rr" char ')' return $ B.str "\174" -tm :: PandocMonad m => ParserT [Char] st m Inlines +tm :: PandocMonad m => ParserT Text st m Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' return $ B.str "\8482" -copy :: PandocMonad m => ParserT [Char] st m Inlines +copy :: PandocMonad m => ParserT Text st m Inlines copy = do oneOf "Cc" char ')' return $ B.str "\169" -note :: PandocMonad m => ParserT [Char] ParserState m Inlines +note :: PandocMonad m => ParserT Text ParserState m Inlines note = try $ do ref <- char '[' *> many1 digit <* char ']' notes <- stateNotes <$> getState - case lookup ref notes of + case lookup (T.pack ref) notes of Nothing -> Prelude.fail "note not found" Just raw -> B.note <$> parseFromString' parseBlocks raw @@ -500,42 +502,42 @@ stringBreakers :: [Char] stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]" wordBoundaries :: [Char] -wordBoundaries = markupChars ++ stringBreakers +wordBoundaries = markupChars <> stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String +hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text hyphenedWords = do x <- wordChunk xs <- many (try $ char '-' >> wordChunk) - return $ intercalate "-" (x:xs) + return $ T.intercalate "-" (x:xs) -wordChunk :: PandocMonad m => ParserT [Char] ParserState m String +wordChunk :: PandocMonad m => ParserT Text ParserState m Text wordChunk = try $ do hd <- noneOf wordBoundaries tl <- many ( noneOf wordBoundaries <|> try (notFollowedBy' note *> oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) ) - return $ hd:tl + return $ T.pack $ hd:tl -- | Any string -str :: PandocMonad m => ParserT [Char] ParserState m Inlines +str :: PandocMonad m => ParserT Text ParserState m Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediately -- followed by parens, parens content is unconditionally word acronym fullStr <- option baseStr $ try $ do - guard $ all isUpper baseStr - acro <- enclosed (char '(') (char ')') anyChar' - return $ concat [baseStr, " (", acro, ")"] + guard $ T.all isUpper baseStr + acro <- T.pack <$> enclosed (char '(') (char ')') anyChar' + return $ T.concat [baseStr, " (", acro, ")"] updateLastStrPos return $ B.str fullStr -- | Some number of space chars -whitespace :: PandocMonad m => ParserT [Char] st m Inlines +whitespace :: PandocMonad m => ParserT Text st m Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: PandocMonad m => ParserT [Char] ParserState m Inlines +endline :: PandocMonad m => ParserT Text ParserState m Inlines endline = try $ do newline notFollowedBy blankline @@ -543,18 +545,18 @@ endline = try $ do notFollowedBy rawHtmlBlock return B.linebreak -rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines +rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines +rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: PandocMonad m => ParserT [Char] ParserState m Inlines +link :: PandocMonad m => ParserT Text ParserState m Inlines link = try $ do bracketed <- (True <$ char '[') <|> return False char '"' *> notFollowedBy (oneOf " \t\n\r") @@ -567,121 +569,122 @@ link = try $ do else lookAhead $ space <|> eof' <|> try (oneOf "!.,;:" *> (space <|> newline <|> eof')) - url <- many1Till nonspaceChar stop + url <- T.pack <$> many1Till nonspaceChar stop let name' = if B.toList name == [Str "$"] then B.str url else name return $ if attr == nullAttr then B.link url "" name' else B.spanWith attr $ B.link url "" name' -- | image embedding -image :: PandocMonad m => ParserT [Char] ParserState m Inlines +image :: PandocMonad m => ParserT Text ParserState m Inlines image = try $ do char '!' >> notFollowedBy space (ident, cls, kvs) <- attributes let attr = case lookup "style" kvs of Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls) Nothing -> (ident, cls, kvs) - src <- many1 (noneOf " \t\n\r!(") - alt <- option "" $ try $ char '(' *> manyTill anyChar (char ')') + src <- T.pack <$> many1 (noneOf " \t\n\r!(") + alt <- fmap T.pack $ option "" $ try $ char '(' *> manyTill anyChar (char ')') char '!' return $ B.imageWith attr src alt (B.str alt) -escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines +escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedEqs = B.str <$> +escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines +escapedEqs = B.str . T.pack <$> try (string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedTag = B.str <$> +escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines +escapedTag = B.str . T.pack <$> try (string "<notextile>" *> manyTill anyChar' (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines -symbol = B.str . singleton <$> (notFollowedBy newline *> - notFollowedBy rawHtmlBlock *> - oneOf wordBoundaries) +symbol :: PandocMonad m => ParserT Text ParserState m Inlines +symbol = B.str . T.singleton <$> (notFollowedBy newline *> + notFollowedBy rawHtmlBlock *> + oneOf wordBoundaries) -- | Inline code -code :: PandocMonad m => ParserT [Char] ParserState m Inlines +code :: PandocMonad m => ParserT Text ParserState m Inlines code = code1 <|> code2 -- any character except a newline before a blank line -anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char +anyChar' :: PandocMonad m => ParserT Text ParserState m Char anyChar' = satisfy (/='\n') <|> try (char '\n' <* notFollowedBy blankline) -code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines -code1 = B.code <$> surrounded (char '@') anyChar' +code1 :: PandocMonad m => ParserT Text ParserState m Inlines +code1 = B.code . T.pack <$> surrounded (char '@') anyChar' -code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines +code2 :: PandocMonad m => ParserT Text ParserState m Inlines code2 = do htmlTag (tagOpen (=="tt") null) - B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) + B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: PandocMonad m => ParserT [Char] ParserState m Attr +attributes :: PandocMonad m => ParserT Text ParserState m Attr attributes = foldl (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) -specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) specialAttribute = do alignStr <- ("center" <$ char '=') <|> ("justify" <$ try (string "<>")) <|> ("right" <$ char '>') <|> ("left" <$ char '<') notFollowedBy spaceChar - return $ addStyle ("text-align:" ++ alignStr) + return $ addStyle $ T.pack $ "text-align:" ++ alignStr -attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) attribute = try $ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar -classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' - ws <- words `fmap` manyTill anyChar' (char ')') + ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')') case reverse ws of - [] -> return $ \(_,_,keyvals) -> ("",[],keyvals) - (('#':ident'):classes') -> return $ \(_,_,keyvals) -> - (ident',classes',keyvals) - classes' -> return $ \(_,_,keyvals) -> - ("",classes',keyvals) - -styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) + [] + -> return $ \(_,_,keyvals) -> ("",[],keyvals) + ((T.uncons -> Just ('#', ident')):classes') + -> return $ \(_,_,keyvals) -> (ident',classes',keyvals) + classes' + -> return $ \(_,_,keyvals) -> ("",classes',keyvals) + +styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' - return $ addStyle style + return $ addStyle $ T.pack style -addStyle :: String -> Attr -> Attr +addStyle :: Text -> Attr -> Attr addStyle style (id',classes,keyvals) = (id',classes,keyvals') where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] - style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals] + style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals] -langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) langAttr = do lang <- try $ enclosed (char '[') (char ']') alphaNum - return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) + return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals) -- | Parses material surrounded by a parser. surrounded :: (PandocMonad m, Show t) - => ParserT [Char] st m t -- ^ surrounding parser - -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) - -> ParserT [Char] st m [a] + => ParserT Text st m t -- ^ surrounding parser + -> ParserT Text st m a -- ^ content parser (to be used repeatedly) + -> ParserT Text st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) simpleInline :: PandocMonad m - => ParserT [Char] ParserState m t -- ^ surrounding parser + => ParserT Text ParserState m t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor - -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly) + -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly) simpleInline border construct = try $ do notAfterString border *> notFollowedBy (oneOf " \t\n\r") @@ -695,7 +698,7 @@ simpleInline border construct = try $ do then body else B.spanWith attr body -groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines +groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace @@ -704,9 +707,5 @@ groupedInlineMarkup = try $ do char ']' return $ sp1 <> result <> sp2 --- | Create a singleton list -singleton :: a -> [a] -singleton x = [x] - -eof' :: Monad m => ParserT [Char] s m Char +eof' :: Monad m => ParserT Text s m Char eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs index 5daf6b0bb..501c204f5 100644 --- a/src/Text/Pandoc/Readers/TikiWiki.hs +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -43,19 +43,19 @@ readTikiWiki :: PandocMonad m -> m Pandoc readTikiWiki opts s = do res <- readWithM parseTikiWiki def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case res of Left e -> throwError e Right d -> return d -type TikiWikiParser = ParserT [Char] ParserState +type TikiWikiParser = ParserT Text ParserState -- -- utility functions -- -tryMsg :: String -> TikiWikiParser m a -> TikiWikiParser m a -tryMsg msg p = try p <?> msg +tryMsg :: Text -> TikiWikiParser m a -> TikiWikiParser m a +tryMsg msg p = try p <?> (T.unpack msg) skip :: TikiWikiParser m a -> TikiWikiParser m () skip parser = Control.Monad.void parser @@ -89,7 +89,7 @@ block = do <|> para skipMany blankline when (verbosity >= INFO) $ - trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) + trace (T.pack $ printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) return res blockElements :: PandocMonad m => TikiWikiParser m B.Blocks @@ -133,7 +133,7 @@ tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks] tableRow = try $ do -- row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n")) -- return $ map (B.plain . mconcat) row - row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n")) + row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn . T.pack) (try $ string "|" <* notFollowedBy (oneOf "|\n")) return $ map B.plain row where parseColumn x = do @@ -342,15 +342,15 @@ listItemLine nest = lineContent >>= parseContent lineContent = do content <- anyLine continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ Data.Maybe.fromMaybe "" continuation - filterSpaces = reverse . dropWhile (== ' ') . reverse + return $ filterSpaces content <> "\n" <> Data.Maybe.fromMaybe "" continuation + filterSpaces = T.dropWhileEnd (== ' ') listContinuation = string (replicate nest '+') >> lineContent parseContent x = do parsed <- parseFromString (many1 inline) x return $ mconcat $ dropWhileEnd (== B.space) parsed -- Turn the CODE macro attributes into Pandoc code block attributes. -mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)]) +mungeAttrs :: [(Text, Text)] -> (Text, [Text], [(Text, Text)]) mungeAttrs rawAttrs = ("", classes, rawAttrs) where -- "colors" is TikiWiki CODE macro for "name of language to do @@ -370,7 +370,7 @@ codeMacro = try $ do string "{CODE(" rawAttrs <- macroAttrs string ")}" - body <- manyTill anyChar (try (string "{CODE}")) + body <- T.pack <$> manyTill anyChar (try (string "{CODE}")) newline if not (null rawAttrs) then @@ -428,9 +428,9 @@ nbsp = try $ do htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines htmlComment = try $ do string "~hc~" - inner <- many1 $ noneOf "~" + inner <- fmap T.pack $ many1 $ noneOf "~" string "~/hc~" - return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END " + return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " <> inner <> " ~/hc~ :END " linebreak :: PandocMonad m => TikiWikiParser m B.Inlines linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) @@ -469,15 +469,15 @@ image = try $ do let title = fromMaybe src $ lookup "desc" rawAttrs let alt = fromMaybe title $ lookup "alt" rawAttrs let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs - if not (null src) + if not (T.null src) then return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt) else - return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ printAttrs rawAttrs ++ "} :END " + return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " <> printAttrs rawAttrs <> "} :END " where - printAttrs attrs = unwords $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs + printAttrs attrs = T.unwords $ map (\(a, b) -> a <> "=\"" <> b <> "\"") attrs -imageAttr :: PandocMonad m => TikiWikiParser m (String, String) +imageAttr :: PandocMonad m => TikiWikiParser m (Text, Text) imageAttr = try $ do key <- many1 (noneOf "=} \t\n") char '=' @@ -485,7 +485,7 @@ imageAttr = try $ do value <- many1 (noneOf "}\"\n") optional $ char '"' optional $ char ',' - return (key, value) + return (T.pack key, T.pack value) -- __strong__ @@ -500,57 +500,57 @@ emph = try $ fmap B.emph (enclosed (string "''") nestedInlines) escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines escapedChar = try $ do string "~" - mNumber <- safeRead <$> many1 digit + mNumber <- safeRead . T.pack <$> many1 digit string "~" return $ B.str $ case mNumber of - Just number -> [toEnum (number :: Int) :: Char] - Nothing -> [] + Just number -> T.singleton $ toEnum (number :: Int) + Nothing -> "" -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this centered :: PandocMonad m => TikiWikiParser m B.Inlines centered = try $ do string "::" - inner <- many1 $ noneOf ":\n" + inner <- fmap T.pack $ many1 $ noneOf ":\n" string "::" - return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END " + return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" <> inner <> ":: :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this colored :: PandocMonad m => TikiWikiParser m B.Inlines colored = try $ do string "~~" - inner <- many1 $ noneOf "~\n" + inner <- fmap T.pack $ many1 $ noneOf "~\n" string "~~" - return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END " + return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" <> inner <> "~~ :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this underlined :: PandocMonad m => TikiWikiParser m B.Inlines underlined = try $ do string "===" - inner <- many1 $ noneOf "=\n" + inner <- fmap T.pack $ many1 $ noneOf "=\n" string "===" - return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END " + return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" <> inner <> "=== :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this boxed :: PandocMonad m => TikiWikiParser m B.Inlines boxed = try $ do string "^" - inner <- many1 $ noneOf "^\n" + inner <- fmap T.pack $ many1 $ noneOf "^\n" string "^" - return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END " + return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" <> inner <> "^ :END " -- --text-- strikeout :: PandocMonad m => TikiWikiParser m B.Inlines strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines) -nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar +nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m Text +nestedString end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ T.pack <$> many1 spaceChar <* notFollowedBy end breakChars :: PandocMonad m => TikiWikiParser m B.Inlines breakChars = try $ string "%%%" >> return B.linebreak @@ -564,7 +564,7 @@ superMacro = try $ do string "{SUP(" manyTill anyChar (string ")}") body <- manyTill anyChar (string "{SUP}") - return $ B.superscript $ B.text body + return $ B.superscript $ B.text $ T.pack body -- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux subTag :: PandocMonad m => TikiWikiParser m B.Inlines @@ -575,22 +575,22 @@ subMacro = try $ do string "{SUB(" manyTill anyChar (string ")}") body <- manyTill anyChar (string "{SUB}") - return $ B.subscript $ B.text body + return $ B.subscript $ B.text $ T.pack body -- -+text+- code :: PandocMonad m => TikiWikiParser m B.Inlines code = try $ fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString) -macroAttr :: PandocMonad m => TikiWikiParser m (String, String) +macroAttr :: PandocMonad m => TikiWikiParser m (Text, Text) macroAttr = try $ do key <- many1 (noneOf "=)") char '=' optional $ char '"' value <- many1 (noneOf " )\"") optional $ char '"' - return (key, value) + return (T.pack key, T.pack value) -macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)] +macroAttrs :: PandocMonad m => TikiWikiParser m [(Text, Text)] macroAttrs = try $ sepEndBy macroAttr spaces -- ~np~ __not bold__ ~/np~ @@ -598,13 +598,13 @@ noparse :: PandocMonad m => TikiWikiParser m B.Inlines noparse = try $ do string "~np~" body <- manyTill anyChar (string "~/np~") - return $ B.str body + return $ B.str $ T.pack body str :: PandocMonad m => TikiWikiParser m B.Inlines -str = fmap B.str (many1 alphaNum <|> count 1 characterReference) +str = fmap B.str (T.pack <$> many1 alphaNum <|> countChar 1 characterReference) symbol :: PandocMonad m => TikiWikiParser m B.Inlines -symbol = fmap B.str (count 1 nonspaceChar) +symbol = fmap B.str (countChar 1 nonspaceChar) -- [[not a link] notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines @@ -612,14 +612,14 @@ notExternalLink = try $ do start <- string "[[" body <- many (noneOf "\n[]") end <- string "]" - return $ B.text (start ++ body ++ end) + return $ B.text $ T.pack $ start ++ body ++ end -- [http://www.somesite.org url|Some Site title] -- ((internal link)) -- -- The ((...)) wiki links and [...] external links are handled -- exactly the same; this abstracts that out -makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines +makeLink :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m B.Inlines makeLink start middle end = try $ do st <- getState guard $ stateAllowLinks st @@ -627,15 +627,15 @@ makeLink start middle end = try $ do (url, title, anchor) <- wikiLinkText start middle end parsedTitle <- parseFromString (many1 inline) title setState $ st{ stateAllowLinks = True } - return $ B.link (url++anchor) "" $mconcat parsedTitle + return $ B.link (url <> anchor) "" $ mconcat parsedTitle -wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String) +wikiLinkText :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m (Text, Text, Text) wikiLinkText start middle end = do - string start - url <- many1 (noneOf $ middle ++ "\n") + string (T.unpack start) + url <- T.pack <$> many1 (noneOf $ T.unpack middle ++ "\n") seg1 <- option url linkContent seg2 <- option "" linkContent - string end + string (T.unpack end) if seg2 /= "" then return (url, seg2, seg1) @@ -644,7 +644,7 @@ wikiLinkText start middle end = do where linkContent = do char '|' - many (noneOf middle) + T.pack <$> many (noneOf $ T.unpack middle) externalLink :: PandocMonad m => TikiWikiParser m B.Inlines externalLink = makeLink "[" "]|" "]" diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 0af52e046..996a818fd 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Txt2Tags Copyright : Copyright (C) 2014 Matthew Pickering @@ -18,7 +19,6 @@ import Prelude import Control.Monad (guard, void, when) import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader (Reader, asks, runReader) -import Data.Char (toLower) import Data.Default import Data.List (intercalate, transpose) import Data.Maybe (fromMaybe) @@ -36,13 +36,13 @@ import Text.Pandoc.Parsing hiding (space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI, underlineSpan) -type T2T = ParserT String ParserState (Reader T2TMeta) +type T2T = ParserT Text ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file data T2TMeta = T2TMeta { - date :: String -- ^ Current date - , mtime :: String -- ^ Last modification time of infile + date :: Text -- ^ Current date + , mtime :: Text -- ^ Last modification time of infile , infile :: FilePath -- ^ Input file , outfile :: FilePath -- ^ Output file } deriving Show @@ -63,7 +63,7 @@ getT2TMeta = do _ -> catchError (maximum <$> mapM getModTime inps) (const (return "")) - return $ T2TMeta curDate curMtime (intercalate ", " inps) outp + return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document readTxt2Tags :: PandocMonad m @@ -74,14 +74,14 @@ readTxt2Tags opts s = do meta <- getT2TMeta let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) $ - T.unpack (crFilter s) ++ "\n\n" + crFilter s <> "\n\n" case parsed of Right result -> return result Left e -> throwError e -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document --- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc +-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -- readTxt2TagsNoMacros = readTxt2Tags parseT2T :: T2T Pandoc @@ -106,7 +106,7 @@ parseHeader = do header :: T2T () header = titleline >> authorline >> dateline -headerline :: B.ToMetaValue a => String -> T2T a -> T2T () +headerline :: B.ToMetaValue a => Text -> T2T a -> T2T () headerline field p = (() <$ try blankline) <|> (p >>= updateState . B.setMeta field) @@ -123,15 +123,15 @@ authorline = dateline :: T2T () dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline) -type Keyword = String -type Value = String +type Keyword = Text +type Value = Text setting :: T2T (Keyword, Value) setting = do string "%!" - keyword <- ignoreSpacesCap (many1 alphaNum) + keyword <- ignoreSpacesCap (many1Char alphaNum) char ':' - value <- ignoreSpacesCap (manyTill anyChar newline) + value <- ignoreSpacesCap (manyTillChar anyChar newline) return (keyword, value) -- Blocks @@ -163,10 +163,10 @@ balancedTitle c = try $ do spaces level <- length <$> many1 (char c) guard (level <= 5) -- Max header level 5 - heading <- manyTill (noneOf "\n\r") (count level (char c)) + heading <- manyTillChar (noneOf "\n\r") (count level (char c)) label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-")) many spaceChar *> newline - let attr = maybe nullAttr (\x -> (x, [], [])) label + let attr = maybe nullAttr (\x -> (T.pack x, [], [])) label return $ B.headerWith attr level (trimInlines $ B.text heading) para :: T2T Blocks @@ -192,7 +192,7 @@ quote :: T2T Blocks quote = try $ do lookAhead tab rawQuote <- many1 (tab *> optional spaces *> anyLine) - contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n") + contents <- parseFromString' parseBlocks (T.intercalate "\n" rawQuote <> "\n\n") return $ B.blockQuote contents commentLine :: T2T Inlines @@ -243,17 +243,17 @@ listItem start end = try $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString' end $ firstLine ++ blank ++ rest + rest <- T.concat <$> many (listContinuation markerLength) + parseFromString' end $ firstLine <> blank <> rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. listContinuation :: Int - -> T2T String + -> T2T Text listContinuation markerLength = try $ notFollowedBy' (blankline >> blankline) - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) + *> (mappend <$> (T.concat <$> many1 listLine) + <*> manyChar blankline) where listLine = try $ indentWith markerLength *> anyLineNewline -- Table @@ -327,16 +327,16 @@ taggedBlock = do -- Generic -genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s -blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks blockMarkupArea p f s = try (do - string s *> blankline - f . mconcat <$> manyTill p (eof <|> void (string s *> blankline))) + textStr s *> blankline + f . mconcat <$> manyTill p (eof <|> void (textStr s *> blankline))) -blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks -blockMarkupLine p f s = try (f <$> (string s *> space *> p)) +blockMarkupLine :: T2T a -> (a -> Blocks) -> Text -> T2T Blocks +blockMarkupLine p f s = try (f <$> (textStr s *> space *> p)) -- Can be in either block or inline position comment :: Monoid a => T2T a @@ -385,15 +385,15 @@ italic :: T2T Inlines italic = inlineMarkup inline B.emph '/' B.str code :: T2T Inlines -code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id +code = inlineMarkup (T.singleton <$> anyChar) B.code '`' id raw :: T2T Inlines -raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id +raw = inlineMarkup (T.singleton <$> anyChar) B.text '"' id tagged :: T2T Inlines tagged = do target <- getTarget - inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id + inlineMarkup (T.singleton <$> anyChar) (B.rawInline target) '\'' id -- Parser for markup indicated by a double character. -- Inline markup is greedy and glued @@ -404,33 +404,33 @@ inlineMarkup :: Monoid a => T2T a -- Content parser -> (a -> Inlines) -- Constructor -> Char -- Fence - -> (String -> a) -- Special Case to handle ****** + -> (Text -> a) -- Special Case to handle ****** -> T2T Inlines inlineMarkup p f c special = try $ do - start <- many1 (char c) - let l = length start + start <- many1Char (char c) + let l = T.length start guard (l >= 2) when (l == 2) (void $ notFollowedBy space) -- We must make sure that there is no space before the start of the -- closing tags - body <- optionMaybe (try $ manyTill (noneOf "\n\r") + body <- optionMaybe (try $ manyTillChar (noneOf "\n\r") (try $ lookAhead (noneOf " " >> string [c,c] ))) case body of Just middle -> do lastChar <- anyChar - end <- many1 (char c) + end <- many1Char (char c) let parser inp = parseFromString' (mconcat <$> many p) inp - let start' = case drop 2 start of + let start' = case T.drop 2 start of "" -> mempty xs -> special xs - body' <- parser (middle ++ [lastChar]) - let end' = case drop 2 end of + body' <- parser (middle <> T.singleton lastChar) + let end' = case T.drop 2 end of "" -> mempty xs -> special xs return $ f (start' `mappend` body' `mappend` end') Nothing -> do -- Either bad or case such as ***** guard (l >= 5) - let body' = replicate (l - 4) c + let body' = T.replicate (l - 4) $ T.singleton c return $ f (special body') link :: T2T Inlines @@ -441,12 +441,12 @@ titleLink :: T2T Inlines titleLink = try $ do char '[' notFollowedBy space - tokens <- sepBy1 (many $ noneOf " ]") space + tokens <- sepBy1 (manyChar $ noneOf " ]") space guard (length tokens >= 2) char ']' let link' = last tokens - guard $ not $ null link' - let tit = unwords (init tokens) + guard $ not $ T.null link' + let tit = T.unwords (init tokens) return $ B.link link' "" (B.text tit) -- Link with image @@ -455,7 +455,7 @@ imageLink = try $ do char '[' body <- image many1 space - l <- manyTill (noneOf "\n\r ") (char ']') + l <- manyTillChar (noneOf "\n\r ") (char ']') return (B.link l "" body) macro :: T2T Inlines @@ -466,7 +466,7 @@ macro = try $ do maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands) where commands = [ ("date", date), ("mtime", mtime) - , ("infile", infile), ("outfile", outfile)] + , ("infile", T.pack . infile), ("outfile", T.pack . outfile)] -- raw URLs in text are automatically linked url :: T2T Inlines @@ -474,7 +474,7 @@ url = try $ do (rawUrl, escapedUrl) <- try uri <|> emailAddress return $ B.link rawUrl "" (B.str escapedUrl) -uri :: T2T (String, String) +uri :: T2T (Text, Text) uri = try $ do address <- t2tURI return (address, escapeURI address) @@ -486,25 +486,25 @@ uri = try $ do --isT2TURI (parse t2tURI "" -> Right _) = True --isT2TURI _ = False -t2tURI :: T2T String +t2tURI :: T2T Text t2tURI = do - start <- try ((++) <$> proto <*> urlLogin) <|> guess - domain <- many1 chars - sep <- many (char '/') - form' <- option mempty ((:) <$> char '?' <*> many1 form) - anchor' <- option mempty ((:) <$> char '#' <*> many anchor) - return (start ++ domain ++ sep ++ form' ++ anchor') + start <- try ((<>) <$> proto <*> urlLogin) <|> guess + domain <- many1Char chars + sep <- manyChar (char '/') + form' <- option mempty (T.cons <$> char '?' <*> many1Char form) + anchor' <- option mempty (T.cons <$> char '#' <*> manyChar anchor) + return (start <> domain <> sep <> form' <> anchor') where protos = ["http", "https", "ftp", "telnet", "gopher", "wais"] - proto = (++) <$> oneOfStrings protos <*> string "://" - guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23")) - <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.') + proto = (<>) <$> oneOfStrings protos <*> textStr "://" + guess = (<>) <$> (((<>) <$> stringAnyCase "www" <*> option mempty (T.singleton <$> oneOf "23")) + <|> stringAnyCase "ftp") <*> (T.singleton <$> char '.') login = alphaNum <|> oneOf "_.-" - pass = many (noneOf " @") + pass = manyChar (noneOf " @") chars = alphaNum <|> oneOf "%._/~:,=$@&+-" anchor = alphaNum <|> oneOf "%._0" form = chars <|> oneOf ";*" - urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@') + urlLogin = option mempty $ try ((\x y z -> x <> y <> T.singleton z) <$> many1Char login <*> option mempty (T.cons <$> char ':' <*> pass) <*> char '@') image :: T2T Inlines @@ -512,12 +512,12 @@ image = try $ do -- List taken from txt2tags source let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"] char '[' - (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions) + (path, ext) <- manyUntilChar (noneOf "\n\t\r ") (oneOfStrings extensions) char ']' - return $ B.image (path ++ ext) "" mempty + return $ B.image (path <> ext) "" mempty -- Characters used in markup -specialChars :: String +specialChars :: [Char] specialChars = "%*-_/|:+;" tab :: T2T Char @@ -526,8 +526,8 @@ tab = char '\t' space :: T2T Char space = char ' ' -spaces :: T2T String -spaces = many space +spaces :: T2T Text +spaces = manyChar space endline :: T2T Inlines endline = try $ do @@ -544,17 +544,17 @@ endline = try $ do return B.softbreak str :: T2T Inlines -str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = try $ B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") whitespace :: T2T Inlines whitespace = try $ B.space <$ spaceChar symbol :: T2T Inlines -symbol = B.str . (:[]) <$> oneOf specialChars +symbol = B.str . T.singleton <$> oneOf specialChars -- Utility -getTarget :: T2T String +getTarget :: T2T Text getTarget = do mv <- lookupMeta "target" . stateMeta <$> getState return $ case mv of @@ -565,5 +565,5 @@ getTarget = do atStart :: T2T () atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) -ignoreSpacesCap :: T2T String -> T2T String -ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces) +ignoreSpacesCap :: T2T Text -> T2T Text +ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces) diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 27b7d7245..f7edabc48 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Readers.Vimwiki @@ -51,9 +52,10 @@ import Prelude import Control.Monad (guard) import Control.Monad.Except (throwError) import Data.Default -import Data.List (isInfixOf, isPrefixOf) +import Data.List (isInfixOf) import Data.Maybe import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines, fromList, toList, trimInlines) import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code, codeBlockWith, definitionList, @@ -73,12 +75,13 @@ import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress, many1Till, orderedListMarker, readWithM, registerHeader, spaceChar, stateMeta, - stateOptions, uri) -import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast, - isURI) + stateOptions, uri, manyTillChar, manyChar, textStr, + many1Char, countChar, many1TillChar) +import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast, + isURI, tshow) import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, spaces, string) -import Text.Parsec.Combinator (between, choice, count, eof, lookAhead, many1, +import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1, manyTill, notFollowedBy, option, skipMany1) import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) @@ -128,7 +131,7 @@ block = do , definitionList , para ] - trace (take 60 $ show $ toList res) + trace (T.take 60 $ tshow $ toList res) return res blockML :: PandocMonad m => VwParser m Blocks @@ -218,32 +221,32 @@ defMarkerM = string "::" >> spaceChar defMarkerE :: PandocMonad m => VwParser m Char defMarkerE = string "::" >> newline -hasDefMarkerM :: PandocMonad m => VwParser m String -hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) +hasDefMarkerM :: PandocMonad m => VwParser m Text +hasDefMarkerM = manyTillChar (noneOf "\n") (try defMarkerM) preformatted :: PandocMonad m => VwParser m Blocks preformatted = try $ do many spaceChar >> string "{{{" - attrText <- many (noneOf "\n") + attrText <- manyChar (noneOf "\n") lookAhead newline - contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" + contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}}" >> many spaceChar >> newline)) - if (contents /= "") && (head contents == '\n') - then return $ B.codeBlockWith (makeAttr attrText) (tail contents) + if (contents /= "") && (T.head contents == '\n') + then return $ B.codeBlockWith (makeAttr attrText) (T.tail contents) else return $ B.codeBlockWith (makeAttr attrText) contents -makeAttr :: String -> Attr +makeAttr :: Text -> Attr makeAttr s = - let xs = splitBy (`elem` " \t") s in + let xs = splitTextBy (`elem` (" \t" :: String)) s in ("", [], mapMaybe nameValue xs) -nameValue :: String -> Maybe (String, String) +nameValue :: Text -> Maybe (Text, Text) nameValue s = - let t = splitBy (== '=') s in + let t = splitTextBy (== '=') s in if length t /= 2 then Nothing else let (a, b) = (head t, last t) in - if (length b < 2) || ((head b, last b) /= ('"', '"')) + if (T.length b < 2) || ((T.head b, T.last b) /= ('"', '"')) then Nothing else Just (a, stripFirstAndLast b) @@ -253,16 +256,16 @@ displayMath = try $ do many spaceChar >> string "{{$" mathTag <- option "" mathTagParser many space - contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$" + contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}$" >> many spaceChar >> newline)) let contentsWithTags | mathTag == "" = contents - | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents - ++ "\n\\end{" ++ mathTag ++ "}" + | otherwise = "\\begin{" <> mathTag <> "}\n" <> contents + <> "\n\\end{" <> mathTag <> "}" return $ B.para $ B.displayMath contentsWithTags -mathTagLaTeX :: String -> String +mathTagLaTeX :: Text -> Text mathTagLaTeX s = case s of "equation" -> "" "equation*" -> "" @@ -360,17 +363,17 @@ combineList x [y] = case toList y of _ -> x:[y] combineList x xs = x:xs -listStart :: PandocMonad m => VwParser m (Int, String) +listStart :: PandocMonad m => VwParser m (Int, Text) listStart = try $ do s <- many spaceChar listType <- bulletListMarkers <|> orderedListMarkers spaceChar return (length s, listType) -bulletListMarkers :: PandocMonad m => VwParser m String +bulletListMarkers :: PandocMonad m => VwParser m Text bulletListMarkers = "ul" <$ (char '*' <|> char '-') -orderedListMarkers :: PandocMonad m => VwParser m String +orderedListMarkers :: PandocMonad m => VwParser m Text orderedListMarkers = ("ol" <$choice (orderedListMarker Decimal Period:(($OneParen) . orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) <|> ("ol" <$ char '#') @@ -421,9 +424,9 @@ placeholder :: PandocMonad m => VwParser m () placeholder = try $ choice (ph <$> ["title", "date"]) <|> noHtmlPh <|> templatePh -ph :: PandocMonad m => String -> VwParser m () +ph :: PandocMonad m => Text -> VwParser m () ph s = try $ do - many spaceChar >>string ('%':s) >> spaceChar + many spaceChar >> textStr (T.cons '%' s) >> spaceChar contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline) --use lookAhead because of placeholder in the whitespace parser let meta' = B.setMeta s contents nullMeta @@ -476,7 +479,7 @@ inlineML :: PandocMonad m => VwParser m Inlines inlineML = choice $ whitespace endlineML:inlineList str :: PandocMonad m => VwParser m Inlines -str = B.str <$>many1 (noneOf $ spaceChars ++ specialChars) +str = B.str <$> many1Char (noneOf $ spaceChars ++ specialChars) whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines whitespace endline = B.space <$ (skipMany1 spaceChar <|> @@ -487,7 +490,7 @@ whitespace' :: PandocMonad m => VwParser m Inlines whitespace' = B.space <$ skipMany1 spaceChar special :: PandocMonad m => VwParser m Inlines -special = B.str <$> count 1 (oneOf specialChars) +special = B.str <$> countChar 1 (oneOf specialChars) bareURL :: PandocMonad m => VwParser m Inlines bareURL = try $ do @@ -505,8 +508,8 @@ strong = try $ do return $ B.spanWith (makeId contents, [], []) mempty <> B.strong contents -makeId :: Inlines -> String -makeId i = concat (stringify <$> toList i) +makeId :: Inlines -> Text +makeId i = T.concat (stringify <$> toList i) emph :: PandocMonad m => VwParser m Inlines emph = try $ do @@ -527,7 +530,7 @@ strikeout = try $ do code :: PandocMonad m => VwParser m Inlines code = try $ do char '`' - contents <- many1Till (noneOf "\n") (char '`') + contents <- many1TillChar (noneOf "\n") (char '`') return $ B.code contents superscript :: PandocMonad m => VwParser m Inlines @@ -542,8 +545,8 @@ subscript = try $ link :: PandocMonad m => VwParser m Inlines link = try $ do string "[[" - contents <- lookAhead $ manyTill anyChar (string "]]") - case '|' `elem` contents of + contents <- lookAhead $ manyTillChar anyChar (string "]]") + case T.any (== '|') contents of False -> do manyTill anyChar (string "]]") -- not using try here because [[hell]o]] is not rendered as a link in vimwiki @@ -552,7 +555,7 @@ link = try $ do else "wikilink" return $ B.link (procLink contents) tit (B.str contents) True -> do - url <- manyTill anyChar $ char '|' + url <- manyTillChar anyChar $ char '|' lab <- mconcat <$> manyTill inline (string "]]") let tit = if isURI url then "" @@ -568,52 +571,52 @@ image = try $ do images :: PandocMonad m => Int -> VwParser m Inlines images k | k == 0 = do - imgurl <- manyTill anyChar (try $ string "}}") + imgurl <- manyTillChar anyChar (try $ string "}}") return $ B.image (procImgurl imgurl) "" (B.str "") | k == 1 = do - imgurl <- manyTill anyChar (char '|') + imgurl <- manyTillChar anyChar (char '|') alt <- mconcat <$> manyTill inline (try $ string "}}") return $ B.image (procImgurl imgurl) "" alt | k == 2 = do - imgurl <- manyTill anyChar (char '|') - alt <- mconcat <$>manyTill inline (char '|') - attrText <- manyTill anyChar (try $ string "}}") + imgurl <- manyTillChar anyChar (char '|') + alt <- mconcat <$> manyTill inline (char '|') + attrText <- manyTillChar anyChar (try $ string "}}") return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt | otherwise = do - imgurl <- manyTill anyChar (char '|') - alt <- mconcat <$>manyTill inline (char '|') - attrText <- manyTill anyChar (char '|') + imgurl <- manyTillChar anyChar (char '|') + alt <- mconcat <$> manyTill inline (char '|') + attrText <- manyTillChar anyChar (char '|') manyTill anyChar (try $ string "}}") return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt -procLink' :: String -> String +procLink' :: Text -> Text procLink' s - | take 6 s == "local:" = "file" ++ drop 5 s - | take 6 s == "diary:" = "diary/" ++ drop 6 s - | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + | T.take 6 s == "local:" = "file" <> T.drop 5 s + | T.take 6 s == "diary:" = "diary/" <> T.drop 6 s + | or ((`T.isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ]) = s | s == "" = "" - | last s == '/' = s + | T.last s == '/' = s | otherwise = s -procLink :: String -> String -procLink s = procLink' x ++ y - where (x, y) = break (=='#') s +procLink :: Text -> Text +procLink s = procLink' x <> y + where (x, y) = T.break (=='#') s -procImgurl :: String -> String -procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s +procImgurl :: Text -> Text +procImgurl s = if T.take 6 s == "local:" then "file" <> T.drop 5 s else s inlineMath :: PandocMonad m => VwParser m Inlines inlineMath = try $ - B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$') + B.math <$ char '$' <*> many1TillChar (noneOf "\n") (char '$') tag :: PandocMonad m => VwParser m Inlines tag = try $ do char ':' - s <- manyTill (noneOf spaceChars) (try (char ':' >> lookAhead space)) - guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") - let ss = splitBy (==':') s + s <- manyTillChar (noneOf spaceChars) (try (char ':' >> lookAhead space)) + guard $ not $ "::" `T.isInfixOf` (":" <> s <> ":") + let ss = splitTextBy (==':') s return $ mconcat $ makeTagSpan' (head ss):(makeTagSpan <$> tail ss) todoMark :: PandocMonad m => VwParser m Inlines @@ -646,16 +649,16 @@ nFBTTBSB = hasDefMarker :: PandocMonad m => VwParser m () hasDefMarker = () <$ manyTill (noneOf "\n") (string "::" >> oneOf spaceChars) -makeTagSpan' :: String -> Inlines -makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> +makeTagSpan' :: Text -> Inlines +makeTagSpan' s = B.spanWith (T.cons '-' s, [], []) (B.str "") <> B.spanWith (s, ["tag"], []) (B.str s) -makeTagSpan :: String -> Inlines +makeTagSpan :: Text -> Inlines makeTagSpan s = B.space <> makeTagSpan' s -mathTagParser :: PandocMonad m => VwParser m String +mathTagParser :: PandocMonad m => VwParser m Text mathTagParser = do - s <- try $ lookAhead (char '%' >> manyTill (noneOf spaceChars) + s <- try $ lookAhead (char '%' >> manyTillChar (noneOf spaceChars) (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space)) - char '%' >> string s >> char '%' + char '%' >> textStr s >> char '%' return $ mathTagLaTeX s diff --git a/src/Text/Pandoc/RoffChar.hs b/src/Text/Pandoc/RoffChar.hs index f5dee27f5..5e4a34603 100644 --- a/src/Text/Pandoc/RoffChar.hs +++ b/src/Text/Pandoc/RoffChar.hs @@ -18,10 +18,11 @@ module Text.Pandoc.RoffChar ( , combiningAccents ) where import Prelude +import qualified Data.Text as T -- | These are the escapes specifically mentioned in groff_man(7), -- plus @ and ellipsis. -standardEscapes :: [(Char, String)] +standardEscapes :: [(Char, T.Text)] standardEscapes = [ ('\160', "\\ ") , ('\'', "\\[aq]") @@ -40,7 +41,7 @@ standardEscapes = , ('\x2026', "\\&...") -- because u2026 doesn't render on tty ] -characterCodes :: [(Char, String)] +characterCodes :: [(Char, T.Text)] characterCodes = [ ('Ð', "-D") , ('ð', "Sd") @@ -402,7 +403,7 @@ characterCodes = ] -- use like: \\[E a^ aa] -combiningAccents :: [(Char, String)] +combiningAccents :: [(Char, T.Text)] combiningAccents = [ ('\779' , "a\"") , ('\772', "a-") diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index f3fca9c07..d9f330e29 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -24,8 +24,8 @@ import Data.ByteString (ByteString) import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as L -import Data.Char (isAlphaNum, isAscii, toLower) -import Data.List (isPrefixOf) +import qualified Data.Text as T +import Data.Char (isAlphaNum, isAscii) import Network.URI (escapeURIString) import System.FilePath (takeDirectory, takeExtension, (</>)) import Text.HTML.TagSoup @@ -35,24 +35,24 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Shared (isURI, renderTags', trim) -import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.UTF8 (toString, toText, fromText) import Text.Parsec (ParsecT, runParserT) import qualified Text.Parsec as P isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c -makeDataURI :: (MimeType, ByteString) -> String +makeDataURI :: (MimeType, ByteString) -> T.Text makeDataURI (mime, raw) = if textual - then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw) - else "data:" ++ mime' ++ ";base64," ++ toString (encode raw) - where textual = "text/" `Data.List.isPrefixOf` mime - mime' = if textual && ';' `notElem` mime - then mime ++ ";charset=utf-8" + then "data:" <> mime' <> "," <> T.pack (escapeURIString isOk (toString raw)) + else "data:" <> mime' <> ";base64," <> toText (encode raw) + where textual = "text/" `T.isPrefixOf` mime + mime' = if textual && T.any (== ';') mime + then mime <> ";charset=utf-8" else mime -- mime type already has charset -convertTags :: PandocMonad m => [Tag String] -> m [Tag String] +convertTags :: PandocMonad m => [Tag T.Text] -> m [Tag T.Text] convertTags [] = return [] convertTags (t@TagOpen{}:ts) | fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts @@ -69,10 +69,10 @@ convertTags (t@(TagOpen tagname as):ts) enc <- getDataURI (fromAttrib "type" t) y return (x, enc) else return (x,y) -convertTags (t@(TagOpen "script" as):TagClose "script":ts) = +convertTags (t@(TagOpen "script" as):TagClose "script":ts) = case fromAttrib "src" t of - [] -> (t:) <$> convertTags ts - src -> do + "" -> (t:) <$> convertTags ts + src -> do let typeAttr = fromAttrib "type" t res <- getData typeAttr src rest <- convertTags ts @@ -81,13 +81,13 @@ convertTags (t@(TagOpen "script" as):TagClose "script":ts) = (("src",dataUri) : [(x,y) | (x,y) <- as, x /= "src"]) : TagClose "script" : rest Right (mime, bs) - | ("text/javascript" `isPrefixOf` mime || - "application/javascript" `isPrefixOf` mime || - "application/x-javascript" `isPrefixOf` mime) && + | ("text/javascript" `T.isPrefixOf` mime || + "application/javascript" `T.isPrefixOf` mime || + "application/x-javascript" `T.isPrefixOf` mime) && not ("</script" `B.isInfixOf` bs) -> return $ - TagOpen "script" [("type", typeAttr)|not (null typeAttr)] - : TagText (toString bs) + TagOpen "script" [("type", typeAttr)|not (T.null typeAttr)] + : TagText (toText bs) : TagClose "script" : rest | otherwise -> @@ -97,7 +97,7 @@ convertTags (t@(TagOpen "script" as):TagClose "script":ts) = TagClose "script" : rest convertTags (t@(TagOpen "link" as):ts) = case fromAttrib "href" t of - [] -> (t:) <$> convertTags ts + "" -> (t:) <$> convertTags ts src -> do res <- getData (fromAttrib "type" t) src case res of @@ -107,14 +107,14 @@ convertTags (t@(TagOpen "link" as):ts) = (("href",dataUri) : [(x,y) | (x,y) <- as, x /= "href"]) : rest Right (mime, bs) - | "text/css" `isPrefixOf` mime - && null (fromAttrib "media" t) + | "text/css" `T.isPrefixOf` mime + && T.null (fromAttrib "media" t) && not ("</" `B.isInfixOf` bs) -> do rest <- convertTags $ dropWhile (==TagClose "link") ts return $ TagOpen "style" [("type", "text/css")] -- see #5725 - : TagText (toString bs) + : TagText (toText bs) : TagClose "style" : rest | otherwise -> do @@ -130,7 +130,7 @@ cssURLs d orig = do res <- runParserT (parseCSSUrls d) () "css" orig case res of Left e -> do - report $ CouldNotParseCSS (show e) + report $ CouldNotParseCSS $ T.pack $ show e return orig Right bs -> return bs @@ -176,52 +176,52 @@ pCSSUrl d = P.try $ do Left b -> return b Right (mt,b) -> do let enc = makeDataURI (mt, b) - return (B.pack $ "url(" ++ enc ++ ")") + return $ fromText $ "url(" <> enc <> ")" pQuoted :: PandocMonad m - => ParsecT ByteString () m (String, ByteString) + => ParsecT ByteString () m (T.Text, ByteString) pQuoted = P.try $ do quote <- P.oneOf "\"'" - url <- P.manyTill P.anyChar (P.char quote) - let fallback = B.pack ([quote] ++ trim url ++ [quote]) + url <- T.pack <$> P.manyTill P.anyChar (P.char quote) + let fallback = fromText $ T.singleton quote <> trim url <> T.singleton quote return (url, fallback) pUrl :: PandocMonad m - => ParsecT ByteString () m (String, ByteString) + => ParsecT ByteString () m (T.Text, ByteString) pUrl = P.try $ do P.string "url(" P.spaces quote <- P.option Nothing (Just <$> P.oneOf "\"'") - url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote) + url <- T.pack <$> P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote) P.spaces P.char ')' - let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++ - maybe "" (:[]) quote ++ ")") + let fallback = fromText ("url(" <> maybe "" T.singleton quote <> trim url <> + maybe "" T.singleton quote <> ")") return (url, fallback) handleCSSUrl :: PandocMonad m - => FilePath -> (String, ByteString) + => FilePath -> (T.Text, ByteString) -> ParsecT ByteString () m (Either ByteString (MimeType, ByteString)) handleCSSUrl d (url, fallback) = - case escapeURIString (/='|') (trim url) of + case escapeURIString (/='|') (T.unpack $ trim url) of '#':_ -> return $ Left fallback 'd':'a':'t':'a':':':_ -> return $ Left fallback - u -> do let url' = if isURI u then u else d </> u + u -> do let url' = if isURI (T.pack u) then T.pack u else T.pack (d </> u) res <- lift $ getData "" url' case res of - Left uri -> return $ Left (B.pack $ "url(" ++ uri ++ ")") + Left uri -> return $ Left (fromText $ "url(" <> uri <> ")") Right (mt', raw) -> do -- note that the downloaded CSS may -- itself contain url(...). - (mt, b) <- if "text/css" `isPrefixOf` mt' + (mt, b) <- if "text/css" `T.isPrefixOf` mt' -- see #5725: in HTML5, content type -- isn't allowed on style type attribute then ("text/css",) <$> cssURLs d raw else return (mt', raw) return $ Right (mt, b) -getDataURI :: PandocMonad m => MimeType -> String -> m String +getDataURI :: PandocMonad m => MimeType -> T.Text -> m T.Text getDataURI mimetype src = do res <- getData mimetype src case res of @@ -229,35 +229,36 @@ getDataURI mimetype src = do Right x -> return $ makeDataURI x getData :: PandocMonad m - => MimeType -> String - -> m (Either String (MimeType, ByteString)) -getData _ src@('d':'a':'t':'a':':':_) = return $ Left src-- already data: uri -getData mimetype src = do - let ext = map toLower $ takeExtension src - (raw, respMime) <- fetchItem src - let raw' = if ext `elem` [".gz", ".svgz"] - then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks [raw] - else raw - mime <- case (mimetype, respMime) of - ("",Nothing) -> throwError $ PandocSomeError - $ "Could not determine mime type for `" ++ src ++ "'" - (x, Nothing) -> return x - (_, Just x ) -> return x - result <- if "text/css" `isPrefixOf` mime - then do - oldInputs <- getInputFiles - setInputFiles [src] - res <- cssURLs (takeDirectory src) raw' - setInputFiles oldInputs - return res + => MimeType -> T.Text + -> m (Either T.Text (MimeType, ByteString)) +getData mimetype src + | "data:" `T.isPrefixOf` src = return $ Left src -- already data: uri + | otherwise = do + let ext = T.toLower $ T.pack $ takeExtension $ T.unpack src + (raw, respMime) <- fetchItem src + let raw' = if ext `elem` [".gz", ".svgz"] + then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks [raw] + else raw + mime <- case (mimetype, respMime) of + ("",Nothing) -> throwError $ PandocSomeError + $ "Could not determine mime type for `" <> src <> "'" + (x, Nothing) -> return x + (_, Just x ) -> return x + result <- if "text/css" `T.isPrefixOf` mime + then do + oldInputs <- getInputFiles + setInputFiles [T.unpack src] + res <- cssURLs (takeDirectory $ T.unpack src) raw' + setInputFiles oldInputs + return res else return raw' - return $ Right (mime, result) + return $ Right (mime, result) -- | Convert HTML into self-contained HTML, incorporating images, -- scripts, and CSS using data: URIs. -makeSelfContained :: PandocMonad m => String -> m String +makeSelfContained :: PandocMonad m => T.Text -> m T.Text makeSelfContained inp = do let tags = parseTags inp out' <- convertTags tags diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 797a0a0b0..926116e23 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -8,6 +8,7 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Shared Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -22,14 +23,20 @@ Utility functions and definitions used by the various Pandoc modules. module Text.Pandoc.Shared ( -- * List processing splitBy, + splitTextBy, splitByIndices, splitStringByIndices, + splitTextByIndices, substitute, ordNub, -- * Text processing ToString (..), + ToText (..), + tshow, backslashEscapes, escapeStringUsing, + elemText, + notElemText, stripTrailingNewlines, trim, triml, @@ -37,6 +44,7 @@ module Text.Pandoc.Shared ( trimMath, stripFirstAndLast, camelCaseToHyphenated, + camelCaseStrToHyphenated, toRomanNumeral, escapeURI, tabFilter, @@ -90,6 +98,7 @@ module Text.Pandoc.Shared ( defaultBlocksSeparator, -- * Safe read safeRead, + safeStrRead, -- * User data directory defaultUserDataDirs, -- * Version @@ -133,8 +142,8 @@ import Text.DocLayout (charWidth) import Text.Pandoc.Walk -- | Version number of pandoc library. -pandocVersion :: String -pandocVersion = showVersion version +pandocVersion :: T.Text +pandocVersion = T.pack $ showVersion version -- -- List processing @@ -148,6 +157,13 @@ splitBy isSep lst = rest' = dropWhile isSep rest in first:splitBy isSep rest' +splitTextBy :: (Char -> Bool) -> T.Text -> [T.Text] +splitTextBy isSep t + | T.null t = [] + | otherwise = let (first, rest) = T.break isSep t + rest' = T.dropWhile isSep rest + in first : splitTextBy isSep rest' + splitByIndices :: [Int] -> [a] -> [[a]] splitByIndices [] lst = [lst] splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x) xs) rest @@ -160,6 +176,9 @@ splitStringByIndices (x:xs) lst = let (first, rest) = splitAt' x lst in first : splitStringByIndices (map (\y -> y - x) xs) rest +splitTextByIndices :: [Int] -> T.Text -> [T.Text] +splitTextByIndices ns = fmap T.pack . splitStringByIndices ns . T.unpack + splitAt' :: Int -> [Char] -> ([Char],[Char]) splitAt' _ [] = ([],[]) splitAt' n xs | n <= 0 = ([],xs) @@ -195,89 +214,115 @@ instance ToString String where instance ToString T.Text where toString = T.unpack +class ToText a where + toText :: a -> T.Text + +instance ToText String where + toText = T.pack + +instance ToText T.Text where + toText = id + +tshow :: Show a => a -> T.Text +tshow = T.pack . show + -- | Returns an association list of backslash escapes for the -- designated characters. backslashEscapes :: [Char] -- ^ list of special characters to escape - -> [(Char, String)] -backslashEscapes = map (\ch -> (ch, ['\\',ch])) + -> [(Char, T.Text)] +backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch])) -- | Escape a string of characters, using an association list of -- characters and strings. -escapeStringUsing :: [(Char, String)] -> String -> String -escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) = - case lookup x escapeTable of - Just str -> str ++ rest - Nothing -> x:rest - where rest = escapeStringUsing escapeTable xs +escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text +escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl + +-- | @True@ exactly when the @Char@ appears in the @Text@. +elemText :: Char -> T.Text -> Bool +elemText c = T.any (== c) + +-- | @True@ exactly when the @Char@ does not appear in the @Text@. +notElemText :: Char -> T.Text -> Bool +notElemText c = T.all (/= c) -- | Strip trailing newlines from string. -stripTrailingNewlines :: String -> String -stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse +stripTrailingNewlines :: T.Text -> T.Text +stripTrailingNewlines = T.dropWhileEnd (== '\n') -- | Remove leading and trailing space (including newlines) from string. -trim :: String -> String -trim = triml . trimr +trim :: T.Text -> T.Text +trim = T.dropAround (`elemText` " \r\n\t") -- | Remove leading space (including newlines) from string. -triml :: String -> String -triml = dropWhile (`elem` " \r\n\t") +triml :: T.Text -> T.Text +triml = T.dropWhile (`elemText` " \r\n\t") -- | Remove trailing space (including newlines) from string. -trimr :: String -> String -trimr = reverse . triml . reverse +trimr :: T.Text -> T.Text +trimr = T.dropWhileEnd (`elemText` " \r\n\t") -- | Trim leading space and trailing space unless after \. -trimMath :: String -> String -trimMath = triml . reverse . stripspace . reverse +trimMath :: T.Text -> T.Text +trimMath = triml . T.reverse . stripBeginSpace . T.reverse -- no Text.spanEnd where - stripspace (c1:c2:cs) - | c1 `elem` [' ','\t','\n','\r'] - , c2 /= '\\' = stripspace (c2:cs) - stripspace cs = cs + stripBeginSpace t + | T.null pref = t + | Just ('\\', _) <- T.uncons suff = T.cons (T.last pref) suff + | otherwise = suff + where + (pref, suff) = T.span (`elemText` " \t\n\r") t -- | Strip leading and trailing characters from string -stripFirstAndLast :: String -> String -stripFirstAndLast str = - drop 1 $ take (length str - 1) str +stripFirstAndLast :: T.Text -> T.Text +stripFirstAndLast t = case T.uncons t of + Just (_, t') -> case T.unsnoc t' of + Just (t'', _) -> t'' + _ -> t' + _ -> "" -- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). -camelCaseToHyphenated :: String -> String -camelCaseToHyphenated [] = "" -camelCaseToHyphenated (a:b:rest) +camelCaseToHyphenated :: T.Text -> T.Text +camelCaseToHyphenated = T.pack . camelCaseStrToHyphenated . T.unpack + +-- This may not work as expected on general Unicode, if it contains +-- letters with a longer lower case form than upper case. I don't know +-- what the camel case practices of affected scripts are, though. +camelCaseStrToHyphenated :: String -> String +camelCaseStrToHyphenated [] = "" +camelCaseStrToHyphenated (a:b:rest) | isLower a - , isUpper b = a:'-':toLower b:camelCaseToHyphenated rest + , isUpper b = a:'-':toLower b:camelCaseStrToHyphenated rest -- handle ABCDef = abc-def -camelCaseToHyphenated (a:b:c:rest) +camelCaseStrToHyphenated (a:b:c:rest) | isUpper a , isUpper b - , isLower c = toLower a:'-':toLower b:camelCaseToHyphenated (c:rest) -camelCaseToHyphenated (a:rest) = toLower a:camelCaseToHyphenated rest + , isLower c = toLower a:'-':toLower b:camelCaseStrToHyphenated (c:rest) +camelCaseStrToHyphenated (a:rest) = toLower a:camelCaseStrToHyphenated rest -- | Convert number < 4000 to uppercase roman numeral. -toRomanNumeral :: Int -> String +toRomanNumeral :: Int -> T.Text toRomanNumeral x | x >= 4000 || x < 0 = "?" - | x >= 1000 = "M" ++ toRomanNumeral (x - 1000) - | x >= 900 = "CM" ++ toRomanNumeral (x - 900) - | x >= 500 = "D" ++ toRomanNumeral (x - 500) - | x >= 400 = "CD" ++ toRomanNumeral (x - 400) - | x >= 100 = "C" ++ toRomanNumeral (x - 100) - | x >= 90 = "XC" ++ toRomanNumeral (x - 90) - | x >= 50 = "L" ++ toRomanNumeral (x - 50) - | x >= 40 = "XL" ++ toRomanNumeral (x - 40) - | x >= 10 = "X" ++ toRomanNumeral (x - 10) + | x >= 1000 = "M" <> toRomanNumeral (x - 1000) + | x >= 900 = "CM" <> toRomanNumeral (x - 900) + | x >= 500 = "D" <> toRomanNumeral (x - 500) + | x >= 400 = "CD" <> toRomanNumeral (x - 400) + | x >= 100 = "C" <> toRomanNumeral (x - 100) + | x >= 90 = "XC" <> toRomanNumeral (x - 90) + | x >= 50 = "L" <> toRomanNumeral (x - 50) + | x >= 40 = "XL" <> toRomanNumeral (x - 40) + | x >= 10 = "X" <> toRomanNumeral (x - 10) | x == 9 = "IX" - | x >= 5 = "V" ++ toRomanNumeral (x - 5) + | x >= 5 = "V" <> toRomanNumeral (x - 5) | x == 4 = "IV" - | x >= 1 = "I" ++ toRomanNumeral (x - 1) + | x >= 1 = "I" <> toRomanNumeral (x - 1) | otherwise = "" -- | Escape whitespace and some punctuation characters in URI. -escapeURI :: String -> String -escapeURI = escapeURIString (not . needsEscaping) - where needsEscaping c = isSpace c || c `elem` - ['<','>','|','"','{','}','[',']','^', '`'] +escapeURI :: T.Text -> T.Text +escapeURI = T.pack . escapeURIString (not . needsEscaping) . T.unpack + where needsEscaping c = isSpace c || c `elemText` "<>|\"{}[]^`" + -- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0. tabFilter :: Int -- ^ Tab stop @@ -304,8 +349,11 @@ crFilter = T.filter (/= '\r') -- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We -- limit years to the range 1601-9999 (ISO 8601 accepts greater than -- or equal to 1583, but MS Word only accepts dates starting 1601). -normalizeDate :: String -> Maybe String -normalizeDate s = fmap (formatTime defaultTimeLocale "%F") +normalizeDate :: T.Text -> Maybe T.Text +normalizeDate = fmap T.pack . normalizeDate' . T.unpack + +normalizeDate' :: String -> Maybe String +normalizeDate' s = fmap (formatTime defaultTimeLocale "%F") (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day) where rejectBadYear day = case toGregorian day of (y, _, _) | y >= 1601 && y <= 9999 -> Just day @@ -321,26 +369,26 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") -- | Generate infinite lazy list of markers for an ordered list, -- depending on list attributes. -orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] +orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [T.Text] orderedListMarkers (start, numstyle, numdelim) = - let singleton c = [c] - nums = case numstyle of - DefaultStyle -> map show [start..] - Example -> map show [start..] - Decimal -> map show [start..] + let nums = case numstyle of + DefaultStyle -> map tshow [start..] + Example -> map tshow [start..] + Decimal -> map tshow [start..] UpperAlpha -> drop (start - 1) $ cycle $ - map singleton ['A'..'Z'] + map T.singleton ['A'..'Z'] LowerAlpha -> drop (start - 1) $ cycle $ - map singleton ['a'..'z'] + map T.singleton ['a'..'z'] UpperRoman -> map toRomanNumeral [start..] - LowerRoman -> map (map toLower . toRomanNumeral) [start..] + LowerRoman -> map (T.toLower . toRomanNumeral) [start..] inDelim str = case numdelim of - DefaultDelim -> str ++ "." - Period -> str ++ "." - OneParen -> str ++ ")" - TwoParens -> "(" ++ str ++ ")" + DefaultDelim -> str <> "." + Period -> str <> "." + OneParen -> str <> ")" + TwoParens -> "(" <> str <> ")" in map inDelim nums + -- | Extract the leading and trailing spaces from inside an inline element -- and place them outside the element. SoftBreaks count as Spaces for -- these purposes. @@ -387,15 +435,16 @@ deQuote x = x -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). -stringify :: Walkable Inline a => a -> String +stringify :: Walkable Inline a => a -> T.Text stringify = query go . walk (deNote . deQuote) - where go :: Inline -> [Char] + where go :: Inline -> T.Text go Space = " " go SoftBreak = " " go (Str x) = x go (Code _ x) = x go (Math _ x) = x - go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105 + go (RawInline (Format "html") (T.unpack -> ('<':'b':'r':_))) + = " " -- see #2105 go LineBreak = " " go _ = "" @@ -407,7 +456,7 @@ stringify = query go . walk (deNote . deQuote) capitalize :: Walkable Inline a => a -> a capitalize = walk go where go :: Inline -> Inline - go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s) + go (Str s) = Str $ T.toUpper s go x = x -- | Change final list item from @Para@ to @Plain@ if the list contains @@ -463,7 +512,7 @@ isPara _ = False -- | Convert Pandoc inline list to plain text identifier. HTML -- identifiers must start with a letter, and may contain only -- letters, digits, and the characters _-. -inlineListToIdentifier :: Extensions -> [Inline] -> String +inlineListToIdentifier :: Extensions -> [Inline] -> T.Text inlineListToIdentifier exts = dropNonLetter . filterAscii . toIdent . stringify . walk unEmojify where @@ -476,23 +525,23 @@ inlineListToIdentifier exts = unEmoji x = x dropNonLetter | extensionEnabled Ext_gfm_auto_identifiers exts = id - | otherwise = dropWhile (not . isAlpha) + | otherwise = T.dropWhile (not . isAlpha) filterAscii | extensionEnabled Ext_ascii_identifiers exts - = mapMaybe toAsciiChar + = T.pack . mapMaybe toAsciiChar . T.unpack | otherwise = id toIdent | extensionEnabled Ext_gfm_auto_identifiers exts = - filterPunct . spaceToDash . map toLower - | otherwise = intercalate "-" . words . filterPunct . map toLower - filterPunct = filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c) + filterPunct . spaceToDash . T.toLower + | otherwise = T.intercalate "-" . T.words . filterPunct . T.toLower + filterPunct = T.filter (\c -> isSpace c || isAlphaNum c || isAllowedPunct c) isAllowedPunct c | extensionEnabled Ext_gfm_auto_identifiers exts = c == '-' || c == '_' || generalCategory c `elem` [NonSpacingMark, SpacingCombiningMark, EnclosingMark, ConnectorPunctuation] | otherwise = c == '_' || c == '-' || c == '.' - spaceToDash = map (\c -> if isSpace c then '-' else c) + spaceToDash = T.map (\c -> if isSpace c then '-' else c) -- | Put a list of Pandoc blocks into a hierarchical structure: @@ -529,7 +578,7 @@ makeSections numbering mbBaseLevel bs = -- don't touch number if already present case lookup "number" kvs of Nothing | numbering -> - ("number", intercalate "." (map show newnum)) : kvs + ("number", T.intercalate "." (map tshow newnum)) : kvs _ -> kvs) return $ Div divattr (Header level' attr title' : sectionContents') : rest' @@ -542,7 +591,7 @@ makeSections numbering mbBaseLevel bs = let inner' = case inner of (Div (dident',dclasses',dkvs') zs@(Header{}:zs') : ws) - | null dident -> + | T.null dident -> Div (dident',dclasses' ++ dclasses,dkvs' ++ dkvs) zs : ws | otherwise -> -- keep id on header so we don't lose anchor Div (dident,dclasses ++ dclasses',dkvs ++ dkvs') @@ -564,7 +613,7 @@ headerLtEq _ _ = False -- | Generate a unique identifier from a list of inlines. -- Second argument is a list of already used identifiers. -uniqueIdent :: Extensions -> [Inline] -> Set.Set String -> String +uniqueIdent :: Extensions -> [Inline] -> Set.Set T.Text -> T.Text uniqueIdent exts title' usedIdents = if baseIdent `Set.member` usedIdents then case find (\x -> not $ numIdent x `Set.member` usedIdents) @@ -577,7 +626,7 @@ uniqueIdent exts title' usedIdents = baseIdent = case inlineListToIdentifier exts title' of "" -> "section" x -> x - numIdent n = baseIdent ++ "-" ++ show n + numIdent n = baseIdent <> "-" <> tshow n -- | True if block is a Header block. isHeaderBlock :: Block -> Bool @@ -664,7 +713,7 @@ handleTaskListItem handleInlines exts bls = -- | Set a field of a 'Meta' object. If the field already has a value, -- convert it into a list with the new value appended to the old value(s). addMetaField :: ToMetaValue a - => String + => T.Text -> a -> Meta -> Meta @@ -686,12 +735,16 @@ makeMeta title authors date = -- | Remove soft breaks between East Asian characters. eastAsianLineBreakFilter :: Pandoc -> Pandoc eastAsianLineBreakFilter = bottomUp go - where go (x:SoftBreak:y:zs) = - case (stringify x, stringify y) of - (xs@(_:_), c:_) - | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs - _ -> x:SoftBreak:y:zs - go xs = xs + where go (x:SoftBreak:y:zs) + | Just (_, b) <- T.unsnoc $ stringify x + , Just (c, _) <- T.uncons $ stringify y + , charWidth b == 2 + , charWidth c == 2 + = x:y:zs + | otherwise + = x:SoftBreak:y:zs + go xs + = xs -- | Builder for underline. -- This probably belongs in Builder.hs in pandoc-types. @@ -702,27 +755,28 @@ underlineSpan = B.spanWith ("", ["underline"], []) -- | Set of HTML elements that are represented as Span with a class equal as -- the element tag itself. htmlSpanLikeElements :: Set.Set T.Text -htmlSpanLikeElements = Set.fromList [T.pack "kbd", T.pack "mark", T.pack "dfn"] +htmlSpanLikeElements = Set.fromList ["kbd", "mark", "dfn"] -- | Returns the first sentence in a list of inlines, and the rest. breakSentence :: [Inline] -> ([Inline], [Inline]) breakSentence [] = ([],[]) breakSentence xs = - let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True - isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True - isSentenceEndInline LineBreak = True - isSentenceEndInline _ = False + let isSentenceEndInline (Str ys) + | Just (_, c) <- T.unsnoc ys = c == '.' || c == '?' + isSentenceEndInline LineBreak = True + isSentenceEndInline _ = False (as, bs) = break isSentenceEndInline xs in case bs of - [] -> (as, []) - [c] -> (as ++ [c], []) - (c:Space:cs) -> (as ++ [c], cs) - (c:SoftBreak:cs) -> (as ++ [c], cs) - (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) - (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) - (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) - (c:cs) -> (as ++ [c] ++ ds, es) - where (ds, es) = breakSentence cs + [] -> (as, []) + [c] -> (as ++ [c], []) + (c:Space:cs) -> (as ++ [c], cs) + (c:SoftBreak:cs) -> (as ++ [c], cs) + (Str ".":Str s@(T.uncons -> Just (')',_)):cs) + -> (as ++ [Str ".", Str s], cs) + (x@(Str (T.stripPrefix ".)" -> Just _)):cs) -> (as ++ [x], cs) + (LineBreak:x@(Str (T.uncons -> Just ('.',_))):cs) -> (as ++[LineBreak], x:cs) + (c:cs) -> (as ++ [c] ++ ds, es) + where (ds, es) = breakSentence cs -- | Split a list of inlines into sentences. splitSentences :: [Inline] -> [[Inline]] @@ -763,10 +817,11 @@ filterIpynbOutput mode = walk go removeANSI (CodeBlock attr code) = CodeBlock attr (removeANSIEscapes code) removeANSI x = x - removeANSIEscapes [] = [] - removeANSIEscapes ('\x1b':'[':cs) = - removeANSIEscapes (drop 1 $ dropWhile (/='m') cs) - removeANSIEscapes (c:cs) = c : removeANSIEscapes cs + removeANSIEscapes t + | Just cs <- T.stripPrefix "\x1b[" t = + removeANSIEscapes $ T.drop 1 $ T.dropWhile (/='m') cs + | Just (c, cs) <- T.uncons t = T.cons c $ removeANSIEscapes cs + | otherwise = "" go x = x -- @@ -774,12 +829,12 @@ filterIpynbOutput mode = walk go -- -- | Render HTML tags. -renderTags' :: [Tag String] -> String +renderTags' :: [Tag T.Text] -> T.Text renderTags' = renderTagsOptions renderOptions{ optMinimize = matchTags ["hr", "br", "img", "meta", "link"] , optRawTag = matchTags ["script", "style"] } - where matchTags tags = flip elem tags . map toLower + where matchTags tags = flip elem tags . T.toLower -- -- File handling @@ -826,8 +881,8 @@ collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories -- Convert the path part of a file: URI to a regular path. -- On windows, @/c:/foo@ should be @c:/foo@. -- On linux, @/foo@ should be @/foo@. -uriPathToPath :: String -> FilePath -uriPathToPath path = +uriPathToPath :: T.Text -> FilePath +uriPathToPath (T.unpack -> path) = #ifdef _WINDOWS case path of '/':ps -> ps @@ -853,7 +908,7 @@ filteredFilesFromArchive zf f = -- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus -- the unofficial schemes doi, javascript, isbn, pmid. -schemes :: Set.Set String +schemes :: Set.Set T.Text schemes = Set.fromList -- Official IANA schemes [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs" @@ -905,11 +960,11 @@ schemes = Set.fromList -- | Check if the string is a valid URL with a IANA or frequently used but -- unofficial scheme (see @schemes@). -isURI :: String -> Bool -isURI = maybe False hasKnownScheme . parseURI +isURI :: T.Text -> Bool +isURI = maybe False hasKnownScheme . parseURI . T.unpack where - hasKnownScheme = (`Set.member` schemes) . map toLower . - filter (/= ':') . uriScheme + hasKnownScheme = (`Set.member` schemes) . T.toLower . + T.filter (/= ':') . T.pack . uriScheme --- --- Squash blocks into inlines @@ -962,12 +1017,14 @@ defaultBlocksSeparator = -- Safe read -- -safeRead :: (MonadPlus m, Read a) => String -> m a -safeRead s = case reads s of +safeRead :: (MonadPlus m, Read a) => T.Text -> m a +safeRead = safeStrRead . T.unpack + +safeStrRead :: (MonadPlus m, Read a) => String -> m a +safeStrRead s = case reads s of (d,x):_ | all isSpace x -> return d _ -> mzero - -- -- User data directory -- diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index 4a53a1c23..324731c11 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Slides Copyright : Copyright (C) 2012-2019 John MacFarlane diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 640197c45..8d92e306b 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -25,13 +25,14 @@ import Text.DocTemplates (Template, compileTemplate, renderTemplate) import Text.Pandoc.Class (PandocMonad, readDataFile) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Text (Text) +import qualified Data.Text as T -- | Get default template for the specified writer. getDefaultTemplate :: PandocMonad m - => String -- ^ Name of writer + => Text -- ^ Name of writer -> m Text getDefaultTemplate writer = do - let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions + let format = T.takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions case format of "native" -> return "" "json" -> return "" @@ -51,7 +52,7 @@ getDefaultTemplate writer = do "markdown_phpextra" -> getDefaultTemplate "markdown" "gfm" -> getDefaultTemplate "commonmark" _ -> do - let fname = "templates" </> "default" <.> format + let fname = "templates" </> "default" <.> T.unpack format UTF8.toText <$> readDataFile fname diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs index 50b172eda..cbee5ef8c 100644 --- a/src/Text/Pandoc/Translations.hs +++ b/src/Text/Pandoc/Translations.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Translations Copyright : Copyright (C) 2017-2019 John MacFarlane @@ -34,7 +35,7 @@ import Data.Aeson.Types (Value(..), FromJSON(..)) import qualified Data.Aeson.Types as Aeson import qualified Data.HashMap.Strict as HM import qualified Data.Map as M -import Data.Text as T +import qualified Data.Text as T import qualified Data.YAML as YAML import GHC.Generics (Generic) import Text.Pandoc.Shared (safeRead) @@ -65,11 +66,11 @@ data Term = | To deriving (Show, Eq, Ord, Generic, Enum, Read) -newtype Translations = Translations (M.Map Term String) +newtype Translations = Translations (M.Map Term T.Text) deriving (Show, Generic, Semigroup, Monoid) instance FromJSON Term where - parseJSON (String t) = case safeRead (T.unpack t) of + parseJSON (String t) = case safeRead t of Just t' -> pure t' Nothing -> Prelude.fail $ "Invalid Term name " ++ show t @@ -77,7 +78,7 @@ instance FromJSON Term where instance YAML.FromYAML Term where parseYAML (YAML.Scalar _ (YAML.SStr t)) = - case safeRead (T.unpack t) of + case safeRead t of Just t' -> pure t' Nothing -> Prelude.fail $ "Invalid Term name " ++ show t @@ -88,11 +89,11 @@ instance FromJSON Translations where xs <- mapM addItem (HM.toList hm) return $ Translations (M.fromList xs) where addItem (k,v) = - case safeRead (T.unpack k) of + case safeRead k of Nothing -> Prelude.fail $ "Invalid Term name " ++ show k Just t -> case v of - (String s) -> return (t, T.unpack $ T.strip s) + (String s) -> return (t, T.strip s) inv -> Aeson.typeMismatch "String" inv parseJSON invalid = Aeson.typeMismatch "Translations" invalid @@ -100,22 +101,22 @@ instance YAML.FromYAML Translations where parseYAML = YAML.withMap "Translations" $ \tr -> Translations .M.fromList <$> mapM addItem (M.toList tr) where addItem (n@(YAML.Scalar _ (YAML.SStr k)), v) = - case safeRead (T.unpack k) of + case safeRead k of Nothing -> YAML.typeMismatch "Term" n Just t -> case v of (YAML.Scalar _ (YAML.SStr s)) -> - return (t, T.unpack (T.strip s)) + return (t, T.strip s) n' -> YAML.typeMismatch "String" n' addItem (n, _) = YAML.typeMismatch "String" n -lookupTerm :: Term -> Translations -> Maybe String +lookupTerm :: Term -> Translations -> Maybe T.Text lookupTerm t (Translations tm) = M.lookup t tm -readTranslations :: String -> Either String Translations +readTranslations :: T.Text -> Either T.Text Translations readTranslations s = - case YAML.decodeStrict $ UTF8.fromString s of - Left (pos,err') -> Left $ err' ++ + case YAML.decodeStrict $ UTF8.fromText s of + Left (pos,err') -> Left $ T.pack $ err' ++ " (line " ++ show (YAML.posLine pos) ++ " column " ++ show (YAML.posColumn pos) ++ ")" Right (t:_) -> Right t diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index c88f860bb..724c22a50 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -74,8 +75,8 @@ import Control.Monad.Except (throwError) import Control.Monad (unless) import Data.Aeson import qualified Data.ByteString.Lazy as BL -import Data.List (intercalate) import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Options @@ -121,7 +122,7 @@ data Writer m = TextWriter (WriterOptions -> Pandoc -> m Text) | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString) -- | Association list of formats and writers. -writers :: PandocMonad m => [ ( String, Writer m) ] +writers :: PandocMonad m => [ (Text, Writer m) ] writers = [ ("native" , TextWriter writeNative) ,("json" , TextWriter $ \o d -> writeJSON o d) @@ -179,11 +180,11 @@ writers = [ ] -- | Retrieve writer, extensions based on formatSpec (format+extensions). -getWriter :: PandocMonad m => String -> m (Writer m, Extensions) +getWriter :: PandocMonad m => Text -> m (Writer m, Extensions) getWriter s = case parseFormatSpec s of Left e -> throwError $ PandocAppError - $ intercalate "\n" [m | Message m <- errorMessages e] + $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e] Right (writerName, extsToEnable, extsToDisable) -> case lookup writerName writers of Nothing -> throwError $ @@ -198,7 +199,7 @@ getWriter s = unless (extensionEnabled ext allExts) $ throwError $ PandocUnsupportedExtensionError - (drop 4 $ show ext) writerName) + (T.drop 4 $ T.pack $ show ext) writerName) (extsToEnable ++ extsToDisable) return (w, exts) diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index c0f215d57..1c4c24f7f 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -22,9 +22,9 @@ AsciiDoc: <http://www.methods.co.nz/asciidoc/> module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc, writeAsciiDoctor) where import Prelude import Control.Monad.State.Strict -import Data.Char (isPunctuation, isSpace, toLower, toUpper) -import Data.List (intercalate, intersperse, stripPrefix) -import Data.Maybe (fromMaybe, isJust, listToMaybe) +import Data.Char (isPunctuation, isSpace) +import Data.List (intercalate, intersperse) +import Data.Maybe (fromMaybe, isJust) import qualified Data.Set as Set import qualified Data.Text as T import Data.Text (Text) @@ -39,11 +39,11 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared -data WriterState = WriterState { defListMarker :: String +data WriterState = WriterState { defListMarker :: Text , orderedListLevel :: Int , bulletListLevel :: Int , intraword :: Bool - , autoIds :: Set.Set String + , autoIds :: Set.Set Text , asciidoctorVariant :: Bool , inList :: Bool , hasMath :: Bool @@ -98,12 +98,12 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do Just tpl -> renderTemplate tpl context -- | Escape special characters for AsciiDoc. -escapeString :: String -> String +escapeString :: Text -> Text escapeString = escapeStringUsing escs where escs = backslashEscapes "{" -- | Ordered list start parser for use in Para below. -olMarker :: Parser [Char] ParserState Char +olMarker :: Parser Text ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && @@ -113,15 +113,18 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker -- | True if string begins with an ordered list marker -- or would be interpreted as an AsciiDoc option command -needsEscaping :: String -> Bool +needsEscaping :: Text -> Bool needsEscaping s = beginsWithOrderedListMarker s || isBracketed s where beginsWithOrderedListMarker str = - case runParser olMarker defaultParserState "para start" (take 10 str) of + case runParser olMarker defaultParserState "para start" (T.take 10 str) of Left _ -> False Right _ -> True - isBracketed ('[':cs) = listToMaybe (reverse cs) == Just ']' - isBracketed _ = False + isBracketed t + | Just ('[', t') <- T.uncons t + , Just (_, ']') <- T.unsnoc t' + = True + | otherwise = False -- | Convert Pandoc block element to asciidoc. blockToAsciiDoc :: PandocMonad m @@ -137,12 +140,13 @@ blockToAsciiDoc opts (Div (id',"section":_,_) blockToAsciiDoc opts (Plain inlines) = do contents <- inlineListToAsciiDoc opts inlines return $ contents <> blankline -blockToAsciiDoc opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = - blockToAsciiDoc opts (Para [Image attr alt (src,tit)]) +blockToAsciiDoc opts (Para [Image attr alt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = blockToAsciiDoc opts (Para [Image attr alt (src,tit)]) blockToAsciiDoc opts (Para inlines) = do contents <- inlineListToAsciiDoc opts inlines -- escape if para starts with ordered list marker - let esc = if needsEscaping (T.unpack $ render Nothing contents) + let esc = if needsEscaping (render Nothing contents) then text "{empty}" else empty return $ esc <> contents <> blankline @@ -154,7 +158,7 @@ blockToAsciiDoc opts (LineBlock lns) = do contents <- joinWithLinefeeds <$> mapM docify lns return $ "[verse]" $$ text "--" $$ contents $$ text "--" $$ blankline blockToAsciiDoc _ b@(RawBlock f s) - | f == "asciidoc" = return $ text s + | f == "asciidoc" = return $ literal s | otherwise = do report $ BlockNotRendered b return empty @@ -165,20 +169,20 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do ids <- gets autoIds let autoId = uniqueIdent (writerExtensions opts) inlines ids modify $ \st -> st{ autoIds = Set.insert autoId ids } - let identifier = if null ident || + let identifier = if T.null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) then empty - else "[[" <> text ident <> "]]" + else "[[" <> literal ident <> "]]" return $ identifier $$ nowrap (text (replicate (level + 1) '=') <> space <> contents) <> blankline blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ flush ( if null classes - then "...." $$ text str $$ "...." - else attrs $$ "----" $$ text str $$ "----") + then "...." $$ literal str $$ "...." + else attrs $$ "----" $$ literal str $$ "----") <> blankline - where attrs = "[" <> text (intercalate "," ("source" : classes)) <> "]" + where attrs = "[" <> literal (T.intercalate "," ("source" : classes)) <> "]" blockToAsciiDoc opts (BlockQuote blocks) = do contents <- blockListToAsciiDoc opts blocks let isBlock (BlockQuote _) = True @@ -258,11 +262,11 @@ blockToAsciiDoc opts (OrderedList (start, sty, _delim) items) = do DefaultStyle -> [] Decimal -> ["arabic"] Example -> [] - _ -> [map toLower (show sty)] - let listStart = if start == 1 then [] else ["start=" ++ show start] - let listoptions = case intercalate ", " (listStyle ++ listStart) of - [] -> empty - x -> brackets (text x) + _ -> [T.toLower (tshow sty)] + let listStart = if start == 1 then [] else ["start=" <> tshow start] + let listoptions = case T.intercalate ", " (listStyle ++ listStart) of + "" -> empty + x -> brackets (literal x) inlist <- gets inList modify $ \st -> st{ inList = True } contents <- mapM (orderedListItemToAsciiDoc opts) items @@ -275,7 +279,7 @@ blockToAsciiDoc opts (DefinitionList items) = do modify $ \st -> st{ inList = inlist } return $ mconcat contents <> blankline blockToAsciiDoc opts (Div (ident,classes,_) bs) = do - let identifier = if null ident then empty else "[[" <> text ident <> "]]" + let identifier = if T.null ident then empty else "[[" <> literal ident <> "]]" let admonitions = ["attention","caution","danger","error","hint", "important","note","tip","warning"] contents <- @@ -290,7 +294,7 @@ blockToAsciiDoc opts (Div (ident,classes,_) bs) = do else ("." <>) <$> blockListToAsciiDoc opts titleBs admonitionBody <- blockListToAsciiDoc opts bodyBs - return $ "[" <> text (map toUpper l) <> "]" $$ + return $ "[" <> literal (T.toUpper l) <> "]" $$ chomp admonitionTitle $$ "====" $$ chomp admonitionBody $$ @@ -365,7 +369,7 @@ definitionListItemToAsciiDoc opts (label, defs) = do defs' <- mapM defsToAsciiDoc defs modify (\st -> st{ defListMarker = marker }) let contents = nest 2 $ vcat $ intersperse divider $ map chomp defs' - return $ labelText <> text marker <> cr <> contents <> cr + return $ labelText <> literal marker <> cr <> contents <> cr -- | Convert list of Pandoc block elements to asciidoc. blockListToAsciiDoc :: PandocMonad m @@ -408,10 +412,11 @@ inlineListToAsciiDoc opts lst = do isSpacy _ SoftBreak = True -- Note that \W characters count as spacy in AsciiDoc -- for purposes of determining interword: - isSpacy End (Str xs) = case reverse xs of - c:_ -> isPunctuation c || isSpace c - _ -> False - isSpacy Start (Str (c:_)) = isPunctuation c || isSpace c + isSpacy End (Str xs) = case T.unsnoc xs of + Just (_, c) -> isPunctuation c || isSpace c + _ -> False + isSpacy Start (Str xs) + | Just (c, _) <- T.uncons xs = isPunctuation c || isSpace c isSpacy _ _ = False setIntraword :: PandocMonad m => Bool -> ADW m () @@ -456,25 +461,25 @@ inlineToAsciiDoc opts (Quoted qt lst) = do | otherwise -> [Str "``"] ++ lst ++ [Str "''"] inlineToAsciiDoc _ (Code _ str) = do isAsciidoctor <- gets asciidoctorVariant - let contents = text (escapeStringUsing (backslashEscapes "`") str) + let contents = literal (escapeStringUsing (backslashEscapes "`") str) return $ if isAsciidoctor then text "`+" <> contents <> "+`" else text "`" <> contents <> "`" -inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str +inlineToAsciiDoc _ (Str str) = return $ literal $ escapeString str inlineToAsciiDoc _ (Math InlineMath str) = do isAsciidoctor <- gets asciidoctorVariant modify $ \st -> st{ hasMath = True } let content = if isAsciidoctor - then text str - else "$" <> text str <> "$" + then literal str + else "$" <> literal str <> "$" return $ "latexmath:[" <> content <> "]" inlineToAsciiDoc _ (Math DisplayMath str) = do isAsciidoctor <- gets asciidoctorVariant modify $ \st -> st{ hasMath = True } let content = if isAsciidoctor - then text str - else "\\[" <> text str <> "\\]" + then literal str + else "\\[" <> literal str <> "\\]" inlist <- gets inList let sepline = if inlist then text "+" @@ -483,7 +488,7 @@ inlineToAsciiDoc _ (Math DisplayMath str) = do (cr <> sepline) $$ "[latexmath]" $$ "++++" $$ content $$ "++++" <> cr inlineToAsciiDoc _ il@(RawInline f s) - | f == "asciidoc" = return $ text s + | f == "asciidoc" = return $ literal s | otherwise = do report $ InlineNotRendered il return empty @@ -501,38 +506,38 @@ inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do -- abs: http://google.cod[Google] -- or my@email.com[email john] linktext <- inlineListToAsciiDoc opts txt - let isRelative = ':' `notElem` src + let isRelative = T.all (/= ':') src let prefix = if isRelative then text "link:" else empty - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) let useAuto = case txt of [Str s] | escapeURI s == srcSuffix -> True _ -> False return $ if useAuto - then text srcSuffix - else prefix <> text src <> "[" <> linktext <> "]" + then literal srcSuffix + else prefix <> literal src <> "[" <> linktext <> "]" inlineToAsciiDoc opts (Image attr alternate (src, tit)) = do -- image:images/logo.png[Company logo, title="blah"] let txt = if null alternate || (alternate == [Str ""]) then [Str "image"] else alternate linktext <- inlineListToAsciiDoc opts txt - let linktitle = if null tit + let linktitle = if T.null tit then empty - else ",title=\"" <> text tit <> "\"" + else ",title=\"" <> literal tit <> "\"" showDim dir = case dimension dir attr of Just (Percent a) -> ["scaledwidth=" <> text (show (Percent a))] Just dim -> - [text (show dir) <> "=" <> text (showInPixel opts dim)] + [text (show dir) <> "=" <> literal (showInPixel opts dim)] Nothing -> [] dimList = showDim Width ++ showDim Height dims = if null dimList then empty else "," <> mconcat (intersperse "," dimList) - return $ "image:" <> text src <> "[" <> linktext <> linktitle <> dims <> "]" + return $ "image:" <> literal src <> "[" <> linktext <> linktitle <> dims <> "]" inlineToAsciiDoc opts (Note [Para inlines]) = inlineToAsciiDoc opts (Note [Plain inlines]) inlineToAsciiDoc opts (Note [Plain inlines]) = do @@ -544,9 +549,9 @@ inlineToAsciiDoc opts (Span (ident,classes,_) ils) = do contents <- inlineListToAsciiDoc opts ils isIntraword <- gets intraword let marker = if isIntraword then "##" else "#" - if null ident && null classes + if T.null ident && null classes then return contents else do - let modifier = brackets $ text $ unwords $ - [ '#':ident | not (null ident)] ++ map ('.':) classes + let modifier = brackets $ literal $ T.unwords $ + [ "#" <> ident | not (T.null ident)] ++ map ("." <>) classes return $ modifier <> marker <> contents <> marker diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 8e6e8af51..e2d2b8e4d 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.CommonMark Copyright : Copyright (C) 2015-2019 John MacFarlane @@ -28,7 +29,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared (capitalize, isHeaderBlock, isTightList, - linesToPara, onlySimpleTableCells, substitute, taskListItemToAscii) + linesToPara, onlySimpleTableCells, taskListItemToAscii, tshow) import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Walk (walk, walkM) import Text.Pandoc.Writers.HTML (writeHtml5String, tagWithAttributes) @@ -73,7 +74,7 @@ processNotes :: Inline -> State [[Block]] Inline processNotes (Note bs) = do modify (bs :) notes <- get - return $ Str $ "[" ++ show (length notes) ++ "]" + return $ Str $ "[" <> tshow (length notes) <> "]" processNotes x = return x node :: NodeType -> [Node] -> Node @@ -109,14 +110,14 @@ blockToNodes opts (Para xs) ns = return (node PARAGRAPH (inlinesToNodes opts xs) : ns) blockToNodes opts (LineBlock lns) ns = blockToNodes opts (linesToPara lns) ns blockToNodes _ (CodeBlock (_,classes,_) xs) ns = return - (node (CODE_BLOCK (T.pack (unwords classes)) (T.pack xs)) [] : ns) + (node (CODE_BLOCK (T.unwords classes) xs) [] : ns) blockToNodes opts (RawBlock (Format f) xs) ns | f == "html" && isEnabled Ext_raw_html opts - = return (node (HTML_BLOCK (T.pack xs)) [] : ns) + = return (node (HTML_BLOCK xs) [] : ns) | (f == "latex" || f == "tex") && isEnabled Ext_raw_tex opts - = return (node (CUSTOM_BLOCK (T.pack xs) T.empty) [] : ns) + = return (node (CUSTOM_BLOCK xs T.empty) [] : ns) | f == "markdown" - = return (node (CUSTOM_BLOCK (T.pack xs) T.empty) [] : ns) + = return (node (CUSTOM_BLOCK xs T.empty) [] : ns) | otherwise = return ns blockToNodes opts (BlockQuote bs) ns = do nodes <- blocksToNodes opts bs @@ -169,9 +170,9 @@ blockToNodes opts t@(Table capt aligns _widths headers rows) ns = do let capt' = node PARAGRAPH (inlinesToNodes opts capt) -- backslash | in code and raw: let fixPipe (Code attr xs) = - Code attr (substitute "|" "\\|" xs) + Code attr (T.replace "|" "\\|" xs) fixPipe (RawInline format xs) = - RawInline format (substitute "|" "\\|" xs) + RawInline format (T.replace "|" "\\|" xs) fixPipe x = x let toCell [Plain ils] = T.strip $ nodeToCommonmark [] Nothing @@ -276,19 +277,19 @@ inlineToNodes opts (SmallCaps xs) = [node (HTML_INLINE (T.pack "</span>")) []]) ++ ) else (inlinesToNodes opts (capitalize xs) ++) inlineToNodes opts (Link _ ils (url,tit)) = - (node (LINK (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) + (node (LINK url tit) (inlinesToNodes opts ils) :) -- title beginning with fig: indicates implicit figure -inlineToNodes opts (Image alt ils (url,'f':'i':'g':':':tit)) = +inlineToNodes opts (Image alt ils (url,T.stripPrefix "fig:" -> Just tit)) = inlineToNodes opts (Image alt ils (url,tit)) inlineToNodes opts (Image _ ils (url,tit)) = - (node (IMAGE (T.pack url) (T.pack tit)) (inlinesToNodes opts ils) :) + (node (IMAGE url tit) (inlinesToNodes opts ils) :) inlineToNodes opts (RawInline (Format f) xs) | f == "html" && isEnabled Ext_raw_html opts - = (node (HTML_INLINE (T.pack xs)) [] :) + = (node (HTML_INLINE xs) [] :) | (f == "latex" || f == "tex") && isEnabled Ext_raw_tex opts - = (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :) + = (node (CUSTOM_INLINE xs T.empty) [] :) | f == "markdown" - = (node (CUSTOM_INLINE (T.pack xs) T.empty) [] :) + = (node (CUSTOM_INLINE xs T.empty) [] :) | otherwise = id inlineToNodes opts (Quoted qt ils) = ((node (HTML_INLINE start) [] : @@ -304,12 +305,12 @@ inlineToNodes opts (Quoted qt ils) = | writerPreferAscii opts -> ("“", "”") | otherwise -> ("“", "”") -inlineToNodes _ (Code _ str) = (node (CODE (T.pack str)) [] :) +inlineToNodes _ (Code _ str) = (node (CODE str) [] :) inlineToNodes opts (Math mt str) = case writerHTMLMathMethod opts of WebTeX url -> let core = inlineToNodes opts - (Image nullAttr [Str str] (url ++ urlEncode str, str)) + (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) sep = if mt == DisplayMath then (node LINEBREAK [] :) else id @@ -317,14 +318,14 @@ inlineToNodes opts (Math mt str) = _ -> case mt of InlineMath -> - (node (HTML_INLINE (T.pack ("\\(" ++ str ++ "\\)"))) [] :) + (node (HTML_INLINE ("\\(" <> str <> "\\)")) [] :) DisplayMath -> - (node (HTML_INLINE (T.pack ("\\[" ++ str ++ "\\]"))) [] :) + (node (HTML_INLINE ("\\[" <> str <> "\\]")) [] :) inlineToNodes opts (Span ("",["emoji"],kvs) [Str s]) = do case lookup "data-emoji" kvs of Just emojiname | isEnabled Ext_emoji opts -> - (node (TEXT (":" <> T.pack emojiname <> ":")) [] :) - _ -> (node (TEXT (T.pack s)) [] :) + (node (TEXT (":" <> emojiname <> ":")) [] :) + _ -> (node (TEXT s) [] :) inlineToNodes opts (Span attr ils) = let nodes = inlinesToNodes opts ils op = tagWithAttributes opts True False "span" attr @@ -336,17 +337,17 @@ inlineToNodes opts (Cite _ ils) = (inlinesToNodes opts ils ++) inlineToNodes _ (Note _) = id -- should not occur -- we remove Note elements in preprocessing -stringToNodes :: WriterOptions -> String -> [Node] -> [Node] +stringToNodes :: WriterOptions -> Text -> [Node] -> [Node] stringToNodes opts s - | not (writerPreferAscii opts) = (node (TEXT (T.pack s)) [] :) + | not (writerPreferAscii opts) = (node (TEXT s) [] :) | otherwise = step s where step input = - let (ascii, rest) = span isAscii input - this = node (TEXT (T.pack ascii)) [] - nodes = case rest of - [] -> id - (nonAscii : rest') -> + let (ascii, rest) = T.span isAscii input + this = node (TEXT ascii) [] + nodes = case T.uncons rest of + Nothing -> id + Just (nonAscii, rest') -> let escaped = toHtml5Entities (T.singleton nonAscii) in (node (HTML_INLINE escaped) [] :) . step rest' in (this :) . nodes @@ -354,7 +355,7 @@ stringToNodes opts s toSubscriptInline :: Inline -> Maybe Inline toSubscriptInline Space = Just Space toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils -toSubscriptInline (Str s) = Str <$> traverse toSubscript s +toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s) toSubscriptInline LineBreak = Just LineBreak toSubscriptInline SoftBreak = Just SoftBreak toSubscriptInline _ = Nothing @@ -362,7 +363,7 @@ toSubscriptInline _ = Nothing toSuperscriptInline :: Inline -> Maybe Inline toSuperscriptInline Space = Just Space toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils -toSuperscriptInline (Str s) = Str <$> traverse toSuperscript s +toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s) toSuperscriptInline LineBreak = Just LineBreak toSuperscriptInline SoftBreak = Just SoftBreak toSuperscriptInline _ = Nothing diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index bef1e6265..2ec86fd78 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ConTeXt Copyright : Copyright (C) 2007-2019 John MacFarlane @@ -15,8 +16,8 @@ Conversion of 'Pandoc' format into ConTeXt. module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where import Prelude import Control.Monad.State.Strict -import Data.Char (ord, isDigit, toLower) -import Data.List (intercalate, intersperse) +import Data.Char (ord, isDigit) +import Data.List (intersperse) import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -89,14 +90,14 @@ pandocToConTeXt options (Pandoc meta blocks) = do $ defField "layout" layoutFromMargins $ defField "number-sections" (writerNumberSections options) $ maybe id (\l -> - defField "context-lang" (text l :: Doc Text)) mblang + defField "context-lang" (literal l :: Doc Text)) mblang $ (case T.unpack . render Nothing <$> getField "papersize" metadata of Just (('a':d:ds) :: String) | all isDigit (d:ds) -> resetField "papersize" (T.pack ('A':d:ds)) _ -> id) - $ (case toLower <$> lookupMetaString "pdfa" meta of + $ (case T.toLower $ lookupMetaString "pdfa" meta of "true" -> resetField "pdfa" (T.pack "1b:2005") _ -> id) metadata let context' = defField "context-dir" (maybe mempty toContextDir @@ -114,7 +115,7 @@ toContextDir = fmap (\t -> case t of _ -> t) -- | escape things as needed for ConTeXt -escapeCharForConTeXt :: WriterOptions -> Char -> String +escapeCharForConTeXt :: WriterOptions -> Char -> Text escapeCharForConTeXt opts ch = let ligatures = isEnabled Ext_smart opts in case ch of @@ -133,18 +134,18 @@ escapeCharForConTeXt opts ch = '\x2013' | ligatures -> "--" '\x2019' | ligatures -> "'" '\x2026' -> "\\ldots{}" - x -> [x] + x -> T.singleton x -- | Escape string for ConTeXt -stringToConTeXt :: WriterOptions -> String -> String -stringToConTeXt opts = concatMap (escapeCharForConTeXt opts) +stringToConTeXt :: WriterOptions -> Text -> Text +stringToConTeXt opts = T.concatMap (escapeCharForConTeXt opts) -- | Sanitize labels -toLabel :: String -> String -toLabel z = concatMap go z +toLabel :: Text -> Text +toLabel z = T.concatMap go z where go x - | x `elem` ("\\#[]\",{}%()|=" :: String) = "ux" ++ printf "%x" (ord x) - | otherwise = [x] + | x `elem` ("\\#[]\",{}%()|=" :: String) = "ux" <> T.pack (printf "%x" (ord x)) + | otherwise = T.singleton x -- | Convert Pandoc block element to ConTeXt. blockToConTeXt :: PandocMonad m => Block -> WM m (Doc Text) @@ -157,14 +158,16 @@ blockToConTeXt (Div attr@(_,"section":_,_) return $ header' $$ innerContents $$ footer' blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure -blockToConTeXt (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do - capt <- inlineListToConTeXt txt - img <- inlineToConTeXt (Image attr txt (src, "")) - let (ident, _, _) = attr - label = if null ident - then empty - else "[]" <> brackets (text $ toLabel ident) - return $ blankline $$ "\\placefigure" <> label <> braces capt <> img <> blankline +blockToConTeXt (Para [Image attr txt (src,tgt)]) + | Just _ <- T.stripPrefix "fig:" tgt + = do + capt <- inlineListToConTeXt txt + img <- inlineToConTeXt (Image attr txt (src, "")) + let (ident, _, _) = attr + label = if T.null ident + then empty + else "[]" <> brackets (literal $ toLabel ident) + return $ blankline $$ "\\placefigure" <> label <> braces capt <> img <> blankline blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst return $ contents <> blankline @@ -175,17 +178,17 @@ blockToConTeXt (BlockQuote lst) = do contents <- blockListToConTeXt lst return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline blockToConTeXt (CodeBlock _ str) = - return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline + return $ flush ("\\starttyping" <> cr <> literal str <> cr <> "\\stoptyping") $$ blankline -- blankline because \stoptyping can't have anything after it, inc. '}' blockToConTeXt b@(RawBlock f str) - | f == Format "context" || f == Format "tex" = return $ text str <> blankline + | f == Format "context" || f == Format "tex" = return $ literal str <> blankline | otherwise = empty <$ report (BlockNotRendered b) blockToConTeXt (Div (ident,_,kvs) bs) = do let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment" mblang <- fromBCP47 (lookup "lang" kvs) - let wrapRef txt = if null ident + let wrapRef txt = if T.null ident then txt - else ("\\reference" <> brackets (text $ toLabel ident) <> + else ("\\reference" <> brackets (literal $ toLabel ident) <> braces empty <> "%") $$ txt wrapDir = case lookup "dir" kvs of Just "rtl" -> align "righttoleft" @@ -193,7 +196,7 @@ blockToConTeXt (Div (ident,_,kvs) bs) = do _ -> id wrapLang txt = case mblang of Just lng -> "\\start\\language[" - <> text lng <> "]" $$ txt $$ "\\stop" + <> literal lng <> "]" $$ txt $$ "\\stop" Nothing -> txt wrapBlank txt = blankline <> txt <> blankline (wrapBlank . wrapLang . wrapDir . wrapRef) <$> blockListToConTeXt bs @@ -202,29 +205,29 @@ blockToConTeXt (BulletList lst) = do return $ ("\\startitemize" <> if isTightList lst then brackets "packed" else empty) $$ - vcat contents $$ text "\\stopitemize" <> blankline + vcat contents $$ literal "\\stopitemize" <> blankline blockToConTeXt (OrderedList (start, style', delim) lst) = do st <- get let level = stOrderedListLevel st put st {stOrderedListLevel = level + 1} contents <- mapM listItemToConTeXt lst put st {stOrderedListLevel = level} - let start' = if start == 1 then "" else "start=" ++ show start + let start' = if start == 1 then "" else "start=" <> tshow start let delim' = case delim of DefaultDelim -> "" Period -> "stopper=." OneParen -> "stopper=)" TwoParens -> "left=(,stopper=)" - let width = maximum $ map length $ take (length contents) + let width = maximum $ map T.length $ take (length contents) (orderedListMarkers (start, style', delim)) let width' = (toEnum width + 1) / 2 let width'' = if width' > (1.5 :: Double) - then "width=" ++ show width' ++ "em" + then "width=" <> tshow width' <> "em" else "" - let specs2Items = filter (not . null) [start', delim', width''] + let specs2Items = filter (not . T.null) [start', delim', width''] let specs2 = if null specs2Items then "" - else "[" ++ intercalate "," specs2Items ++ "]" + else "[" <> T.intercalate "," specs2Items <> "]" let style'' = '[': (case style' of DefaultStyle -> orderedListStyles !! level Decimal -> 'n' @@ -234,8 +237,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do LowerAlpha -> 'a' UpperAlpha -> 'A') : if isTightList lst then ",packed]" else "]" - let specs = style'' ++ specs2 - return $ "\\startitemize" <> text specs $$ vcat contents $$ + let specs = T.pack style'' <> specs2 + return $ "\\startitemize" <> literal specs $$ vcat contents $$ "\\stopitemize" <> blankline blockToConTeXt (DefinitionList lst) = liftM vcat $ mapM defListItemToConTeXt lst @@ -343,9 +346,9 @@ inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt $ addStruts lst addStruts xs addStruts (x:xs) = x : addStruts xs addStruts [] = [] - isSpacey Space = True - isSpacey (Str ('\160':_)) = True - isSpacey _ = False + isSpacey Space = True + isSpacey (Str (T.uncons -> Just ('\160',_))) = True + isSpacey _ = False -- | Convert inline element to ConTeXt inlineToConTeXt :: PandocMonad m @@ -369,11 +372,11 @@ inlineToConTeXt (Subscript lst) = do inlineToConTeXt (SmallCaps lst) = do contents <- inlineListToConTeXt lst return $ braces $ "\\sc " <> contents -inlineToConTeXt (Code _ str) | not ('{' `elem` str || '}' `elem` str) = - return $ "\\type" <> braces (text str) +inlineToConTeXt (Code _ str) | not ('{' `elemText` str || '}' `elemText` str) = + return $ "\\type" <> braces (literal str) inlineToConTeXt (Code _ str) = do opts <- gets stOptions - return $ "\\mono" <> braces (text $ stringToConTeXt opts str) + return $ "\\mono" <> braces (literal $ stringToConTeXt opts str) inlineToConTeXt (Quoted SingleQuote lst) = do contents <- inlineListToConTeXt lst return $ "\\quote" <> braces contents @@ -383,15 +386,15 @@ inlineToConTeXt (Quoted DoubleQuote lst) = do inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst inlineToConTeXt (Str str) = do opts <- gets stOptions - return $ text $ stringToConTeXt opts str + return $ literal $ stringToConTeXt opts str inlineToConTeXt (Math InlineMath str) = - return $ char '$' <> text str <> char '$' + return $ char '$' <> literal str <> char '$' inlineToConTeXt (Math DisplayMath str) = - return $ text "\\startformula " <> text str <> text " \\stopformula" <> space + return $ literal "\\startformula " <> literal str <> literal " \\stopformula" <> space inlineToConTeXt il@(RawInline f str) - | f == Format "tex" || f == Format "context" = return $ text str + | f == Format "tex" || f == Format "context" = return $ literal str | otherwise = empty <$ report (InlineNotRendered il) -inlineToConTeXt LineBreak = return $ text "\\crlf" <> cr +inlineToConTeXt LineBreak = return $ literal "\\crlf" <> cr inlineToConTeXt SoftBreak = do wrapText <- gets (writerWrapText . stOptions) return $ case wrapText of @@ -400,39 +403,39 @@ inlineToConTeXt SoftBreak = do WrapPreserve -> cr inlineToConTeXt Space = return space -- Handle HTML-like internal document references to sections -inlineToConTeXt (Link _ txt ('#' : ref, _)) = do +inlineToConTeXt (Link _ txt (T.uncons -> Just ('#', ref), _)) = do opts <- gets stOptions contents <- inlineListToConTeXt txt let ref' = toLabel $ stringToConTeXt opts ref - return $ text "\\goto" + return $ literal "\\goto" <> braces contents - <> brackets (text ref') + <> brackets (literal ref') inlineToConTeXt (Link _ txt (src, _)) = do - let isAutolink = txt == [Str (unEscapeString src)] + let isAutolink = txt == [Str (T.pack $ unEscapeString $ T.unpack src)] st <- get let next = stNextRef st put $ st {stNextRef = next + 1} - let ref = "url" ++ show next + let ref = "url" <> tshow next contents <- inlineListToConTeXt txt return $ "\\useURL" - <> brackets (text ref) - <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) + <> brackets (literal ref) + <> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) <> (if isAutolink then empty else brackets empty <> brackets contents) <> "\\from" - <> brackets (text ref) + <> brackets (literal ref) inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do opts <- gets stOptions - let showDim dir = let d = text (show dir) <> "=" + let showDim dir = let d = literal (tshow dir) <> "=" in case dimension dir attr of Just (Pixel a) -> - [d <> text (showInInch opts (Pixel a)) <> "in"] + [d <> literal (showInInch opts (Pixel a)) <> "in"] Just (Percent a) -> - [d <> text (showFl (a / 100)) <> "\\textwidth"] + [d <> literal (showFl (a / 100)) <> "\\textwidth"] Just dim -> - [d <> text (show dim)] + [d <> literal (tshow dim)] Nothing -> [] dimList = showDim Width ++ showDim Height @@ -441,25 +444,25 @@ inlineToConTeXt (Image attr@(_,cls,_) _ (src, _)) = do else brackets $ mconcat (intersperse "," dimList) clas = if null cls then empty - else brackets $ text $ toLabel $ head cls + else brackets $ literal $ toLabel $ head cls -- Use / for path separators on Windows; see #4918 - fixPathSeparators = map $ \c -> case c of - '\\' -> '/' - _ -> c + fixPathSeparators = T.map $ \c -> case c of + '\\' -> '/' + _ -> c src' = fixPathSeparators $ if isURI src then src - else unEscapeString src - return $ braces $ "\\externalfigure" <> brackets (text src') <> dims <> clas + else T.pack $ unEscapeString $ T.unpack src + return $ braces $ "\\externalfigure" <> brackets (literal src') <> dims <> clas inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents let codeBlock x@(CodeBlock _ _) = [x] codeBlock _ = [] let codeBlocks = query codeBlock contents return $ if null codeBlocks - then text "\\footnote{" <> nest 2 (chomp contents') <> char '}' - else text "\\startbuffer " <> nest 2 (chomp contents') <> - text "\\stopbuffer\\footnote{\\getbuffer}" + then literal "\\footnote{" <> nest 2 (chomp contents') <> char '}' + else literal "\\startbuffer " <> nest 2 (chomp contents') <> + literal "\\stopbuffer\\footnote{\\getbuffer}" inlineToConTeXt (Span (_,_,kvs) ils) = do mblang <- fromBCP47 (lookup "lang" kvs) let wrapDir txt = case lookup "dir" kvs of @@ -467,7 +470,7 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do Just "ltr" -> braces $ "\\lefttoright " <> txt _ -> txt wrapLang txt = case mblang of - Just lng -> "\\start\\language[" <> text lng + Just lng -> "\\start\\language[" <> literal lng <> "]" <> txt <> "\\stop " Nothing -> txt (wrapLang . wrapDir) <$> inlineListToConTeXt ils @@ -482,9 +485,9 @@ sectionHeader (ident,classes,kvs) hdrLevel lst = do opts <- gets stOptions contents <- inlineListToConTeXt lst levelText <- sectionLevelToText opts (ident,classes,kvs) hdrLevel - let ident' = if null ident + let ident' = if T.null ident then empty - else "reference=" <> braces (text (toLabel ident)) + else "reference=" <> braces (literal (toLabel ident)) let contents' = if isEmpty contents then empty else "title=" <> braces contents @@ -515,23 +518,23 @@ sectionLevelToText opts (_,classes,_) hdrLevel = do TopLevelSection -> hdrLevel TopLevelDefault -> hdrLevel let (section, chapter) = if "unnumbered" `elem` classes - then (text "subject", text "title") - else (text "section", text "chapter") + then (literal "subject", literal "title") + else (literal "section", literal "chapter") return $ case level' of - -1 -> text "part" + -1 -> literal "part" 0 -> chapter n | n >= 1 -> text (concat (replicate (n - 1) "sub")) <> section _ -> empty -- cannot happen -fromBCP47 :: PandocMonad m => Maybe String -> WM m (Maybe String) +fromBCP47 :: PandocMonad m => Maybe Text -> WM m (Maybe Text) fromBCP47 mbs = fromBCP47' <$> toLang mbs -- Takes a list of the constituents of a BCP 47 language code -- and irons out ConTeXt's exceptions -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes -fromBCP47' :: Maybe Lang -> Maybe String +fromBCP47' :: Maybe Lang -> Maybe Text fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 6c4f92db0..733b29ac7 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,5 +1,6 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Custom Copyright : Copyright (C) 2012-2019 John MacFarlane @@ -17,9 +18,9 @@ import Prelude import Control.Arrow ((***)) import Control.Exception import Control.Monad (when) -import Data.Char (toLower) import Data.List (intersperse) import qualified Data.Map as M +import qualified Data.Text as T import Data.Text (Text, pack) import Data.Typeable import Foreign.Lua (Lua, Pushable) @@ -36,16 +37,16 @@ import Text.Pandoc.Writers.Shared import qualified Foreign.Lua as Lua -attrToMap :: Attr -> M.Map String String +attrToMap :: Attr -> M.Map T.Text T.Text attrToMap (id',classes,keyvals) = M.fromList $ ("id", id') - : ("class", unwords classes) + : ("class", T.unwords classes) : keyvals newtype Stringify a = Stringify a instance Pushable (Stringify Format) where - push (Stringify (Format f)) = Lua.push (map toLower f) + push (Stringify (Format f)) = Lua.push (T.toLower f) instance Pushable (Stringify [Inline]) where push (Stringify ils) = Lua.push =<< inlineListToCustom ils @@ -82,7 +83,7 @@ instance (Pushable a, Pushable b) => Pushable (KeyValue a b) where Lua.push v Lua.rawset (Lua.nthFromTop 3) -data PandocLuaException = PandocLuaException String +data PandocLuaException = PandocLuaException Text deriving (Show, Typeable) instance Exception PandocLuaException @@ -99,7 +100,7 @@ writeCustom luaFile opts doc@(Pandoc meta _) = do -- check for error in lua script (later we'll change the return type -- to handle this more gracefully): when (stat /= Lua.OK) $ - Lua.tostring' (-1) >>= throw . PandocLuaException . UTF8.toString + Lua.tostring' (-1) >>= throw . PandocLuaException . UTF8.toText rendered <- docToCustom opts doc context <- metaToContext opts (fmap (literal . pack) . blockListToCustom) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index b0472e1d1..a72d121e1 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Docbook Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -15,9 +16,7 @@ Conversion of 'Pandoc' documents to Docbook XML. module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where import Prelude import Control.Monad.Reader -import Data.Char (toLower) import Data.Generics (everywhere, mkT) -import Data.List (isPrefixOf, stripPrefix) import Data.Monoid (Any (..)) import Data.Text (Text) import qualified Data.Text as T @@ -46,26 +45,26 @@ type DB = ReaderT DocBookVersion -- | Convert list of authors to a docbook <author> section authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines authorToDocbook opts name' = do - name <- T.unpack . render Nothing <$> inlinesToDocbook opts name' + name <- render Nothing <$> inlinesToDocbook opts name' let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing return $ B.rawInline "docbook" $ render colwidth $ - if ',' `elem` name + if T.any (== ',') name then -- last name first - let (lastname, rest) = break (==',') name + let (lastname, rest) = T.break (==',') name firstname = triml rest in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> - inTagsSimple "surname" (text $ escapeStringForXML lastname) + inTagsSimple "firstname" (literal $ escapeStringForXML firstname) <> + inTagsSimple "surname" (literal $ escapeStringForXML lastname) else -- last name last - let namewords = words name + let namewords = T.words name lengthname = length namewords (firstname, lastname) = case lengthname of 0 -> ("","") 1 -> ("", name) - n -> (unwords (take (n-1) namewords), last namewords) - in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ - inTagsSimple "surname" (text $ escapeStringForXML lastname) + n -> (T.unwords (take (n-1) namewords), last namewords) + in inTagsSimple "firstname" (literal $ escapeStringForXML firstname) $$ + inTagsSimple "surname" (literal $ escapeStringForXML lastname) writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeDocbook4 opts d = @@ -141,13 +140,13 @@ listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m (Doc Text listItemToDocbook opts item = inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item) -imageToDocbook :: WriterOptions -> Attr -> String -> Doc Text +imageToDocbook :: WriterOptions -> Attr -> Text -> Doc Text imageToDocbook _ attr src = selfClosingTag "imagedata" $ - ("fileref", src) : idAndRole attr ++ dims + ("fileref", src) : idAndRole attr <> dims where - dims = go Width "width" ++ go Height "depth" + dims = go Width "width" <> go Height "depth" go dir dstr = case dimension dir attr of - Just a -> [(dstr, show a)] + Just a -> [(dstr, tshow a)] Nothing -> [] -- | Convert a Pandoc block element to Docbook. @@ -166,20 +165,20 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do 0 -> "chapter" n | n >= 1 && n <= 5 -> if version == DocBook5 then "section" - else "sect" ++ show n + else "sect" <> tshow n _ -> "simplesect" idName = if version == DocBook5 then "xml:id" else "id" - idAttr = [(idName, writerIdentifierPrefix opts ++ id') | not (null id')] + idAttr = [(idName, writerIdentifierPrefix opts <> id') | not (T.null id')] nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] else [] - attribs = nsAttr ++ idAttr + attribs = nsAttr <> idAttr title' <- inlinesToDocbook opts ils contents <- blocksToDocbook opts bs return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents blockToDocbook opts (Div (ident,_,_) [Para lst]) = - let attribs = [("id", ident) | not (null ident)] in + let attribs = [("id", ident) | not (T.null ident)] in if hasLineBreaks lst then (flush . nowrap . inTags False "literallayout" attribs) <$> inlinesToDocbook opts lst @@ -187,7 +186,7 @@ blockToDocbook opts (Div (ident,_,_) [Para lst]) = blockToDocbook opts (Div (ident,_,_) bs) = do contents <- blocksToDocbook opts (map plainToPara bs) return $ - (if null ident + (if T.null ident then mempty else selfClosingTag "anchor" [("id", ident)]) $$ contents blockToDocbook _ h@Header{} = do @@ -196,7 +195,7 @@ blockToDocbook _ h@Header{} = do return empty blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure -blockToDocbook opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = do +blockToDocbook opts (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just _)]) = do alt <- inlinesToDocbook opts txt let capt = if null txt then empty @@ -216,16 +215,16 @@ blockToDocbook opts (LineBlock lns) = blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" <$> blocksToDocbook opts blocks blockToDocbook _ (CodeBlock (_,classes,_) str) = return $ - text ("<programlisting" ++ lang ++ ">") <> cr <> - flush (text (escapeStringForXML str) <> cr <> text "</programlisting>") + literal ("<programlisting" <> lang <> ">") <> cr <> + flush (literal (escapeStringForXML str) <> cr <> literal "</programlisting>") where lang = if null langs then "" - else " language=\"" ++ escapeStringForXML (head langs) ++ + else " language=\"" <> escapeStringForXML (head langs) <> "\"" - isLang l = map toLower l `elem` map (map toLower) languages + isLang l = T.toLower l `elem` map T.toLower languages langsFrom s = if isLang s then [s] - else languagesByExtension . map toLower $ s + else languagesByExtension . T.toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = do let attribs = [("spacing", "compact") | isTightList lst] @@ -241,26 +240,26 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do UpperRoman -> [("numeration", "upperroman")] LowerRoman -> [("numeration", "lowerroman")] spacing = [("spacing", "compact") | isTightList (first:rest)] - attribs = numeration ++ spacing + attribs = numeration <> spacing items <- if start == 1 then listItemsToDocbook opts (first:rest) else do first' <- blocksToDocbook opts (map plainToPara first) rest' <- listItemsToDocbook opts rest return $ - inTags True "listitem" [("override",show start)] first' $$ + inTags True "listitem" [("override",tshow start)] first' $$ rest' return $ inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = do let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst blockToDocbook _ b@(RawBlock f str) - | f == "docbook" = return $ text str -- raw XML block + | f == "docbook" = return $ literal str -- raw XML block | f == "html" = do version <- ask if version == DocBook5 then return empty -- No html in Docbook5 - else return $ text str -- allow html for backwards compatibility + else return $ literal str -- allow html for backwards compatibility | otherwise = do report $ BlockNotRendered b return empty @@ -271,9 +270,9 @@ blockToDocbook opts (Table caption aligns widths headers rows) = do else inTagsIndented "title" <$> inlinesToDocbook opts caption let tableType = if isEmpty captionDoc then "informaltable" else "table" - percent w = show (truncate (100*w) :: Integer) ++ "*" + percent w = tshow (truncate (100*w) :: Integer) <> "*" coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec" - ([("colwidth", percent w) | w > 0] ++ + ([("colwidth", percent w) | w > 0] <> [("align", alignmentToString al)])) widths aligns head' <- if all null headers then return empty @@ -281,7 +280,7 @@ blockToDocbook opts (Table caption aligns widths headers rows) = do body' <- (inTagsIndented "tbody" . vcat) <$> mapM (tableRowToDocbook opts) rows return $ inTagsIndented tableType $ captionDoc $$ - inTags True "tgroup" [("cols", show (length headers))] ( + inTags True "tgroup" [("cols", tshow (length headers))] ( coltags $$ head' $$ body') hasLineBreaks :: [Inline] -> Bool @@ -294,7 +293,7 @@ hasLineBreaks = getAny . query isLineBreak . walk removeNote isLineBreak LineBreak = Any True isLineBreak _ = Any False -alignmentToString :: Alignment -> [Char] +alignmentToString :: Alignment -> Text alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" @@ -321,7 +320,7 @@ inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst -- | Convert an inline element to Docbook. inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m (Doc Text) -inlineToDocbook _ (Str str) = return $ text $ escapeStringForXML str +inlineToDocbook _ (Str str) = return $ literal $ escapeStringForXML str inlineToDocbook opts (Emph lst) = inTagsSimple "emphasis" <$> inlinesToDocbook opts lst inlineToDocbook opts (Strong lst) = @@ -341,18 +340,18 @@ inlineToDocbook opts (Quoted _ lst) = inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst inlineToDocbook opts (Span (ident,_,_) ils) = - ((if null ident + ((if T.null ident then mempty else selfClosingTag "anchor" [("id", ident)]) <>) <$> inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = - return $ inTagsSimple "literal" $ text (escapeStringForXML str) + return $ inTagsSimple "literal" $ literal (escapeStringForXML str) inlineToDocbook opts (Math t str) | isMathML (writerHTMLMathMethod opts) = do res <- convertMath writeMathML t str case res of Right r -> return $ inTagsSimple tagtype - $ text $ Xml.ppcElement conf + $ literal $ T.pack $ Xml.ppcElement conf $ fixNS $ removeAttr r Left il -> inlineToDocbook opts il @@ -366,19 +365,19 @@ inlineToDocbook opts (Math t str) fixNS' qname = qname{ Xml.qPrefix = Just "mml" } fixNS = everywhere (mkT fixNS') inlineToDocbook _ il@(RawInline f x) - | f == "html" || f == "docbook" = return $ text x + | f == "html" || f == "docbook" = return $ literal x | otherwise = do report $ InlineNotRendered il return empty -inlineToDocbook _ LineBreak = return $ text "\n" +inlineToDocbook _ LineBreak = return $ literal "\n" -- currently ignore, would require the option to add custom -- styles to the document inlineToDocbook _ Space = return space -- because we use \n for LineBreak, we can't do soft breaks: inlineToDocbook _ SoftBreak = return space inlineToDocbook opts (Link attr txt (src, _)) - | Just email <- stripPrefix "mailto:" src = - let emailLink = inTagsSimple "email" $ text $ + | Just email <- T.stripPrefix "mailto:" src = + let emailLink = inTagsSimple "email" $ literal $ escapeStringForXML email in case txt of [Str s] | escapeURI s == email -> return emailLink @@ -387,17 +386,17 @@ inlineToDocbook opts (Link attr txt (src, _)) char '(' <> emailLink <> char ')' | otherwise = do version <- ask - (if "#" `isPrefixOf` src - then inTags False "link" $ ("linkend", writerIdentifierPrefix opts ++ drop 1 src) : idAndRole attr + (if "#" `T.isPrefixOf` src + then inTags False "link" $ ("linkend", writerIdentifierPrefix opts <> T.drop 1 src) : idAndRole attr else if version == DocBook5 then inTags False "link" $ ("xlink:href", src) : idAndRole attr else inTags False "ulink" $ ("url", src) : idAndRole attr ) <$> inlinesToDocbook opts txt inlineToDocbook opts (Image attr _ (src, tit)) = return $ - let titleDoc = if null tit + let titleDoc = if T.null tit then empty else inTagsIndented "objectinfo" $ - inTagsIndented "title" (text $ escapeStringForXML tit) + inTagsIndented "title" (literal $ escapeStringForXML tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ imageToDocbook opts attr src inlineToDocbook opts (Note contents) = @@ -407,12 +406,12 @@ isMathML :: HTMLMathMethod -> Bool isMathML MathML = True isMathML _ = False -idAndRole :: Attr -> [(String, String)] -idAndRole (id',cls,_) = ident ++ role +idAndRole :: Attr -> [(Text, Text)] +idAndRole (id',cls,_) = ident <> role where - ident = if null id' + ident = if T.null id' then [] else [("id", id')] role = if null cls then [] - else [("role", unwords cls)] + else [("role", T.unwords cls)] diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1a8ea0118..3c387d9d9 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -32,6 +32,7 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting @@ -40,7 +41,7 @@ import Text.Pandoc.BCP47 (getLang, renderLang) import Text.Pandoc.Class (PandocMonad, report, toLang) import qualified Text.Pandoc.Class as P import Data.Time -import Text.Pandoc.UTF8 (fromStringLazy) +import Text.Pandoc.UTF8 (fromTextLazy) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Highlighting (highlight) @@ -107,8 +108,8 @@ data WriterEnv = WriterEnv{ envTextProperties :: EnvProps , envListLevel :: Int , envListNumId :: Int , envInDel :: Bool - , envChangesAuthor :: String - , envChangesDate :: String + , envChangesAuthor :: T.Text + , envChangesDate :: T.Text , envPrintWidth :: Integer } @@ -126,8 +127,8 @@ defaultWriterEnv = WriterEnv{ envTextProperties = mempty data WriterState = WriterState{ stFootnotes :: [Element] - , stComments :: [([(String,String)], [Inline])] - , stSectionIds :: Set.Set String + , stComments :: [([(T.Text, T.Text)], [Inline])] + , stSectionIds :: Set.Set T.Text , stExternalLinks :: M.Map String String , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) , stLists :: [ListMarker] @@ -163,7 +164,6 @@ defaultWriterState = WriterState{ type WS m = ReaderT WriterEnv (StateT WriterState m) - renumIdMap :: Int -> [Element] -> M.Map String String renumIdMap _ [] = M.empty renumIdMap n (e:es) @@ -189,10 +189,16 @@ renumId f renumMap e renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) +findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text +findAttrTextBy x = fmap T.pack . findAttrBy x + +lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text +lookupAttrTextBy x = fmap T.pack . lookupAttrBy x + -- | Certain characters are invalid in XML even if escaped. -- See #1992 -stripInvalidChars :: String -> String -stripInvalidChars = filter isValidChar +stripInvalidChars :: T.Text -> T.Text +stripInvalidChars = T.filter isValidChar -- | See XML reference isValidChar :: Char -> Bool @@ -230,11 +236,11 @@ writeDocx opts doc@(Pandoc meta _) = do -- Gets the template size let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz")) - let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= lookupAttrBy ((=="w") . qName) + let mbAttrSzWidth = (elAttribs <$> mbpgsz) >>= lookupAttrTextBy ((=="w") . qName) let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar")) - let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="left") . qName) - let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= lookupAttrBy ((=="right") . qName) + let mbAttrMarLeft = (elAttribs <$> mbpgmar) >>= lookupAttrTextBy ((=="left") . qName) + let mbAttrMarRight = (elAttribs <$> mbpgmar) >>= lookupAttrTextBy ((=="right") . qName) -- Get the available area (converting the size and the margins to int and -- doing the difference @@ -248,7 +254,7 @@ writeDocx opts doc@(Pandoc meta _) = do mblang <- toLang $ getLang opts meta let addLang :: Element -> Element addLang e = case mblang >>= \l -> - (return . XMLC.toTree . go (renderLang l) + (return . XMLC.toTree . go (T.unpack $ renderLang l) . XMLC.fromElement) e of Just (Elem e') -> e' _ -> e -- return original @@ -289,7 +295,7 @@ writeDocx opts doc@(Pandoc meta _) = do let env = defaultWriterEnv { envRTL = isRTLmeta , envChangesAuthor = fromMaybe "unknown" username - , envChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime + , envChangesDate = T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime , envPrintWidth = maybe 420 (`quot` 20) pgContentWidth } @@ -337,9 +343,9 @@ writeDocx opts doc@(Pandoc meta _) = do [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _) = mkOverrideNode ("/word/" ++ imgpath, - fromMaybe "application/octet-stream" mbMimeType) + maybe "application/octet-stream" T.unpack mbMimeType) let mkMediaOverride imgpath = - mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath) + mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -488,10 +494,10 @@ writeDocx opts doc@(Pandoc meta _) = do numbering <- parseXml refArchive distArchive numpath newNumElts <- mkNumbering (stLists st) let pandocAdded e = - case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of + case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of Just numid -> numid >= (990 :: Int) Nothing -> - case findAttrBy ((== "numId") . qName) e >>= safeRead of + case findAttrTextBy ((== "numId") . qName) e >>= safeRead of Just numid -> numid >= (1000 :: Int) Nothing -> False let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering) @@ -513,11 +519,11 @@ writeDocx opts doc@(Pandoc meta _) = do let extraCoreProps = ["subject","lang","category","description"] let extraCorePropsMap = M.fromList $ zip extraCoreProps ["dc:subject","dc:language","cp:category","dc:description"] - let lookupMetaString' :: String -> Meta -> String + let lookupMetaString' :: T.Text -> Meta -> T.Text lookupMetaString' key' meta' = case key' of - "description" -> intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') - _ -> lookupMetaString key' meta' + "description" -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') + key'' -> lookupMetaString key'' meta' let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -525,11 +531,11 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:dcterms","http://purl.org/dc/terms/") ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] - $ mknode "dc:title" [] (stringify $ docTitle meta) - : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) - : [ mknode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) + $ mktnode "dc:title" [] (stringify $ docTitle meta) + : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta)) + : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) | k <- M.keys (unMeta meta), k `elem` extraCoreProps] - ++ mknode "cp:keywords" [] (intercalate ", " keywords) + ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) @@ -537,7 +543,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- docProps/custom.xml let customProperties :: [(String, String)] - customProperties = [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta) + customProperties = [(T.unpack k, T.unpack $ lookupMetaString k meta) | k <- M.keys (unMeta meta) , k `notElem` (["title", "author", "keywords"] ++ extraCoreProps)] let mkCustomProp (k, v) pid = mknode "property" @@ -584,7 +590,7 @@ writeDocx opts doc@(Pandoc meta _) = do let entryFromArchive arch path = maybe (throwError $ PandocSomeError - $ path ++ " missing in reference docx") + $ T.pack $ path ++ " missing in reference docx") return (findEntryByPath path arch `mplus` findEntryByPath path distArchive) docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" @@ -614,25 +620,24 @@ writeDocx opts doc@(Pandoc meta _) = do miscRelEntries ++ otherMediaEntries return $ fromArchive archive - newParaPropToOpenXml :: ParaStyleName -> Element newParaPropToOpenXml (fromStyleName -> s) = - let styleId = filter (not . isSpace) s + let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "paragraph") , ("w:customStyle", "1") - , ("w:styleId", styleId)] - [ mknode "w:name" [("w:val", s)] () + , ("w:styleId", T.unpack styleId)] + [ mknode "w:name" [("w:val", T.unpack s)] () , mknode "w:basedOn" [("w:val","BodyText")] () , mknode "w:qFormat" [] () ] newTextPropToOpenXml :: CharStyleName -> Element newTextPropToOpenXml (fromStyleName -> s) = - let styleId = filter (not . isSpace) s + let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "character") , ("w:customStyle", "1") - , ("w:styleId", styleId)] - [ mknode "w:name" [("w:val", s)] () + , ("w:styleId", T.unpack styleId)] + [ mknode "w:name" [("w:val", T.unpack s)] () , mknode "w:basedOn" [("w:val","BodyTextChar")] () ] @@ -821,8 +826,8 @@ writeOpenXML opts (Pandoc meta blocks) = do abstract <- if null abstract' then return [] else withParaPropM (pStyleM "Abstract") $ blocksToOpenXML opts abstract' - let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs - convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs + let convertSpace (Str x : Space : Str y : xs) = Str (x <> " " <> y) : xs + convertSpace (Str x : Str y : xs) = Str (x <> y) : xs convertSpace xs = xs let blocks' = bottomUp convertSpace blocks doc' <- setFirstPara >> blocksToOpenXML opts blocks' @@ -831,7 +836,7 @@ writeOpenXML opts (Pandoc meta blocks) = do let toComment (kvs, ils) = do annotation <- inlinesToOpenXML opts ils return $ - mknode "w:comment" [('w':':':k,v) | (k,v) <- kvs] + mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs] [ mknode "w:p" [] $ [ mknode "w:pPr" [] [ mknode "w:pStyle" [("w:val", "CommentText")] () ] @@ -858,13 +863,13 @@ pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element pStyleM styleName = do pStyleMap <- gets (smParaStyle . stStyleMaps) let sty' = getStyleIdFromName styleName pStyleMap - return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () + return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] () rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM styleName = do cStyleMap <- gets (smCharStyle . stStyleMaps) let sty' = getStyleIdFromName styleName cStyleMap - return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] () + return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] () getUniqueId :: (PandocMonad m) => WS m String -- the + 20 is to ensure that there are no clashes with the rIds @@ -875,7 +880,7 @@ getUniqueId = do return $ show n -- | Key for specifying user-defined docx styles. -dynamicStyleKey :: String +dynamicStyleKey :: T.Text dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. @@ -886,7 +891,7 @@ blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] blockToOpenXML' _ Null = return [] blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do stylemod <- case lookup dynamicStyleKey kvs of - Just (fromString -> sty) -> do + Just (fromString . T.unpack -> sty) -> do modify $ \s -> s{stDynamicParaProps = Set.insert sty (stDynamicParaProps s)} @@ -904,14 +909,14 @@ blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do else id header <- dirmod $ stylemod $ blocksToOpenXML opts hs contents <- dirmod $ bibmod $ stylemod $ blocksToOpenXML opts bs' - wrapBookmark ident $ header ++ contents + wrapBookmark ident $ header <> contents blockToOpenXML' opts (Header lev (ident,_,_) lst) = do setFirstPara paraProps <- withParaPropM (pStyleM (fromString $ "Heading "++show lev)) $ getParaProps False contents <- inlinesToOpenXML opts lst - if null ident - then return [mknode "w:p" [] (paraProps ++contents)] + if T.null ident + then return [mknode "w:p" [] (paraProps ++ contents)] else do let bookmarkName = ident modify $ \s -> s{ stSectionIds = Set.insert bookmarkName @@ -924,7 +929,7 @@ blockToOpenXML' opts (Plain lst) = do prop <- pStyleM "Compact" if isInTable then withParaProp prop block else block -- title beginning with fig: indicates that the image is a figure -blockToOpenXML' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = do +blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do setFirstPara prop <- pStyleM $ if null alt @@ -1021,7 +1026,7 @@ blockToOpenXML' opts (Table caption aligns widths headers rows) = do ( mknode "w:tblStyle" [("w:val","Table")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ] () : - [ mknode "w:tblCaption" [("w:val", captionStr)] () + [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths @@ -1122,19 +1127,19 @@ withParaProp d p = withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a withParaPropM = (. flip withParaProp) . (>>=) -formattedString :: PandocMonad m => String -> WS m [Element] +formattedString :: PandocMonad m => T.Text -> WS m [Element] formattedString str = -- properly handle soft hyphens - case splitBy (=='\173') str of + case splitTextBy (=='\173') str of [w] -> formattedString' w ws -> do sh <- formattedRun [mknode "w:softHyphen" [] ()] intercalate sh <$> mapM formattedString' ws -formattedString' :: PandocMonad m => String -> WS m [Element] +formattedString' :: PandocMonad m => T.Text -> WS m [Element] formattedString' str = do inDel <- asks envInDel - formattedRun [ mknode (if inDel then "w:delText" else "w:t") + formattedRun [ mktnode (if inDel then "w:delText" else "w:t") [("xml:space","preserve")] (stripInvalidChars str) ] formattedRun :: PandocMonad m => [Element] -> WS m [Element] @@ -1163,21 +1168,21 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do let ident' = fromMaybe ident (lookup "id" kvs) kvs' = filter (("id" /=) . fst) kvs modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st } - return [ mknode "w:commentRangeStart" [("w:id", ident')] () ] + return [ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ] inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = -- prefer the "id" in kvs, since that is the one produced by the docx -- reader. let ident' = fromMaybe ident (lookup "id" kvs) in - return [ mknode "w:commentRangeEnd" [("w:id", ident')] () + return [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] () , mknode "w:r" [] [ mknode "w:rPr" [] [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] - , mknode "w:commentReference" [("w:id", ident')] () ] + , mknode "w:commentReference" [("w:id", T.unpack ident')] () ] ] inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do stylemod <- case lookup dynamicStyleKey kvs of - Just (fromString -> sty) -> do + Just (fromString . T.unpack -> sty) -> do modify $ \s -> s{stDynamicTextProps = Set.insert sty (stDynamicTextProps s)} @@ -1208,8 +1213,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do x <- f return [ mknode "w:ins" [("w:id", show insId), - ("w:author", author), - ("w:date", date)] x ] + ("w:author", T.unpack author), + ("w:date", T.unpack date)] x ] else return id delmod <- if "deletion" `elem` classes then do @@ -1220,8 +1225,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do x <- f return [mknode "w:del" [("w:id", show delId), - ("w:author", author), - ("w:date", date)] x] + ("w:author", T.unpack author), + ("w:date", T.unpack date)] x] else return id contents <- insmod $ delmod $ dirmod $ stylemod $ pmod $ inlinesToOpenXML opts ils @@ -1264,7 +1269,7 @@ inlineToOpenXML' opts (Code attrs str) = do let alltoktypes = [KeywordTok ..] tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (fromString $ show tt)) alltoktypes let unhighlighted = intercalate [br] `fmap` - mapM formattedString (lines str) + mapM formattedString (T.lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] @@ -1278,7 +1283,7 @@ inlineToOpenXML' opts (Code attrs str) = do formatOpenXML attrs str of Right h -> return h Left msg -> do - unless (null msg) $ report $ CouldNotHighlight msg + unless (T.null msg) $ report $ CouldNotHighlight msg unhighlighted inlineToOpenXML' opts (Note bs) = do notes <- gets stFootnotes @@ -1287,7 +1292,7 @@ inlineToOpenXML' opts (Note bs) = do let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker + let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs @@ -1303,27 +1308,27 @@ inlineToOpenXML' opts (Note bs) = do [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: -inlineToOpenXML' opts (Link _ txt ('#':xs,_)) = do +inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return - [ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ] + [ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ] -- external link: inlineToOpenXML' opts (Link _ txt (src,_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks - id' <- case M.lookup src extlinks of + id' <- case M.lookup (T.unpack src) extlinks of Just i -> return i Nothing -> do i <- ("rId"++) `fmap` getUniqueId modify $ \st -> st{ stExternalLinks = - M.insert src i extlinks } + M.insert (T.unpack src) i extlinks } return i return [ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do pageWidth <- asks envPrintWidth imgs <- gets stImages let - stImage = M.lookup src imgs + stImage = M.lookup (T.unpack src) imgs generateImgElt (ident, _, _, img) = let (xpt,ypt) = desiredSizeInPoints opts attr @@ -1336,7 +1341,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do ,("noChangeAspect","1")] () nvPicPr = mknode "pic:nvPicPr" [] [ mknode "pic:cNvPr" - [("descr",src),("id","0"),("name","Picture")] () + [("descr",T.unpack src),("id","0"),("name","Picture")] () , cNvPicPr ] blipFill = mknode "pic:blipFill" [] [ mknode "a:blip" [("r:embed",ident)] () @@ -1371,8 +1376,8 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () , mknode "wp:docPr" - [ ("descr", stringify alt) - , ("title", title) + [ ("descr", T.unpack $ stringify alt) + , ("title", T.unpack title) , ("id","1") , ("name","Picture") ] () @@ -1389,7 +1394,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do let imgext = case mt >>= extensionFromMimeType of - Just x -> '.':x + Just x -> "." <> x Nothing -> case imageType img of Just Png -> ".png" Just Jpeg -> ".jpeg" @@ -1399,21 +1404,21 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just Svg -> ".svg" Just Emf -> ".emf" Nothing -> "" - imgpath = "media/" ++ ident ++ imgext + imgpath = "media/" <> ident <> T.unpack imgext mbMimeType = mt <|> getMimeType imgpath imgData = (ident, imgpath, mbMimeType, img) - if null imgext + if T.null imgext then -- without an extension there is no rule for content type inlinesToOpenXML opts alt -- return alt to avoid corrupted docx else do -- insert mime type to use in constructing [Content_Types].xml - modify $ \st -> st { stImages = M.insert src imgData $ stImages st } + modify $ \st -> st { stImages = M.insert (T.unpack src) imgData $ stImages st } return [generateImgElt imgData] ) `catchError` ( \e -> do - report $ CouldNotFetchResource src (show e) + report $ CouldNotFetchResource src $ T.pack (show e) -- emit alt text inlinesToOpenXML opts alt ) @@ -1460,22 +1465,22 @@ withDirection x = do , envTextProperties = EnvProps textStyle textProps' } -wrapBookmark :: (PandocMonad m) => String -> [Element] -> WS m [Element] -wrapBookmark [] contents = return contents +wrapBookmark :: (PandocMonad m) => T.Text -> [Element] -> WS m [Element] +wrapBookmark "" contents = return contents wrapBookmark ident contents = do id' <- getUniqueId let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') - ,("w:name", toBookmarkName ident)] () + ,("w:name", T.unpack $ toBookmarkName ident)] () bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return $ bookmarkStart : contents ++ [bookmarkEnd] -- Word imposes a 40 character limit on bookmark names and requires -- that they begin with a letter. So we just use a hash of the -- identifier when otherwise we'd have an illegal bookmark name. -toBookmarkName :: String -> String -toBookmarkName s = - case s of - (c:_) | isLetter c - , length s <= 40 -> s - _ -> 'X' : drop 1 (showDigest (sha1 (fromStringLazy s))) +toBookmarkName :: T.Text -> T.Text +toBookmarkName s + | Just (c, _) <- T.uncons s + , isLetter c + , T.length s <= 40 = s + | otherwise = T.pack $ 'X' : drop 1 (showDigest (sha1 (fromTextLazy $ TL.fromStrict s))) diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs index 4f0b0c3f9..18956ee52 100644 --- a/src/Text/Pandoc/Writers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs @@ -27,6 +27,7 @@ module Text.Pandoc.Writers.Docx.StyleMap ( StyleMaps(..) import Text.Pandoc.Readers.Docx.Parse.Styles import Codec.Archive.Zip import qualified Data.Map as M +import qualified Data.Text as T import Data.String import Data.Char (isSpace) import Prelude @@ -38,7 +39,7 @@ type CharStyleNameMap = M.Map CharStyleName CharStyle getStyleIdFromName :: (Ord sn, FromStyleName sn, IsString (StyleId sty), HasStyleId sty) => sn -> M.Map sn sty -> StyleId sty getStyleIdFromName s = maybe (fallback s) getStyleId . M.lookup s - where fallback = fromString . filter (not . isSpace) . fromStyleName + where fallback = fromString . T.unpack . T.filter (not . isSpace) . fromStyleName hasStyleName :: (Ord sn, HasStyleId sty) => sn -> M.Map sn sty -> Bool diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 8111da9ba..541939f3b 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.DokuWiki Copyright : Copyright (C) 2008-2019 John MacFarlane @@ -27,15 +28,16 @@ import Control.Monad (zipWithM) import Control.Monad.Reader (ReaderT, asks, local, runReaderT) import Control.Monad.State.Strict (StateT, evalStateT) import Data.Default (Default (..)) -import Data.List (intercalate, intersect, isPrefixOf, transpose) -import Data.Text (Text, pack) +import Data.List (intersect, transpose) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, isURI, linesToPara, - removeFormatting, substitute, trimr) + removeFormatting, trimr, tshow) import Text.Pandoc.Templates (renderTemplate) import Text.DocLayout (render, literal) import Text.Pandoc.Writers.Shared (defField, metaToContext) @@ -44,7 +46,7 @@ data WriterState = WriterState { } data WriterEnvironment = WriterEnvironment { - stIndent :: String -- Indent after the marker at the beginning of list items + stIndent :: Text -- Indent after the marker at the beginning of list items , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list , stBackSlashLB :: Bool -- True if we should produce formatted strings with newlines (as in a table cell) } @@ -72,57 +74,58 @@ pandocToDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> DokuWiki m Text pandocToDokuWiki opts (Pandoc meta blocks) = do metadata <- metaToContext opts - (fmap (literal . pack . trimr) . blockListToDokuWiki opts) - (fmap (literal . pack . trimr) . inlineListToDokuWiki opts) + (fmap (literal . trimr) . blockListToDokuWiki opts) + (fmap (literal . trimr) . inlineListToDokuWiki opts) meta body <- blockListToDokuWiki opts blocks - let main = pack body - let context = defField "body" main + let context = defField "body" body $ defField "toc" (writerTableOfContents opts) metadata return $ case writerTemplate opts of - Nothing -> main + Nothing -> body Just tpl -> render Nothing $ renderTemplate tpl context -- | Escape special characters for DokuWiki. -escapeString :: String -> String -escapeString = substitute "__" "%%__%%" . - substitute "**" "%%**%%" . - substitute "//" "%%//%%" +escapeString :: Text -> Text +escapeString = T.replace "__" "%%__%%" . + T.replace "**" "%%**%%" . + T.replace "//" "%%//%%" -- | Convert Pandoc block element to DokuWiki. blockToDokuWiki :: PandocMonad m => WriterOptions -- ^ Options -> Block -- ^ Block element - -> DokuWiki m String + -> DokuWiki m Text blockToDokuWiki _ Null = return "" blockToDokuWiki opts (Div _attrs bs) = do contents <- blockListToDokuWiki opts bs - return $ contents ++ "\n" + return $ contents <> "\n" blockToDokuWiki opts (Plain inlines) = inlineListToDokuWiki opts inlines -- title beginning with fig: indicates that the image is a figure -- dokuwiki doesn't support captions - so combine together alt and caption into alt -blockToDokuWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return "" - else (" " ++) `fmap` inlineListToDokuWiki opts txt - let opt = if null txt - then "" - else "|" ++ if null tit then capt else tit ++ capt - return $ "{{" ++ src ++ imageDims opts attr ++ opt ++ "}}\n" +blockToDokuWiki opts (Para [Image attr txt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = do + capt <- if null txt + then return "" + else (" " <>) `fmap` inlineListToDokuWiki opts txt + let opt = if null txt + then "" + else "|" <> if T.null tit then capt else tit <> capt + return $ "{{" <> src <> imageDims opts attr <> opt <> "}}\n" blockToDokuWiki opts (Para inlines) = do indent <- asks stIndent useTags <- asks stUseTags contents <- inlineListToDokuWiki opts inlines return $ if useTags - then "<HTML><p></HTML>" ++ contents ++ "<HTML></p></HTML>" - else contents ++ if null indent then "\n" else "" + then "<HTML><p></HTML>" <> contents <> "<HTML></p></HTML>" + else contents <> if T.null indent then "\n" else "" blockToDokuWiki opts (LineBlock lns) = blockToDokuWiki opts $ linesToPara lns @@ -131,7 +134,7 @@ blockToDokuWiki _ b@(RawBlock f str) | f == Format "dokuwiki" = return str -- See https://www.dokuwiki.org/wiki:syntax -- use uppercase HTML tag for block-level content: - | f == Format "html" = return $ "<HTML>\n" ++ str ++ "\n</HTML>" + | f == Format "html" = return $ "<HTML>\n" <> str <> "\n</HTML>" | otherwise = "" <$ report (BlockNotRendered b) @@ -141,8 +144,8 @@ blockToDokuWiki opts (Header level _ inlines) = do -- emphasis, links etc. not allowed in headers, apparently, -- so we remove formatting: contents <- inlineListToDokuWiki opts $ removeFormatting inlines - let eqs = replicate ( 7 - level ) '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" + let eqs = T.replicate ( 7 - level ) "=" + return $ eqs <> " " <> contents <> " " <> eqs <> "\n" blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", @@ -154,43 +157,43 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", "visualfoxpro", "winbatch", "xml", "xpp", "z80"] - return $ "<code" ++ + return $ "<code" <> (case at of [] -> ">\n" - (x:_) -> " " ++ x ++ ">\n") ++ str ++ "\n</code>" + (x:_) -> " " <> x <> ">\n") <> str <> "\n</code>" blockToDokuWiki opts (BlockQuote blocks) = do contents <- blockListToDokuWiki opts blocks if isSimpleBlockQuote blocks - then return $ unlines $ map ("> " ++) $ lines contents - else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>" + then return $ T.unlines $ map ("> " <>) $ T.lines contents + else return $ "<HTML><blockquote>\n" <> contents <> "</blockquote></HTML>" blockToDokuWiki opts (Table capt aligns _ headers rows) = do captionDoc <- if null capt then return "" else do c <- inlineListToDokuWiki opts capt - return $ "" ++ c ++ "\n" + return $ "" <> c <> "\n" headers' <- if all null headers then return [] else zipWithM (tableItemToDokuWiki opts) aligns headers rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows - let widths = map (maximum . map length) $ transpose (headers':rows') + let widths = map (maximum . map T.length) $ transpose (headers':rows') let padTo (width, al) s = - case width - length s of + case width - T.length s of x | x > 0 -> if al == AlignLeft || al == AlignDefault - then s ++ replicate x ' ' + then s <> T.replicate x " " else if al == AlignRight - then replicate x ' ' ++ s - else replicate (x `div` 2) ' ' ++ - s ++ replicate (x - x `div` 2) ' ' + then T.replicate x " " <> s + else T.replicate (x `div` 2) " " <> + s <> T.replicate (x - x `div` 2) " " | otherwise -> s - let renderRow sep cells = sep ++ - intercalate sep (zipWith padTo (zip widths aligns) cells) ++ sep - return $ captionDoc ++ - (if null headers' then "" else renderRow "^" headers' ++ "\n") ++ - unlines (map (renderRow "|") rows') + let renderRow sep cells = sep <> + T.intercalate sep (zipWith padTo (zip widths aligns) cells) <> sep + return $ captionDoc <> + (if null headers' then "" else renderRow "^" headers' <> "\n") <> + T.unlines (map (renderRow "|") rows') blockToDokuWiki opts x@(BulletList items) = do oldUseTags <- asks stUseTags @@ -201,12 +204,12 @@ blockToDokuWiki opts x@(BulletList items) = do then do contents <- local (\s -> s { stUseTags = True }) (mapM (listItemToDokuWiki opts) items) - return $ "<HTML><ul></HTML>\n" ++ vcat contents ++ "<HTML></ul></HTML>\n" + return $ "<HTML><ul></HTML>\n" <> vcat contents <> "<HTML></ul></HTML>\n" else do - contents <- local (\s -> s { stIndent = stIndent s ++ " " + contents <- local (\s -> s { stIndent = stIndent s <> " " , stBackSlashLB = backSlash}) (mapM (listItemToDokuWiki opts) items) - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" blockToDokuWiki opts x@(OrderedList attribs items) = do oldUseTags <- asks stUseTags @@ -217,12 +220,12 @@ blockToDokuWiki opts x@(OrderedList attribs items) = do then do contents <- local (\s -> s { stUseTags = True }) (mapM (orderedListItemToDokuWiki opts) items) - return $ "<HTML><ol" ++ listAttribsToString attribs ++ "></HTML>\n" ++ vcat contents ++ "<HTML></ol></HTML>\n" + return $ "<HTML><ol" <> listAttribsToString attribs <> "></HTML>\n" <> vcat contents <> "<HTML></ol></HTML>\n" else do - contents <- local (\s -> s { stIndent = stIndent s ++ " " + contents <- local (\s -> s { stIndent = stIndent s <> " " , stBackSlashLB = backSlash}) (mapM (orderedListItemToDokuWiki opts) items) - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" -- TODO Need to decide how to make definition lists work on dokuwiki - I don't think there -- is a specific representation of them. @@ -236,76 +239,76 @@ blockToDokuWiki opts x@(DefinitionList items) = do then do contents <- local (\s -> s { stUseTags = True }) (mapM (definitionListItemToDokuWiki opts) items) - return $ "<HTML><dl></HTML>\n" ++ vcat contents ++ "<HTML></dl></HTML>\n" + return $ "<HTML><dl></HTML>\n" <> vcat contents <> "<HTML></dl></HTML>\n" else do - contents <- local (\s -> s { stIndent = stIndent s ++ " " + contents <- local (\s -> s { stIndent = stIndent s <> " " , stBackSlashLB = backSlash}) (mapM (definitionListItemToDokuWiki opts) items) - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" -- Auxiliary functions for lists: -- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String +listAttribsToString :: ListAttributes -> Text listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle + let numstyle' = camelCaseToHyphenated $ tshow numstyle in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ + then " start=\"" <> tshow startnum <> "\"" + else "") <> (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + then " style=\"list-style-type: " <> numstyle' <> ";\"" else "") -- | Convert bullet list item (list of blocks) to DokuWiki. listItemToDokuWiki :: PandocMonad m - => WriterOptions -> [Block] -> DokuWiki m String + => WriterOptions -> [Block] -> DokuWiki m Text listItemToDokuWiki opts items = do useTags <- asks stUseTags if useTags then do contents <- blockListToDokuWiki opts items - return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" + return $ "<HTML><li></HTML>" <> contents <> "<HTML></li></HTML>" else do bs <- mapM (blockToDokuWiki opts) items let contents = case items of - [_, CodeBlock _ _] -> concat bs + [_, CodeBlock _ _] -> T.concat bs _ -> vcat bs indent <- asks stIndent backSlash <- asks stBackSlashLB - let indent' = if backSlash then drop 2 indent else indent - return $ indent' ++ "* " ++ contents + let indent' = if backSlash then T.drop 2 indent else indent + return $ indent' <> "* " <> contents -- | Convert ordered list item (list of blocks) to DokuWiki. -- | TODO Emiminate dreadful duplication of text from listItemToDokuWiki -orderedListItemToDokuWiki :: PandocMonad m => WriterOptions -> [Block] -> DokuWiki m String +orderedListItemToDokuWiki :: PandocMonad m => WriterOptions -> [Block] -> DokuWiki m Text orderedListItemToDokuWiki opts items = do contents <- blockListToDokuWiki opts items useTags <- asks stUseTags if useTags - then return $ "<HTML><li></HTML>" ++ contents ++ "<HTML></li></HTML>" + then return $ "<HTML><li></HTML>" <> contents <> "<HTML></li></HTML>" else do indent <- asks stIndent backSlash <- asks stBackSlashLB - let indent' = if backSlash then drop 2 indent else indent - return $ indent' ++ "- " ++ contents + let indent' = if backSlash then T.drop 2 indent else indent + return $ indent' <> "- " <> contents -- | Convert definition list item (label, list of blocks) to DokuWiki. definitionListItemToDokuWiki :: PandocMonad m => WriterOptions -> ([Inline],[[Block]]) - -> DokuWiki m String + -> DokuWiki m Text definitionListItemToDokuWiki opts (label, items) = do labelText <- inlineListToDokuWiki opts label contents <- mapM (blockListToDokuWiki opts) items useTags <- asks stUseTags if useTags - then return $ "<HTML><dt></HTML>" ++ labelText ++ "<HTML></dt></HTML>\n" ++ - intercalate "\n" (map (\d -> "<HTML><dd></HTML>" ++ d ++ "<HTML></dd></HTML>") contents) + then return $ "<HTML><dt></HTML>" <> labelText <> "<HTML></dt></HTML>\n" <> + T.intercalate "\n" (map (\d -> "<HTML><dd></HTML>" <> d <> "<HTML></dd></HTML>") contents) else do indent <- asks stIndent backSlash <- asks stBackSlashLB - let indent' = if backSlash then drop 2 indent else indent - return $ indent' ++ "* **" ++ labelText ++ "** " ++ concat contents + let indent' = if backSlash then T.drop 2 indent else indent + return $ indent' <> "* **" <> labelText <> "** " <> T.concat contents -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. isSimpleList :: Block -> Bool @@ -333,17 +336,17 @@ isSimpleBlockQuote :: [Block] -> Bool isSimpleBlockQuote bs = all isPlainOrPara bs -- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat = intercalate "\n" +vcat :: [Text] -> Text +vcat = T.intercalate "\n" -- | For each string in the input list, convert all newlines to -- dokuwiki escaped newlines. Then concat the list using double linebreaks. -backSlashLineBreaks :: [String] -> String -backSlashLineBreaks ls = vcatBackSlash $ map escape ls +backSlashLineBreaks :: [Text] -> Text +backSlashLineBreaks ls = vcatBackSlash $ map (T.pack . escape . T.unpack) ls where - vcatBackSlash = intercalate "\\\\ \\\\ " -- simulate paragraphs. - escape ['\n'] = "" -- remove trailing newlines - escape ('\n':cs) = "\\\\ " ++ escape cs + vcatBackSlash = T.intercalate "\\\\ \\\\ " -- simulate paragraphs. + escape ['\n'] = "" -- remove trailing newlines + escape ('\n':cs) = "\\\\ " <> escape cs escape (c:cs) = c : escape cs escape [] = [] @@ -353,11 +356,11 @@ tableItemToDokuWiki :: PandocMonad m => WriterOptions -> Alignment -> [Block] - -> DokuWiki m String + -> DokuWiki m Text tableItemToDokuWiki opts align' item = do let mkcell x = (if align' == AlignRight || align' == AlignCenter then " " - else "") ++ x ++ + else "") <> x <> (if align' == AlignLeft || align' == AlignCenter then " " else "") @@ -369,7 +372,7 @@ tableItemToDokuWiki opts align' item = do blockListToDokuWiki :: PandocMonad m => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> DokuWiki m String + -> DokuWiki m Text blockListToDokuWiki opts blocks = do backSlash <- asks stBackSlashLB let blocks' = consolidateRawBlocks blocks @@ -380,51 +383,51 @@ blockListToDokuWiki opts blocks = do consolidateRawBlocks :: [Block] -> [Block] consolidateRawBlocks [] = [] consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs) - | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs) + | f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 <> "\n" <> b2) : xs) consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs -- | Convert list of Pandoc inline elements to DokuWiki. inlineListToDokuWiki :: PandocMonad m - => WriterOptions -> [Inline] -> DokuWiki m String + => WriterOptions -> [Inline] -> DokuWiki m Text inlineListToDokuWiki opts lst = - concat <$> mapM (inlineToDokuWiki opts) lst + T.concat <$> mapM (inlineToDokuWiki opts) lst -- | Convert Pandoc inline element to DokuWiki. inlineToDokuWiki :: PandocMonad m - => WriterOptions -> Inline -> DokuWiki m String + => WriterOptions -> Inline -> DokuWiki m Text inlineToDokuWiki opts (Span _attrs ils) = inlineListToDokuWiki opts ils inlineToDokuWiki opts (Emph lst) = do contents <- inlineListToDokuWiki opts lst - return $ "//" ++ contents ++ "//" + return $ "//" <> contents <> "//" inlineToDokuWiki opts (Strong lst) = do contents <- inlineListToDokuWiki opts lst - return $ "**" ++ contents ++ "**" + return $ "**" <> contents <> "**" inlineToDokuWiki opts (Strikeout lst) = do contents <- inlineListToDokuWiki opts lst - return $ "<del>" ++ contents ++ "</del>" + return $ "<del>" <> contents <> "</del>" inlineToDokuWiki opts (Superscript lst) = do contents <- inlineListToDokuWiki opts lst - return $ "<sup>" ++ contents ++ "</sup>" + return $ "<sup>" <> contents <> "</sup>" inlineToDokuWiki opts (Subscript lst) = do contents <- inlineListToDokuWiki opts lst - return $ "<sub>" ++ contents ++ "</sub>" + return $ "<sub>" <> contents <> "</sub>" inlineToDokuWiki opts (SmallCaps lst) = inlineListToDokuWiki opts lst inlineToDokuWiki opts (Quoted SingleQuote lst) = do contents <- inlineListToDokuWiki opts lst - return $ "\8216" ++ contents ++ "\8217" + return $ "\8216" <> contents <> "\8217" inlineToDokuWiki opts (Quoted DoubleQuote lst) = do contents <- inlineListToDokuWiki opts lst - return $ "\8220" ++ contents ++ "\8221" + return $ "\8220" <> contents <> "\8221" inlineToDokuWiki opts (Cite _ lst) = inlineListToDokuWiki opts lst @@ -438,11 +441,11 @@ inlineToDokuWiki _ (Code _ str) = -- characters. -- It does mean that if pandoc could ever read dokuwiki, and so round-trip the format, -- any formatting inside inlined code blocks would be lost, or presented incorrectly. - return $ "''%%" ++ str ++ "%%''" + return $ "''%%" <> str <> "%%''" inlineToDokuWiki _ (Str str) = return $ escapeString str -inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim +inlineToDokuWiki _ (Math mathType str) = return $ delim <> str <> delim -- note: str should NOT be escaped where delim = case mathType of DisplayMath -> "$$" @@ -450,7 +453,7 @@ inlineToDokuWiki _ (Math mathType str) = return $ delim ++ str ++ delim inlineToDokuWiki _ il@(RawInline f str) | f == Format "dokuwiki" = return str - | f == Format "html" = return $ "<html>" ++ str ++ "</html>" + | f == Format "html" = return $ "<html>" <> str <> "</html>" | otherwise = "" <$ report (InlineNotRendered il) inlineToDokuWiki _ LineBreak = do @@ -470,34 +473,34 @@ inlineToDokuWiki _ Space = return " " inlineToDokuWiki opts (Link _ txt (src, _)) = do label <- inlineListToDokuWiki opts txt case txt of - [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">" + [Str s] | "mailto:" `T.isPrefixOf` src -> return $ "<" <> s <> ">" | escapeURI s == src -> return src _ -> if isURI src - then return $ "[[" ++ src ++ "|" ++ label ++ "]]" - else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" - where src' = case src of - '/':xs -> xs -- with leading / it's a - _ -> src -- link to a help page + then return $ "[[" <> src <> "|" <> label <> "]]" + else return $ "[[" <> src' <> "|" <> label <> "]]" + where src' = case T.uncons src of + Just ('/',xs) -> xs -- with leading / it's a + _ -> src -- link to a help page inlineToDokuWiki opts (Image attr alt (source, tit)) = do alt' <- inlineListToDokuWiki opts alt let txt = case (tit, alt) of ("", []) -> "" - ("", _ ) -> "|" ++ alt' - (_ , _ ) -> "|" ++ tit - return $ "{{" ++ source ++ imageDims opts attr ++ txt ++ "}}" + ("", _ ) -> "|" <> alt' + (_ , _ ) -> "|" <> tit + return $ "{{" <> source <> imageDims opts attr <> txt <> "}}" inlineToDokuWiki opts (Note contents) = do contents' <- blockListToDokuWiki opts contents - return $ "((" ++ contents' ++ "))" + return $ "((" <> contents' <> "))" -- note - may not work for notes with multiple blocks -imageDims :: WriterOptions -> Attr -> String +imageDims :: WriterOptions -> Attr -> Text imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) where toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing checkPct maybeDim = maybeDim - go (Just w) Nothing = "?" ++ w - go (Just w) (Just h) = "?" ++ w ++ "x" ++ h - go Nothing (Just h) = "?0x" ++ h + go (Just w) Nothing = "?" <> w + go (Just w) (Just h) = "?" <> w <> "x" <> h + go Nothing (Just h) = "?0x" <> h go Nothing Nothing = "" diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 37c78bba8..4a1c27ce6 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -49,7 +49,7 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), ObfuscationMethod (NoObfuscation), WrapOption (..), WriterOptions (..)) import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags', - safeRead, stringify, trim, uniqueIdent) + safeRead, stringify, trim, uniqueIdent, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.UUID (getUUID) import Text.Pandoc.Walk (query, walk, walkM) @@ -176,10 +176,10 @@ getEPUBMetadata opts meta = do let localeLang = case mLang of Just lang -> - map (\c -> if c == '_' then '-' else c) $ - takeWhile (/='.') lang + TS.map (\c -> if c == '_' then '-' else c) $ + TS.takeWhile (/='.') lang Nothing -> "en-US" - return m{ epubLanguage = localeLang } + return m{ epubLanguage = TS.unpack localeLang } else return m let fixDate m = if null (epubDate m) @@ -194,7 +194,7 @@ getEPUBMetadata opts meta = do then return m else do let authors' = map stringify $ docAuthors meta - let toAuthor name = Creator{ creatorText = name + let toAuthor name = Creator{ creatorText = TS.unpack name , creatorRole = Just "aut" , creatorFileAs = Nothing } return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } @@ -253,18 +253,18 @@ addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md = addMetadataFromXML _ md = md metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = s -metaValueToString (MetaInlines ils) = stringify ils -metaValueToString (MetaBlocks bs) = stringify bs +metaValueToString (MetaString s) = TS.unpack s +metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils +metaValueToString (MetaBlocks bs) = TS.unpack $ stringify bs metaValueToString (MetaBool True) = "true" metaValueToString (MetaBool False) = "false" metaValueToString _ = "" -metaValueToPaths:: MetaValue -> [FilePath] +metaValueToPaths :: MetaValue -> [FilePath] metaValueToPaths (MetaList xs) = map metaValueToString xs metaValueToPaths x = [metaValueToString x] -getList :: String -> Meta -> (MetaValue -> a) -> [a] +getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a] getList s meta handleMetaValue = case lookupMeta s meta of Just (MetaList xs) -> map handleMetaValue xs @@ -288,7 +288,7 @@ getTitle meta = getList "title" meta handleMetaValue , titleType = metaValueToString <$> M.lookup "type" m } handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing -getCreator :: String -> Meta -> [Creator] +getCreator :: TS.Text -> Meta -> [Creator] getCreator s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m @@ -296,7 +296,7 @@ getCreator s meta = getList s meta handleMetaValue , creatorRole = metaValueToString <$> M.lookup "role" m } handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing -getDate :: String -> Meta -> [Date] +getDate :: TS.Text -> Meta -> [Date] getDate s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Date{ dateText = fromMaybe "" $ @@ -305,7 +305,7 @@ getDate s meta = getList s meta handleMetaValue handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } -simpleList :: String -> Meta -> [String] +simpleList :: TS.Text -> Meta -> [String] simpleList s meta = case lookupMeta s meta of Just (MetaList xs) -> map metaValueToString xs @@ -366,11 +366,11 @@ metadataFromMeta opts meta = EPUBMetadata{ _ -> Nothing ibooksFields = case lookupMeta "ibooks" meta of Just (MetaMap mp) - -> M.toList $ M.map metaValueToString mp + -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp _ -> [] calibreFields = case lookupMeta "calibre" meta of Just (MetaMap mp) - -> M.toList $ M.map metaValueToString mp + -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp _ -> [] -- | Produce an EPUB2 file from a Pandoc document. @@ -396,9 +396,9 @@ writeEPUB :: PandocMonad m writeEPUB epubVersion opts doc = do let epubSubdir = writerEpubSubdirectory opts -- sanity check on epubSubdir - unless (all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ throwError $ PandocEpubSubdirectoryError epubSubdir - let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = epubSubdir } + let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir } evalStateT (pandocToEPUB epubVersion opts doc) initState pandocToEPUB :: PandocMonad m @@ -422,7 +422,7 @@ pandocToEPUB version opts doc = do [] -> case epubTitle metadata of [] -> "UNTITLED" (x:_) -> titleText x - x -> stringify x + x -> TS.unpack $ stringify x -- stylesheet stylesheets <- case epubStylesheets metadata of @@ -468,13 +468,13 @@ pandocToEPUB version opts doc = do case imageSize opts' (B.toStrict imgContent) of Right sz -> return $ sizeInPixels sz Left err' -> (0, 0) <$ report - (CouldNotDetermineImageSize img err') + (CouldNotDetermineImageSize (TS.pack img) err') cpContent <- lift $ writeHtml opts'{ writerVariables = Context (M.fromList [ ("coverpage", toVal' "true"), - ("pagetitle", toVal' $ - escapeStringForXML plainTitle), + ("pagetitle", toVal $ + escapeStringForXML $ TS.pack plainTitle), ("cover-image", toVal' coverImage), ("cover-image-width", toVal' $ show coverImageWidth), @@ -494,8 +494,8 @@ pandocToEPUB version opts doc = do Context (M.fromList [ ("titlepage", toVal' "true"), ("body-type", toVal' "frontmatter"), - ("pagetitle", toVal' $ - escapeStringForXML plainTitle)]) + ("pagetitle", toVal $ + escapeStringForXML $ TS.pack plainTitle)]) <> cssvars True <> vars } (Pandoc meta []) tpEntry <- mkEntry "text/title_page.xhtml" tpContent @@ -504,7 +504,7 @@ pandocToEPUB version opts doc = do let matchingGlob f = do xs <- lift $ P.glob f when (null xs) $ - report $ CouldNotFetchResource f "glob did not match any font files" + report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files" return xs let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< lift (P.readFileLazy f) @@ -551,16 +551,16 @@ pandocToEPUB version opts doc = do let chapters' = secsToChapters secs - let extractLinkURL' :: Int -> Inline -> [(String, String)] + let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)] extractLinkURL' num (Span (ident, _, _) _) - | not (null ident) = [(ident, showChapter num ++ ('#':ident))] + | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL' _ _ = [] - let extractLinkURL :: Int -> Block -> [(String, String)] + let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)] extractLinkURL num (Div (ident, _, _) _) - | not (null ident) = [(ident, showChapter num ++ ('#':ident))] + | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL num (Header _ (ident, _, _) _) - | not (null ident) = [(ident, showChapter num ++ ('#':ident))] + | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL num b = query (extractLinkURL' num) b let reftable = concat $ zipWith (\(Chapter bs) num -> @@ -568,10 +568,10 @@ pandocToEPUB version opts doc = do chapters' [1..] let fixInternalReferences :: Inline -> Inline - fixInternalReferences (Link attr lab ('#':xs, tit)) = - case lookup xs reftable of + fixInternalReferences (Link attr lab (src, tit)) + | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of Just ys -> Link attr lab (ys, tit) - Nothing -> Link attr lab ('#':xs, tit) + Nothing -> Link attr lab (src, tit) fixInternalReferences x = x -- internal reference IDs change when we chunk the file, @@ -645,14 +645,14 @@ pandocToEPUB version opts doc = do ("href", makeRelative epubSubdir $ eRelativePath ent), ("media-type", - fromMaybe "application/octet-stream" + maybe "application/octet-stream" TS.unpack $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), ("href", makeRelative epubSubdir $ eRelativePath ent), - ("media-type", fromMaybe "" $ + ("media-type", maybe "" TS.unpack $ getMimeType $ eRelativePath ent)] $ () let tocTitle = fromMaybe plainTitle $ @@ -724,7 +724,7 @@ pandocToEPUB version opts doc = do let tocLevel = writerTOCDepth opts let navPointNode :: PandocMonad m - => (Int -> [Inline] -> String -> [Element] -> Element) + => (Int -> [Inline] -> TS.Text -> [Element] -> Element) -> Block -> StateT Int m [Element] navPointNode formatter (Div (ident,_,_) (Header lvl (_,_,kvs) ils : children)) = do @@ -734,29 +734,29 @@ pandocToEPUB version opts doc = do n <- get modify (+1) let num = fromMaybe "" $ lookup "number" kvs - let tit = if writerNumberSections opts && not (null num) + let tit = if writerNumberSections opts && not (TS.null num) then Span ("", ["section-header-number"], []) [Str num] : Space : ils else ils src <- case lookup ident reftable of Just x -> return x Nothing -> throwError $ PandocSomeError $ - ident ++ " not found in reftable" + ident <> " not found in reftable" subs <- concat <$> mapM (navPointNode formatter) children return [formatter n tit src subs] navPointNode formatter (Div _ bs) = concat <$> mapM (navPointNode formatter) bs navPointNode _ _ = return [] - let navMapFormatter :: Int -> [Inline] -> String -> [Element] -> Element + let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" $ stringify tit - , unode "content" ! [("src", "text/" ++ src)] $ () + [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit + , unode "content" ! [("src", "text/" <> TS.unpack src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) + [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta) , unode "content" ! [("src", "text/title_page.xhtml")] $ () ] @@ -784,11 +784,11 @@ pandocToEPUB version opts doc = do ] tocEntry <- mkEntry "toc.ncx" tocData - let navXhtmlFormatter :: Int -> [Inline] -> String -> [Element] -> Element + let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! [("id", "toc-li-" ++ show n)] $ (unode "a" ! - [("href", "text/" ++ src)] + [("href", "text/" <> TS.unpack src)] $ titElements) : case subs of [] -> [] @@ -799,12 +799,12 @@ pandocToEPUB version opts doc = do opts{ writerTemplate = Nothing , writerVariables = Context (M.fromList - [("pagetitle", toVal' $ - escapeStringForXML plainTitle)]) + [("pagetitle", toVal $ + escapeStringForXML $ TS.pack plainTitle)]) <> writerVariables opts} (Pandoc nullMeta [Plain $ walk clean tit])) of - Left _ -> TS.pack $ stringify tit + Left _ -> stringify tit Right x -> x -- can't have <a> elements inside generated links... clean (Link _ ils _) = Span ("", [], []) ils @@ -815,7 +815,7 @@ pandocToEPUB version opts doc = do tocBlocks <- lift $ evalStateT (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1 let navBlocks = [RawBlock (Format "html") - $ showElement $ -- prettyprinting introduces bad spaces + $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle @@ -836,7 +836,7 @@ pandocToEPUB version opts doc = do else [] let landmarks = if null landmarkItems then [] - else [RawBlock (Format "html") $ ppElement $ + else [RawBlock (Format "html") $ TS.pack $ ppElement $ unode "nav" ! [("epub:type","landmarks") ,("id","landmarks") ,("hidden","hidden")] $ @@ -995,49 +995,49 @@ showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" transformTag :: PandocMonad m - => Tag String - -> E m (Tag String) + => Tag TS.Text + -> E m (Tag TS.Text) transformTag tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && isNothing (lookup "data-external" attr) = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef src - newposter <- modifyMediaRef poster + newsrc <- modifyMediaRef $ TS.unpack src + newposter <- modifyMediaRef $ TS.unpack poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", "../" ++ newsrc) | not (null newsrc)] ++ - [("poster", "../" ++ newposter) | not (null newposter)] + [("src", "../" <> newsrc) | not (TS.null newsrc)] ++ + [("poster", "../" <> newposter) | not (TS.null newposter)] return $ TagOpen name attr' transformTag tag = return tag modifyMediaRef :: PandocMonad m => FilePath - -> E m FilePath + -> E m TS.Text modifyMediaRef "" = return "" modifyMediaRef oldsrc = do media <- gets stMediaPaths case lookup oldsrc media of - Just (n,_) -> return n + Just (n,_) -> return $ TS.pack n Nothing -> catchError - (do (img, mbMime) <- P.fetchItem oldsrc - let ext = fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) - (('.':) <$> (mbMime >>= extensionFromMimeType)) + (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc + let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack + (("." <>) <$> (mbMime >>= extensionFromMimeType)) newName <- getMediaNextNewName ext let newPath = "media/" ++ newName entry <- mkEntry newPath (B.fromChunks . (:[]) $ img) modify $ \st -> st{ stMediaPaths = (oldsrc, (newPath, Just entry)):media} - return newPath) + return $ TS.pack newPath) (\e -> do - report $ CouldNotFetchResource oldsrc (show e) - return oldsrc) + report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e) + return $ TS.pack oldsrc) getMediaNextNewName :: PandocMonad m => String -> E m String getMediaNextNewName ext = do nextId <- gets stMediaNextId modify $ \st -> st { stMediaNextId = nextId + 1 } let nextName = "file" ++ show nextId ++ ext - (P.fetchItem nextName >> getMediaNextNewName ext) `catchError` const (return nextName) + (P.fetchItem (TS.pack nextName) >> getMediaNextNewName ext) `catchError` const (return nextName) transformBlock :: PandocMonad m => Block @@ -1054,14 +1054,14 @@ transformInline :: PandocMonad m -> Inline -> E m Inline transformInline _opts (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef src - return $ Image attr lab ("../" ++ newsrc, tit) + newsrc <- modifyMediaRef $ TS.unpack src + return $ Image attr lab ("../" <> newsrc, tit) transformInline opts (x@(Math t m)) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef (url ++ urlEncode m) + newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m)) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) - [Image nullAttr [x] ("../" ++ newsrc, "")] + [Image nullAttr [x] ("../" <> newsrc, "")] transformInline _opts (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw @@ -1081,7 +1081,7 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . unEntity ('&':'#':xs) = let (ds,ys) = break (==';') xs rest = drop 1 ys - in case safeRead ('\'':'\\':ds ++ "'") of + in case safeRead (TS.pack $ "'\\" <> ds <> "'") of Just x -> x : unEntity rest Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs @@ -1090,7 +1090,7 @@ mediaTypeOf :: FilePath -> Maybe MimeType mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of - Just y | any (`isPrefixOf` y) mediaPrefixes -> Just y + Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y _ -> Nothing -- Returns filename for chapter number. @@ -1102,7 +1102,7 @@ addIdentifiers :: WriterOptions -> [Block] -> [Block] addIdentifiers opts bs = evalState (mapM go bs) Set.empty where go (Header n (ident,classes,kvs) ils) = do ids <- get - let ident' = if null ident + let ident' = if TS.null ident then uniqueIdent (writerExtensions opts) ils ids else ident modify $ Set.insert ident' @@ -1111,13 +1111,16 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM normalizeDate' :: String -> Maybe String -normalizeDate' xs = - let xs' = trim xs in - case xs' of - [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY - [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM - -> Just xs' - _ -> normalizeDate xs' +normalizeDate' = fmap TS.unpack . go . trim . TS.pack + where + go xs + | TS.length xs == 4 -- YYY + , TS.all isDigit xs = Just xs + | (y, s) <- TS.splitAt 4 xs -- YYY-MM + , Just ('-', m) <- TS.uncons s + , TS.length m == 2 + , TS.all isDigit y && TS.all isDigit m = Just xs + | otherwise = normalizeDate xs toRelator :: String -> Maybe String toRelator x diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 744eb2a06..8cb29c269 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.FB2 Copyright : Copyright (C) 2011-2012 Sergey Astanin @@ -23,11 +24,12 @@ import Control.Monad (zipWithM) import Control.Monad.Except (catchError) import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify) import Data.ByteString.Base64 (encode) -import qualified Data.ByteString.Char8 as B8 -import Data.Char (isAscii, isControl, isSpace, toLower) +import Data.Char (isAscii, isControl, isSpace) import Data.Either (lefts, rights) -import Data.List (intercalate, isPrefixOf, stripPrefix) +import Data.List (intercalate) import Data.Text (Text, pack) +import qualified Data.Text as T +import qualified Data.Text.Encoding as TE import Network.HTTP (urlEncode) import Text.XML.Light import qualified Text.XML.Light as X @@ -40,15 +42,15 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, - makeSections) + makeSections, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. data FbRenderState = FbRenderState - { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text - , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path - , parentListMarker :: String -- ^ list marker of the parent ordered list + { footnotes :: [ (Int, Text, [Content]) ] -- ^ #, ID, text + , imagesToFetch :: [ (Text, Text) ] -- ^ filename, URL or path + , parentListMarker :: Text -- ^ list marker of the parent ordered list , writerOptions :: WriterOptions } deriving (Show) @@ -98,8 +100,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do description :: PandocMonad m => Meta -> FBM m Content description meta' = do let genre = case lookupMetaString "genre" meta' of - "" -> el "genre" "unrecognised" - s -> el "genre" s + "" -> el "genre" ("unrecognised" :: String) + s -> el "genre" (T.unpack s) bt <- booktitle meta' let as = authors meta' dd <- docdate meta' @@ -110,7 +112,7 @@ description meta' = do Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s] Just (MetaString s) -> [el "lang" $ iso639 s] _ -> [] - where iso639 = takeWhile (/= '-') -- Convert BCP 47 to ISO 639 + where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 let coverimage url = do let img = Image nullAttr mempty (url, "") im <- insertImage InlineImage img @@ -122,7 +124,7 @@ description meta' = do return $ el "description" [ el "title-info" (genre : (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang)) - , el "document-info" [el "program-used" "pandoc"] + , el "document-info" [el "program-used" ("pandoc" :: String)] ] booktitle :: PandocMonad m => Meta -> FBM m [Content] @@ -178,7 +180,7 @@ renderSection lvl (Div (id',"section":_,_) (Header _ _ title : xs)) = do then return [] else list . el "title" <$> formatTitle title content <- cMapM (renderSection (lvl + 1)) xs - let sectionContent = if null id' + let sectionContent = if T.null id' then el "section" (title' ++ content) else el "section" ([uattr "id" id'], title' ++ content) return [sectionContent] @@ -213,19 +215,19 @@ renderFootnotes = do -- | Fetch images and encode them for the FictionBook XML. -- Return image data and a list of hrefs of the missing images. -fetchImages :: PandocMonad m => [(String,String)] -> m ([Content],[String]) +fetchImages :: PandocMonad m => [(Text,Text)] -> m ([Content],[Text]) fetchImages links = do imgs <- mapM (uncurry fetchImage) links return (rights imgs, lefts imgs) -- | Fetch image data from disk or from network and make a <binary> XML section. -- Return either (Left hrefOfMissingImage) or (Right xmlContent). -fetchImage :: PandocMonad m => String -> String -> m (Either String Content) +fetchImage :: PandocMonad m => Text -> Text -> m (Either Text Content) fetchImage href link = do mbimg <- case (isURI link, readDataURI link) of (True, Just (mime,_,True,base64)) -> - let mime' = map toLower mime + let mime' = T.toLower mime in if mime' == "image/png" || mime' == "image/jpeg" then return (Just (mime',base64)) else return Nothing @@ -237,9 +239,9 @@ fetchImage href link = do report $ CouldNotDetermineMimeType link return Nothing Just mime -> return $ Just (mime, - B8.unpack $ encode bs)) + TE.decodeUtf8 $ encode bs)) (\e -> - do report $ CouldNotFetchResource link (show e) + do report $ CouldNotFetchResource link (tshow e) return Nothing) case mbimg of Just (imgtype, imgdata) -> @@ -247,52 +249,52 @@ fetchImage href link = do ( [uattr "id" href , uattr "content-type" imgtype] , txt imgdata ) - _ -> return (Left ('#':href)) + _ -> return (Left ("#" <> href)) -- | Extract mime type and encoded data from the Data URI. -readDataURI :: String -- ^ URI - -> Maybe (String,String,Bool,String) +readDataURI :: Text -- ^ URI + -> Maybe (Text,Text,Bool,Text) -- ^ Maybe (mime,charset,isBase64,data) readDataURI uri = - case stripPrefix "data:" uri of + case T.stripPrefix "data:" uri of Nothing -> Nothing Just rest -> - let meta = takeWhile (/= ',') rest -- without trailing ',' - uridata = drop (length meta + 1) rest - parts = split (== ';') meta + let meta = T.takeWhile (/= ',') rest -- without trailing ',' + uridata = T.drop (T.length meta + 1) rest + parts = T.split (== ';') meta (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts in Just (mime,cs,enc,uridata) where upd str m@(mime,cs,enc) - | isMimeType str = (str,cs,enc) - | Just str' <- stripPrefix "charset=" str = (mime,str',enc) - | str == "base64" = (mime,cs,True) - | otherwise = m + | isMimeType str = (str,cs,enc) + | Just str' <- T.stripPrefix "charset=" str = (mime,str',enc) + | str == "base64" = (mime,cs,True) + | otherwise = m -- Without parameters like ;charset=...; see RFC 2045, 5.1 -isMimeType :: String -> Bool +isMimeType :: Text -> Bool isMimeType s = - case split (=='/') s of + case T.split (=='/') s of [mtype,msubtype] -> - (map toLower mtype `elem` types - || "x-" `isPrefixOf` map toLower mtype) - && all valid mtype - && all valid msubtype + (T.toLower mtype `elem` types + || "x-" `T.isPrefixOf` T.toLower mtype) + && T.all valid mtype + && T.all valid msubtype _ -> False where types = ["text","image","audio","video","application","message","multipart"] valid c = isAscii c && not (isControl c) && not (isSpace c) && - c `notElem` "()<>@,;:\\\"/[]?=" + c `notElem` ("()<>@,;:\\\"/[]?=" :: String) -footnoteID :: Int -> String -footnoteID i = "n" ++ show i +footnoteID :: Int -> Text +footnoteID i = "n" <> tshow i -mkitem :: PandocMonad m => String -> [Block] -> FBM m [Content] +mkitem :: PandocMonad m => Text -> [Block] -> FBM m [Content] mkitem mrk bs = do pmrk <- gets parentListMarker - let nmrk = pmrk ++ mrk ++ " " + let nmrk = pmrk <> mrk <> " " modify (\s -> s { parentListMarker = nmrk}) item <- cMapM blockToXml $ plainToPara $ indentBlocks nmrk bs modify (\s -> s { parentListMarker = pmrk }) -- old parent marker @@ -303,11 +305,12 @@ blockToXml :: PandocMonad m => Block -> FBM m [Content] blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula -- title beginning with fig: indicates that the image is a figure -blockToXml (Para [Image atr alt (src,'f':'i':'g':':':tit)]) = - insertImage NormalImage (Image atr alt (src,tit)) +blockToXml (Para [Image atr alt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = insertImage NormalImage (Image atr alt (src,tit)) blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . - map (el "p" . el "code") . lines $ s + map (el "p" . el "code" . T.unpack) . T.lines $ s blockToXml (RawBlock f str) = if f == Format "fb2" then return $ XI.parseXML str @@ -329,7 +332,7 @@ blockToXml (DefinitionList defs) = cMapM mkdef defs where mkdef (term, bss) = do - items <- cMapM (cMapM blockToXml . plainToPara . indentBlocks (replicate 4 ' ')) bss + items <- cMapM (cMapM blockToXml . plainToPara . indentBlocks (T.replicate 4 " ")) bss t <- wrap "strong" term return (el "p" t : items) blockToXml h@Header{} = do @@ -376,13 +379,13 @@ unPlain x = x -- Simulate increased indentation level. Will not really work -- for multi-line paragraphs. -indentPrefix :: String -> Block -> Block +indentPrefix :: Text -> Block -> Block indentPrefix spacer = indentBlock where indentBlock (Plain ins) = Plain (Str spacer:ins) indentBlock (Para ins) = Para (Str spacer:ins) indentBlock (CodeBlock a s) = - let s' = unlines . map (spacer++) . lines $ s + let s' = T.unlines . map (spacer<>) . T.lines $ s in CodeBlock a s' indentBlock (BlockQuote bs) = BlockQuote (map indent bs) indentBlock (Header l attr' ins) = Header l attr' (indentLines ins) @@ -396,12 +399,12 @@ indent :: Block -> Block indent = indentPrefix spacer where -- indentation space - spacer :: String - spacer = replicate 4 ' ' + spacer :: Text + spacer = T.replicate 4 " " -indentBlocks :: String -> [Block] -> [Block] +indentBlocks :: Text -> [Block] -> [Block] indentBlocks _ [] = [] -indentBlocks prefix (x:xs) = indentPrefix prefix x : map (indentPrefix $ replicate (length prefix) ' ') xs +indentBlocks prefix (x:xs) = indentPrefix prefix x : map (indentPrefix $ T.replicate (T.length prefix) " ") xs -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: PandocMonad m => Inline -> FBM m [Content] @@ -420,7 +423,7 @@ toXml (Quoted DoubleQuote ss) = do inner <- cMapM toXml ss return $ [txt "“"] ++ inner ++ [txt "”"] toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles -toXml (Code _ s) = return [el "code" s] +toXml (Code _ s) = return [el "code" $ T.unpack s] toXml Space = return [txt " "] toXml SoftBreak = return [txt "\n"] toXml LineBreak = return [txt "\n"] @@ -438,40 +441,40 @@ toXml (Note bs) = do let fn_id = footnoteID n fn_desc <- cMapM blockToXml bs modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns }) - let fn_ref = txt $ "[" ++ show n ++ "]" - return . list $ el "a" ( [ attr ("l","href") ('#':fn_id) + let fn_ref = txt $ "[" <> tshow n <> "]" + return . list $ el "a" ( [ attr ("l","href") ("#" <> fn_id) , uattr "type" "note" ] , fn_ref ) -insertMath :: PandocMonad m => ImageMode -> String -> FBM m [Content] +insertMath :: PandocMonad m => ImageMode -> Text -> FBM m [Content] insertMath immode formula = do htmlMath <- fmap (writerHTMLMathMethod . writerOptions) get case htmlMath of WebTeX url -> do let alt = [Code nullAttr formula] - let imgurl = url ++ urlEncode formula + let imgurl = url <> T.pack (urlEncode $ T.unpack formula) let img = Image nullAttr alt (imgurl, "") insertImage immode img - _ -> return [el "code" formula] + _ -> return [el "code" $ T.unpack formula] insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] insertImage immode (Image _ alt (url,ttl)) = do images <- imagesToFetch `liftM` get let n = 1 + length images - let fname = "image" ++ show n + let fname = "image" <> tshow n modify (\s -> s { imagesToFetch = (fname, url) : images }) - let ttlattr = case (immode, null ttl) of + let ttlattr = case (immode, T.null ttl) of (NormalImage, False) -> [ uattr "title" ttl ] _ -> [] return . list $ el "image" $ - [ attr ("l","href") ('#':fname) - , attr ("l","type") (show immode) - , uattr "alt" (cMap plain alt) ] + [ attr ("l","href") ("#" <> fname) + , attr ("l","type") (tshow immode) + , uattr "alt" (T.pack $ cMap plain alt) ] ++ ttlattr insertImage _ _ = error "unexpected inline instead of image" -replaceImagesWithAlt :: [String] -> Content -> Content +replaceImagesWithAlt :: [Text] -> Content -> Content replaceImagesWithAlt missingHrefs body = let cur = XC.fromContent body cur' = replaceAll cur @@ -507,8 +510,8 @@ replaceImagesWithAlt missingHrefs body = (Just alt', Just imtype') -> if imtype' == show NormalImage then el "p" alt' - else txt alt' - (Just alt', Nothing) -> txt alt' -- no type attribute + else txt $ T.pack alt' + (Just alt', Nothing) -> txt $ T.pack alt' -- no type attribute _ -> n -- don't replace if alt text is not found replaceNode n = n -- @@ -529,7 +532,7 @@ list = (:[]) -- | Convert an 'Inline' to plaintext. plain :: Inline -> String -plain (Str s) = s +plain (Str s) = T.unpack s plain (Emph ss) = cMap plain ss plain (Span _ ss) = cMap plain ss plain (Strong ss) = cMap plain ss @@ -539,13 +542,13 @@ plain (Subscript ss) = cMap plain ss plain (SmallCaps ss) = cMap plain ss plain (Quoted _ ss) = cMap plain ss plain (Cite _ ss) = cMap plain ss -- FIXME -plain (Code _ s) = s +plain (Code _ s) = T.unpack s plain Space = " " plain SoftBreak = " " plain LineBreak = "\n" -plain (Math _ s) = s +plain (Math _ s) = T.unpack s plain (RawInline _ _) = "" -plain (Link _ text (url,_)) = concat (map plain text ++ [" <", url, ">"]) +plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"]) plain (Image _ alt _) = cMap plain alt plain (Note _) = "" -- FIXME @@ -563,16 +566,16 @@ spaceBeforeAfter cs = in [emptyline] ++ cs ++ [emptyline] -- | Create a plain-text XML content. -txt :: String -> Content -txt s = Text $ CData CDataText s Nothing +txt :: Text -> Content +txt s = Text $ CData CDataText (T.unpack s) Nothing -- | Create an XML attribute with an unqualified name. -uattr :: String -> String -> Text.XML.Light.Attr -uattr name = Attr (uname name) +uattr :: String -> Text -> Text.XML.Light.Attr +uattr name = Attr (uname name) . T.unpack -- | Create an XML attribute with a qualified name from given namespace. -attr :: (String, String) -> String -> Text.XML.Light.Attr -attr (ns, name) = Attr (qname ns name) +attr :: (String, String) -> Text -> Text.XML.Light.Attr +attr (ns, name) = Attr (qname ns name) . T.unpack -- | Unqualified name uname :: String -> QName diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index f042bda21..e858f3a6c 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -30,12 +30,10 @@ module Text.Pandoc.Writers.HTML ( tagWithAttributes ) where import Control.Monad.State.Strict -import Data.Char (ord, toLower) -import Data.List (intercalate, intersperse, isPrefixOf, partition, delete) -import Data.List.Split (splitWhen) +import Data.Char (ord) +import Data.List (intercalate, intersperse, partition, delete) import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe) import qualified Data.Set as Set -import Data.String (fromString) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL @@ -112,19 +110,21 @@ defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, -- Helpers to render HTML with the appropriate function. -strToHtml :: String -> Html -strToHtml ('\'':xs) = preEscapedString "\'" `mappend` strToHtml xs -strToHtml ('"' :xs) = preEscapedString "\"" `mappend` strToHtml xs -strToHtml (x:xs) | needsVariationSelector x - = preEscapedString [x, '\xFE0E'] `mappend` - case xs of - ('\xFE0E':ys) -> strToHtml ys - _ -> strToHtml xs -strToHtml xs@(_:_) = case break (\c -> c == '\'' || c == '"' || - needsVariationSelector c) xs of - (_ ,[]) -> toHtml xs - (ys,zs) -> toHtml ys `mappend` strToHtml zs -strToHtml [] = "" +strToHtml :: Text -> Html +strToHtml = strToHtml' . T.unpack + where + strToHtml' ('\'':xs) = preEscapedString "\'" `mappend` strToHtml' xs + strToHtml' ('"' :xs) = preEscapedString "\"" `mappend` strToHtml' xs + strToHtml' (x:xs) | needsVariationSelector x + = preEscapedString [x, '\xFE0E'] `mappend` + case xs of + ('\xFE0E':ys) -> strToHtml' ys + _ -> strToHtml' xs + strToHtml' xs@(_:_) = case break (\c -> c == '\'' || c == '"' || + needsVariationSelector c) xs of + (_ ,[]) -> toHtml xs + (ys,zs) -> toHtml ys `mappend` strToHtml' zs + strToHtml' [] = "" -- See #5469: this prevents iOS from substituting emojis. needsVariationSelector :: Char -> Bool @@ -223,14 +223,14 @@ writeHtmlString' st opts d = do case getField "pagetitle" context of Just (s :: Text) | not (T.null s) -> return context _ -> do - let fallback = + let fallback = T.pack $ case lookupContext "sourcefile" (writerVariables opts) of Nothing -> "Untitled" Just [] -> "Untitled" Just (x:_) -> takeBaseName $ T.unpack x report $ NoTitleElement fallback - return $ resetField "pagetitle" (T.pack fallback) context + return $ resetField "pagetitle" fallback context return $ render Nothing $ renderTemplate tpl (defField "body" (renderHtml' body) context') @@ -285,7 +285,7 @@ pandocToHtml opts (Pandoc meta blocks) = do _ -> mempty KaTeX url -> do H.script ! - A.src (toValue $ url ++ "katex.min.js") $ mempty + A.src (toValue $ url <> "katex.min.js") $ mempty nl opts let katexFlushLeft = case lookupContext "classoption" metadata of @@ -306,7 +306,7 @@ pandocToHtml opts (Pandoc meta blocks) = do ] nl opts H.link ! A.rel "stylesheet" ! - A.href (toValue $ url ++ "katex.min.css") + A.href (toValue $ url <> "katex.min.css") _ -> case lookupContext "mathml-script" (writerVariables opts) of @@ -329,7 +329,7 @@ pandocToHtml opts (Pandoc meta blocks) = do (case writerHTMLMathMethod opts of MathJax u -> defField "mathjax" True . defField "mathjaxurl" - (T.pack $ takeWhile (/='?') u) + (T.takeWhile (/='?') u) _ -> defField "mathjax" False) $ defField "quotes" (stQuotes st) $ -- for backwards compatibility we populate toc @@ -337,12 +337,12 @@ pandocToHtml opts (Pandoc meta blocks) = do -- boolean: maybe id (defField "toc") toc $ maybe id (defField "table-of-contents") toc $ - defField "author-meta" (map T.pack authsMeta) $ - maybe id (defField "date-meta" . T.pack) + defField "author-meta" authsMeta $ + maybe id (defField "date-meta") (normalizeDate dateMeta) $ defField "pagetitle" - (T.pack . stringifyHTML . docTitle $ meta) $ - defField "idprefix" (T.pack $ writerIdentifierPrefix opts) $ + (stringifyHTML . docTitle $ meta) $ + defField "idprefix" (writerIdentifierPrefix opts) $ -- these should maybe be set in pandoc.hs defField "slidy-url" ("https://www.w3.org/Talks/Tools/Slidy2" :: Text) $ @@ -354,11 +354,11 @@ pandocToHtml opts (Pandoc meta blocks) = do return (thebody, context) -- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix -prefixedId :: WriterOptions -> String -> Attribute +prefixedId :: WriterOptions -> Text -> Attribute prefixedId opts s = case s of "" -> mempty - _ -> A.id $ toValue $ writerIdentifierPrefix opts ++ s + _ -> A.id $ toValue $ writerIdentifierPrefix opts <> s toList :: PandocMonad m => (Html -> Html) @@ -414,7 +414,7 @@ tableOfContents opts sects = do let opts' = case slideVariant of RevealJsSlides -> opts{ writerIdentifierPrefix = - '/' : writerIdentifierPrefix opts } + "/" <> writerIdentifierPrefix opts } _ -> opts case toTableOfContents opts sects of bl@(BulletList (_:_)) -> Just <$> blockToHtml opts' bl @@ -446,64 +446,64 @@ footnoteSection opts notes = do H.ol (mconcat notes >> nl opts) >> nl opts) -- | Parse a mailto link; return Just (name, domain) or Nothing. -parseMailto :: String -> Maybe (String, String) +parseMailto :: Text -> Maybe (Text, Text) parseMailto s = - case break (==':') s of - (xs,':':addr) | map toLower xs == "mailto" -> do - let (name', rest) = span (/='@') addr - let domain = drop 1 rest + case T.break (==':') s of + (xs,T.uncons -> Just (':',addr)) | T.toLower xs == "mailto" -> do + let (name', rest) = T.span (/='@') addr + let domain = T.drop 1 rest return (name', domain) _ -> Prelude.fail "not a mailto: URL" -- | Obfuscate a "mailto:" link. obfuscateLink :: PandocMonad m - => WriterOptions -> Attr -> Html -> String + => WriterOptions -> Attr -> Html -> Text -> StateT WriterState m Html obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = addAttrs opts attr $ H.a ! A.href (toValue s) $ txt -obfuscateLink opts attr (TL.unpack . renderHtml -> txt) s = +obfuscateLink opts attr (TL.toStrict . renderHtml -> txt) s = let meth = writerEmailObfuscation opts - s' = map toLower (take 7 s) ++ drop 7 s + s' = T.toLower (T.take 7 s) <> T.drop 7 s in case parseMailto s' of (Just (name', domain)) -> - let domain' = substitute "." " dot " domain + let domain' = T.replace "." " dot " domain at' = obfuscateChar '@' (linkText, altText) = - if txt == drop 7 s' -- autolink - then ("e", name' ++ " at " ++ domain') - else ("'" ++ obfuscateString txt ++ "'", - txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") + if txt == T.drop 7 s' -- autolink + then ("e", name' <> " at " <> domain') + else ("'" <> obfuscateString txt <> "'", + txt <> " (" <> name' <> " at " <> domain' <> ")") (_, classNames, _) = attr - classNamesStr = concatMap (' ':) classNames + classNamesStr = T.concat $ map (" "<>) classNames in case meth of ReferenceObfuscation -> -- need to use preEscapedString or &'s are escaped to & in URL return $ - preEscapedString $ "<a href=\"" ++ obfuscateString s' - ++ "\" class=\"email\">" ++ obfuscateString txt ++ "</a>" + preEscapedText $ "<a href=\"" <> obfuscateString s' + <> "\" class=\"email\">" <> obfuscateString txt <> "</a>" JavascriptObfuscation -> return $ (H.script ! A.type_ "text/javascript" $ - preEscapedString ("\n<!--\nh='" ++ - obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ - obfuscateString name' ++ "';e=n+a+h;\n" ++ - "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail" ++ - classNamesStr ++ "\">'+" ++ - linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >> - H.noscript (preEscapedString $ obfuscateString altText) - _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth + preEscapedText ("\n<!--\nh='" <> + obfuscateString domain <> "';a='" <> at' <> "';n='" <> + obfuscateString name' <> "';e=n+a+h;\n" <> + "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\" clas'+'s=\"em' + 'ail" <> + classNamesStr <> "\">'+" <> + linkText <> "+'<\\/'+'a'+'>');\n// -->\n")) >> + H.noscript (preEscapedText $ obfuscateString altText) + _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " <> tshow meth _ -> addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email -- | Obfuscate character as entity. -obfuscateChar :: Char -> String +obfuscateChar :: Char -> Text obfuscateChar char = let num = ord char - numstr = if even num then show num else "x" ++ showHex num "" - in "&#" ++ numstr ++ ";" + numstr = if even num then show num else "x" <> showHex num "" + in "&#" <> T.pack numstr <> ";" -- | Obfuscate string using entities. -obfuscateString :: String -> String -obfuscateString = concatMap obfuscateChar . fromEntities +obfuscateString :: Text -> Text +obfuscateString = T.concatMap obfuscateChar . fromEntities -- | Create HTML tag with attributes. tagWithAttributes :: WriterOptions @@ -525,7 +525,7 @@ addAttrs :: PandocMonad m addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr toAttrs :: PandocMonad m - => [(String, String)] -> StateT WriterState m [Attribute] + => [(Text, Text)] -> StateT WriterState m [Attribute] toAttrs kvs = do html5 <- gets stHtml5 mbEpubVersion <- gets stEPUBVersion @@ -533,18 +533,18 @@ toAttrs kvs = do if html5 then if x `Set.member` (html5Attributes <> rdfaAttributes) - || ':' `elem` x -- e.g. epub: namespace - || "data-" `isPrefixOf` x - || "aria-" `isPrefixOf` x - then Just $ customAttribute (fromString x) (toValue y) - else Just $ customAttribute (fromString ("data-" ++ x)) + || T.any (== ':') x -- e.g. epub: namespace + || "data-" `T.isPrefixOf` x + || "aria-" `T.isPrefixOf` x + then Just $ customAttribute (textTag x) (toValue y) + else Just $ customAttribute (textTag ("data-" <> x)) (toValue y) else if mbEpubVersion == Just EPUB2 && not (x `Set.member` (html4Attributes <> rdfaAttributes) || - "xml:" `isPrefixOf` x) + "xml:" `T.isPrefixOf` x) then Nothing - else Just $ customAttribute (fromString x) (toValue y)) + else Just $ customAttribute (textTag x) (toValue y)) kvs attrsToHtml :: PandocMonad m @@ -552,8 +552,8 @@ attrsToHtml :: PandocMonad m attrsToHtml opts (id',classes',keyvals) = do attrs <- toAttrs keyvals return $ - [prefixedId opts id' | not (null id')] ++ - [A.class_ (toValue $ unwords classes') | not (null classes')] ++ attrs + [prefixedId opts id' | not (T.null id')] ++ + [A.class_ (toValue $ T.unwords classes') | not (null classes')] ++ attrs imgAttrsToHtml :: PandocMonad m => WriterOptions -> Attr -> StateT WriterState m [Attribute] @@ -568,23 +568,23 @@ imgAttrsToHtml opts attr = do isNotDim ("height", _) = False isNotDim _ = True -dimensionsToAttrList :: Attr -> [(String, String)] +dimensionsToAttrList :: Attr -> [(Text, Text)] dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height where - consolidateStyles :: [(String, String)] -> [(String, String)] + consolidateStyles :: [(Text, Text)] -> [(Text, Text)] consolidateStyles xs = case partition isStyle xs of ([], _) -> xs - (ss, rest) -> ("style", intercalate ";" $ map snd ss) : rest + (ss, rest) -> ("style", T.intercalate ";" $ map snd ss) : rest isStyle ("style", _) = True isStyle _ = False go dir = case dimension dir attr of - (Just (Pixel a)) -> [(show dir, show a)] - (Just x) -> [("style", show dir ++ ":" ++ show x)] + (Just (Pixel a)) -> [(tshow dir, tshow a)] + (Just x) -> [("style", tshow dir <> ":" <> tshow x)] Nothing -> [] figure :: PandocMonad m - => WriterOptions -> Attr -> [Inline] -> (String, String) + => WriterOptions -> Attr -> [Inline] -> (Text, Text) -> StateT WriterState m Html figure opts attr txt (s,tit) = do img <- inlineToHtml opts (Image attr [Str ""] (s,tit)) @@ -601,14 +601,14 @@ figure opts attr txt (s,tit) = do else H.div ! A.class_ "figure" $ mconcat [nl opts, img, nl opts, capt, nl opts] -showSecNum :: [Int] -> String -showSecNum = intercalate "." . map show +showSecNum :: [Int] -> Text +showSecNum = T.intercalate "." . map tshow -getNumber :: WriterOptions -> Attr -> String +getNumber :: WriterOptions -> Attr -> Text getNumber opts (_,_,kvs) = showSecNum $ zipWith (+) num (writerNumberOffset opts ++ repeat 0) where - num = maybe [] (map (fromMaybe 0 . safeRead) . splitWhen (=='.')) $ + num = maybe [] (map (fromMaybe 0 . safeRead) . T.split (=='.')) $ lookup "number" kvs -- | Convert Pandoc block element to HTML. @@ -625,7 +625,7 @@ blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)]) inlineToHtml opts (Image attr txt (src, tit)) _ -> figure opts attr txt (src, tit) -- title beginning with fig: indicates that the image is a figure -blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = +blockToHtml opts (Para [Image attr txt (s,T.stripPrefix "fig:" -> Just tit)]) = figure opts attr txt (s,tit) blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst @@ -661,7 +661,7 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs) RevealJsSlides -> "fragment" _ -> "incremental" let inDiv zs = (RawBlock (Format "html") ("<div class=\"" - ++ fragmentClass ++ "\">")) : + <> fragmentClass <> "\">")) : (zs ++ [RawBlock (Format "html") "</div>"]) let (titleBlocks, innerSecs) = if titleSlide @@ -675,8 +675,8 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs) let classes' = ["title-slide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && not html5 ] ++ - ["level" ++ show level | slide || writerSectionDivs opts ] - ++ dclasses + ["level" <> tshow level | slide || writerSectionDivs opts ] + <> dclasses let secttag = if html5 then H5.section else H.div @@ -709,11 +709,11 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do html5 <- gets stHtml5 slideVariant <- gets stSlideVariant let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++ - [("style", "width:" ++ w ++ ";") + [("style", "width:" <> w <> ";") | ("width",w) <- kvs', "column" `elem` classes] ++ [("role", "doc-bibliography") | ident == "refs" && html5] ++ [("role", "doc-biblioentry") - | "ref-item" `isPrefixOf` ident && html5] + | "ref-item" `T.isPrefixOf` ident && html5] let speakerNotes = "notes" `elem` classes -- we don't want incremental output inside speaker notes, see #1394 let opts' = if | speakerNotes -> opts{ writerIncremental = False } @@ -751,7 +751,7 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do blockToHtml opts (RawBlock f str) = do ishtml <- isRawHtml f if ishtml - then return $ preEscapedString str + then return $ preEscapedText str else if (f == Format "latex" || f == Format "tex") && allowsMathEnvironments (writerHTMLMathMethod opts) && isMathEnvironment str @@ -763,22 +763,22 @@ blockToHtml _ HorizontalRule = do html5 <- gets stHtml5 return $ if html5 then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do - id'' <- if null id' + id'' <- if T.null id' then do modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 } codeblocknum <- gets stCodeBlockNum - return (writerIdentifierPrefix opts ++ "cb" ++ show codeblocknum) - else return (writerIdentifierPrefix opts ++ id') + return (writerIdentifierPrefix opts <> "cb" <> tshow codeblocknum) + else return (writerIdentifierPrefix opts <> id') let tolhs = isEnabled Ext_literate_haskell opts && - any (\c -> map toLower c == "haskell") classes && - any (\c -> map toLower c == "literate") classes + any (\c -> T.toLower c == "haskell") classes && + any (\c -> T.toLower c == "literate") classes classes' = if tolhs - then map (\c -> if map toLower c == "haskell" + then map (\c -> if T.toLower c == "haskell" then "literatehaskell" else c) classes else classes adjCode = if tolhs - then unlines . map ("> " ++) . lines $ rawCode + then T.unlines . map ("> " <>) . T.lines $ rawCode else rawCode hlCode = if isJust (writerHighlightStyle opts) then highlight (writerSyntaxMap opts) formatHtmlBlock @@ -786,7 +786,7 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do else Left "" case hlCode of Left msg -> do - unless (null msg) $ + unless (T.null msg) $ report $ CouldNotHighlight msg addAttrs opts (id',classes,keyvals) $ H.pre $ H.code $ toHtml adjCode @@ -819,7 +819,7 @@ blockToHtml opts (BlockQuote blocks) = do blockToHtml opts (Header level attr@(_,classes,_) lst) = do contents <- inlineListToHtml opts lst let secnum = getNumber opts attr - let contents' = if writerNumberSections opts && not (null secnum) + let contents' = if writerNumberSections opts && not (T.null secnum) && "unnumbered" `notElem` classes then (H.span ! A.class_ "header-section-number" $ toHtml secnum) >> strToHtml " " >> contents @@ -841,7 +841,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do html5 <- gets stHtml5 let numstyle' = case numstyle of Example -> "decimal" - _ -> camelCaseToHyphenated $ show numstyle + _ -> camelCaseToHyphenated $ tshow numstyle let attribs = [A.start $ toValue startnum | startnum /= 1] ++ [A.class_ "example" | numstyle == Example] ++ (if numstyle /= DefaultStyle @@ -854,7 +854,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do LowerRoman -> "i" UpperRoman -> "I" _ -> "1"] - else [A.style $ toValue $ "list-style-type: " ++ + else [A.style $ toValue $ "list-style-type: " <> numstyle'] else []) l <- ordList opts contents @@ -874,7 +874,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do cs <- inlineListToHtml opts capt return $ H.caption cs >> nl opts html5 <- gets stHtml5 - let percent w = show (truncate (100*w) :: Integer) ++ "%" + let percent w = show (truncate (100*w) :: Integer) <> "%" let coltags = if all (== 0.0) widths then mempty else do @@ -882,7 +882,7 @@ blockToHtml opts (Table capt aligns widths headers rows') = do nl opts mapM_ (\w -> do if html5 - then H.col ! A.style (toValue $ "width: " ++ + then H.col ! A.style (toValue $ "width: " <> percent w) else H.col ! A.width (toValue $ percent w) nl opts) widths @@ -901,8 +901,8 @@ blockToHtml opts (Table capt aligns widths headers rows') = do -- table, or some browsers give us skinny columns with lots of space between: return $ if totalWidth == 0 || totalWidth == 1 then tbl - else tbl ! A.style (toValue $ "width:" ++ - show (round (totalWidth * 100) :: Int) ++ "%;") + else tbl ! A.style (toValue $ "width:" <> + show (round (totalWidth * 100) :: Int) <> "%;") tableRowToHtml :: PandocMonad m => WriterOptions @@ -940,7 +940,7 @@ tableItemToHtml opts tag' align' item = do html5 <- gets stHtml5 let alignStr = alignmentToString align' let attribs = if html5 - then A.style (toValue $ "text-align: " ++ alignStr ++ ";") + then A.style (toValue $ "text-align: " <> alignStr <> ";") else A.align (toValue alignStr) let tag'' = if null alignStr then tag' @@ -967,8 +967,8 @@ inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . mconcat -- | Annotates a MathML expression with the tex source -annotateMML :: XML.Element -> String -> XML.Element -annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)]) +annotateMML :: XML.Element -> Text -> XML.Element +annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, T.unpack tex)]) where cs = case elChildren e of [] -> unode "mrow" () @@ -989,9 +989,9 @@ inlineToHtml opts inline = do (Str str) -> return $ strToHtml str Space -> return $ strToHtml " " SoftBreak -> return $ case writerWrapText opts of - WrapNone -> preEscapedString " " - WrapAuto -> preEscapedString " " - WrapPreserve -> preEscapedString "\n" + WrapNone -> preEscapedText " " + WrapAuto -> preEscapedText " " + WrapPreserve -> preEscapedText "\n" LineBreak -> return $ do if html5 then H5.br else H.br strToHtml "\n" @@ -999,9 +999,8 @@ inlineToHtml opts inline = do (Span (id',classes,kvs) ils) -> let spanLikeTag = case classes of (c:_) -> do - let c' = T.pack c - guard (c' `Set.member` htmlSpanLikeElements) - pure $ customParent (textTag c') + guard (c `Set.member` htmlSpanLikeElements) + pure $ customParent (textTag c) _ -> Nothing in case spanLikeTag of Just tag -> do @@ -1019,7 +1018,7 @@ inlineToHtml opts inline = do | "csl-no-smallcaps" `elem` classes] kvs' = if null styles then kvs - else ("style", concat styles) : kvs + else ("style", T.concat styles) : kvs classes' = [ c | c <- classes , c `notElem` [ "csl-no-emph" , "csl-no-strong" @@ -1032,7 +1031,7 @@ inlineToHtml opts inline = do (Code attr@(ids,cs,kvs) str) -> case hlCode of Left msg -> do - unless (null msg) $ + unless (T.null msg) $ report $ CouldNotHighlight msg addAttrs opts (ids,cs',kvs) $ maybe H.code id sampOrVar $ @@ -1079,7 +1078,7 @@ inlineToHtml opts inline = do `fmap` inlineListToHtml opts lst (Math t str) -> do modify (\st -> st {stMath = True}) - let mathClass = toValue $ ("math " :: String) ++ + let mathClass = toValue $ ("math " :: Text) <> if t == InlineMath then "inline" else "display" case writerHTMLMathMethod opts of WebTeX url -> do @@ -1088,7 +1087,7 @@ inlineToHtml opts inline = do InlineMath -> "\\textstyle " DisplayMath -> "\\displaystyle " let m = imtag ! A.style "vertical-align:middle" - ! A.src (toValue $ url ++ urlEncode (s ++ str)) + ! A.src (toValue $ url <> T.pack (urlEncode (T.unpack $ s <> str))) ! A.alt (toValue str) ! A.title (toValue str) let brtag = if html5 then H5.br else H.br @@ -1113,8 +1112,8 @@ inlineToHtml opts inline = do inlineToHtml opts il MathJax _ -> return $ H.span ! A.class_ mathClass $ toHtml $ case t of - InlineMath -> "\\(" ++ str ++ "\\)" - DisplayMath -> "\\[" ++ str ++ "\\]" + InlineMath -> "\\(" <> str <> "\\)" + DisplayMath -> "\\[" <> str <> "\\]" KaTeX _ -> return $ H.span ! A.class_ mathClass $ toHtml $ case t of InlineMath -> str @@ -1129,7 +1128,7 @@ inlineToHtml opts inline = do (RawInline f str) -> do ishtml <- isRawHtml f if ishtml - then return $ preEscapedString str + then return $ preEscapedText str else if (f == Format "latex" || f == Format "tex") && allowsMathEnvironments (writerHTMLMathMethod opts) && isMathEnvironment str @@ -1137,21 +1136,21 @@ inlineToHtml opts inline = do else do report $ InlineNotRendered inline return mempty - (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do + (Link attr txt (s,_)) | "mailto:" `T.isPrefixOf` s -> do linkText <- inlineListToHtml opts txt obfuscateLink opts attr linkText s (Link (ident,classes,kvs) txt (s,tit)) -> do linkText <- inlineListToHtml opts txt slideVariant <- gets stSlideVariant - let s' = case s of - '#':xs -> let prefix = if slideVariant == RevealJsSlides + let s' = case T.uncons s of + Just ('#',xs) -> let prefix = if slideVariant == RevealJsSlides then "/" else writerIdentifierPrefix opts - in '#' : prefix ++ xs + in "#" <> prefix <> xs _ -> s let link = H.a ! A.href (toValue s') $ linkText link' <- addAttrs opts (ident, classes, kvs) link - return $ if null tit + return $ if T.null tit then link' else link' ! A.title (toValue tit) (Image attr txt (s,tit)) -> do @@ -1164,7 +1163,7 @@ inlineToHtml opts inline = do (if isReveal then customAttribute "data-src" $ toValue s else A.src $ toValue s) : - [A.title $ toValue tit | not (null tit)] ++ + [A.title $ toValue tit | not (T.null tit)] ++ attrs imageTag = (if html5 then H5.img else H.img , [A.alt $ toValue alternate | not (null txt)] ) @@ -1174,7 +1173,7 @@ inlineToHtml opts inline = do else alternate in (tg $ H.a ! A.href (toValue s) $ toHtml linkTxt , [A5.controls ""] ) - normSrc = maybe s uriPath (parseURIReference s) + normSrc = maybe (T.unpack s) uriPath (parseURIReference $ T.unpack s) (tag, specAttrs) = case mediaCategory normSrc of Just "image" -> imageTag Just "video" -> mediaTag H5.video "Video" @@ -1186,18 +1185,18 @@ inlineToHtml opts inline = do (Note contents) -> do notes <- gets stNotes let number = length notes + 1 - let ref = show number + let ref = tshow number htmlContents <- blockListToNote opts ref contents epubVersion <- gets stEPUBVersion -- push contents onto front of notes modify $ \st -> st {stNotes = htmlContents:notes} slideVariant <- gets stSlideVariant - let revealSlash = ['/' | slideVariant == RevealJsSlides] - let link = H.a ! A.href (toValue $ "#" ++ - revealSlash ++ - writerIdentifierPrefix opts ++ "fn" ++ ref) + let revealSlash = T.pack ['/' | slideVariant == RevealJsSlides] + let link = H.a ! A.href (toValue $ "#" <> + revealSlash <> + writerIdentifierPrefix opts <> "fn" <> ref) ! A.class_ "footnote-ref" - ! prefixedId opts ("fnref" ++ ref) + ! prefixedId opts ("fnref" <> ref) $ (if isJust epubVersion then id else H.sup) @@ -1208,7 +1207,7 @@ inlineToHtml opts inline = do "role" "doc-noteref" _ -> link (Cite cits il)-> do contents <- inlineListToHtml opts (walk addRoleToLink il) - let citationIds = unwords $ map citationId cits + let citationIds = T.unwords $ map citationId cits let result = H.span ! A.class_ "citation" $ contents return $ if html5 then result ! customAttribute "data-cites" (toValue citationIds) @@ -1220,7 +1219,7 @@ addRoleToLink (Link (id',classes,kvs) ils (src,tit)) = addRoleToLink x = x blockListToNote :: PandocMonad m - => WriterOptions -> String -> [Block] + => WriterOptions -> Text -> [Block] -> StateT WriterState m Html blockListToNote opts ref blocks = do html5 <- gets stHtml5 @@ -1228,7 +1227,7 @@ blockListToNote opts ref blocks = do -- that block. Otherwise, insert a new Plain block with the backlink. let kvs = if html5 then [("role","doc-backlink")] else [] let backlink = [Link ("",["footnote-back"],kvs) - [Str "↩"] ("#" ++ "fnref" ++ ref,[])] + [Str "↩"] ("#" <> "fnref" <> ref,"")] let blocks' = if null blocks then [] else let lastBlock = last blocks @@ -1241,7 +1240,7 @@ blockListToNote opts ref blocks = do _ -> otherBlocks ++ [lastBlock, Plain backlink] contents <- blockListToHtml opts blocks' - let noteItem = H.li ! prefixedId opts ("fn" ++ ref) $ contents + let noteItem = H.li ! prefixedId opts ("fn" <> ref) $ contents epubVersion <- gets stEPUBVersion let noteItem' = case epubVersion of Just EPUB3 -> noteItem ! @@ -1251,10 +1250,10 @@ blockListToNote opts ref blocks = do _ -> noteItem return $ nl opts >> noteItem' -isMathEnvironment :: String -> Bool -isMathEnvironment s = "\\begin{" `isPrefixOf` s && +isMathEnvironment :: Text -> Bool +isMathEnvironment s = "\\begin{" `T.isPrefixOf` s && envName `elem` mathmlenvs - where envName = takeWhile (/= '}') (drop 7 s) + where envName = T.takeWhile (/= '}') (T.drop 7 s) mathmlenvs = [ "align" , "align*" , "alignat" @@ -1295,7 +1294,7 @@ isRawHtml f = do return $ f == Format "html" || ((html5 && f == Format "html5") || f == Format "html4") -html5Attributes :: Set.Set String +html5Attributes :: Set.Set Text html5Attributes = Set.fromList [ "abbr" , "accept" @@ -1504,7 +1503,7 @@ html5Attributes = Set.fromList ] -- See https://en.wikipedia.org/wiki/RDFa, https://www.w3.org/TR/rdfa-primer/ -rdfaAttributes :: Set.Set String +rdfaAttributes :: Set.Set Text rdfaAttributes = Set.fromList [ "about" , "rel" @@ -1520,7 +1519,7 @@ rdfaAttributes = Set.fromList , "prefix" ] -html4Attributes :: Set.Set String +html4Attributes :: Set.Set Text html4Attributes = Set.fromList [ "abbr" , "accept" diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 1d70913c5..e6c07aaf7 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -19,6 +19,7 @@ import Prelude import Control.Monad.State.Strict import Data.Default import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging @@ -71,7 +72,7 @@ notesToHaddock opts notes = return $ text "#notes#" <> blankline <> contents -- | Escape special characters for Haddock. -escapeString :: String -> String +escapeString :: Text -> Text escapeString = escapeStringUsing haddockEscapes where haddockEscapes = backslashEscapes "\\/'`\"@<" @@ -88,8 +89,9 @@ blockToHaddock opts (Plain inlines) = do contents <- inlineListToHaddock opts inlines return $ contents <> cr -- title beginning with fig: indicates figure -blockToHaddock opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) = - blockToHaddock opts (Para [Image attr alt (src,tit)]) +blockToHaddock opts (Para [Image attr alt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = blockToHaddock opts (Para [Image attr alt (src,tit)]) blockToHaddock opts (Para inlines) = -- TODO: if it contains linebreaks, we need to use a @...@ block (<> blankline) `fmap` blockToHaddock opts (Plain inlines) @@ -97,7 +99,7 @@ blockToHaddock opts (LineBlock lns) = blockToHaddock opts $ linesToPara lns blockToHaddock _ b@(RawBlock f str) | f == "haddock" = - return $ text str <> text "\n" + return $ literal str <> text "\n" | otherwise = do report $ BlockNotRendered b return empty @@ -105,13 +107,13 @@ blockToHaddock opts HorizontalRule = return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline blockToHaddock opts (Header level (ident,_,_) inlines) = do contents <- inlineListToHaddock opts inlines - let attr' = if null ident + let attr' = if T.null ident then empty - else cr <> text "#" <> text ident <> text "#" + else cr <> text "#" <> literal ident <> text "#" return $ nowrap (text (replicate level '=') <> space <> contents) <> attr' <> blankline blockToHaddock _ (CodeBlock (_,_,_) str) = - return $ prefixed "> " (text str) <> blankline + return $ prefixed "> " (literal str) <> blankline -- Nothing in haddock corresponds to block quotes: blockToHaddock opts (BlockQuote blocks) = blockListToHaddock opts blocks @@ -130,8 +132,8 @@ blockToHaddock opts (BulletList items) = do blockToHaddock opts (OrderedList (start,_,delim) items) = do let attribs = (start, Decimal, delim) let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' + let markers' = map (\m -> if T.length m < 3 + then m <> T.replicate (3 - T.length m) " " else m) markers contents <- zipWithM (orderedListItemToHaddock opts) markers' items return $ (if isTightList items then vcat else vsep) contents <> blankline @@ -154,15 +156,15 @@ bulletListItemToHaddock opts items = do -- | Convert ordered list item (a list of blocks) to haddock orderedListItemToHaddock :: PandocMonad m => WriterOptions -- ^ options - -> String -- ^ list item marker + -> Text -- ^ list item marker -> [Block] -- ^ list item (list of blocks) -> StateT WriterState m (Doc Text) orderedListItemToHaddock opts marker items = do contents <- blockListToHaddock opts items - let sps = case length marker - writerTabStop opts of + let sps = case T.length marker - writerTabStop opts of n | n > 0 -> text $ replicate n ' ' _ -> text " " - let start = text marker <> sps + let start = literal marker <> sps return $ hang (writerTabStop opts) start contents $$ if endsWithPlain items then cr @@ -202,8 +204,8 @@ inlineToHaddock :: PandocMonad m => WriterOptions -> Inline -> StateT WriterState m (Doc Text) inlineToHaddock opts (Span (ident,_,_) ils) = do contents <- inlineListToHaddock opts ils - if not (null ident) && null ils - then return $ "#" <> text ident <> "#" + if not (T.null ident) && null ils + then return $ "#" <> literal ident <> "#" else return contents inlineToHaddock opts (Emph lst) = do contents <- inlineListToHaddock opts lst @@ -228,15 +230,15 @@ inlineToHaddock opts (Quoted DoubleQuote lst) = do contents <- inlineListToHaddock opts lst return $ "“" <> contents <> "”" inlineToHaddock _ (Code _ str) = - return $ "@" <> text (escapeString str) <> "@" + return $ "@" <> literal (escapeString str) <> "@" inlineToHaddock _ (Str str) = - return $ text $ escapeString str + return $ literal $ escapeString str inlineToHaddock _ (Math mt str) = return $ case mt of - DisplayMath -> cr <> "\\[" <> text str <> "\\]" <> cr - InlineMath -> "\\(" <> text str <> "\\)" + DisplayMath -> cr <> "\\[" <> literal str <> "\\]" <> cr + InlineMath -> "\\(" <> literal str <> "\\)" inlineToHaddock _ il@(RawInline f str) - | f == "haddock" = return $ text str + | f == "haddock" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty @@ -250,12 +252,12 @@ inlineToHaddock opts SoftBreak = inlineToHaddock _ Space = return space inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst inlineToHaddock _ (Link _ txt (src, _)) = do - let linktext = text $ escapeString $ stringify txt + let linktext = literal $ escapeString $ stringify txt let useAuto = isURI src && case txt of [Str s] | escapeURI s == src -> True _ -> False - return $ nowrap $ "<" <> text src <> + return $ nowrap $ "<" <> literal src <> (if useAuto then empty else space <> linktext) <> ">" inlineToHaddock opts (Image attr alternate (source, tit)) = do linkhaddock <- inlineToHaddock opts (Link attr alternate (source, tit)) @@ -264,5 +266,5 @@ inlineToHaddock opts (Image attr alternate (source, tit)) = do inlineToHaddock opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get - let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st) + let ref = literal $ writerIdentifierPrefix opts <> tshow (length $ stNotes st) return $ "<#notes [" <> ref <> "]>" diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 84a48d8b4..9c367dd73 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -2,6 +2,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ICML @@ -20,10 +21,10 @@ module Text.Pandoc.Writers.ICML (writeICML) where import Prelude import Control.Monad.Except (catchError) import Control.Monad.State.Strict -import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix) +import Data.List (intersperse) import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set -import Data.Text as Text (breakOnAll, pack) +import qualified Data.Text as Text import Data.Text (Text) import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class as P @@ -32,18 +33,18 @@ import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout -import Text.Pandoc.Shared (isURI, linesToPara, splitBy) +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Math (texMathToInlines) import Text.Pandoc.Writers.Shared import Text.Pandoc.XML -type Style = [String] -type Hyperlink = [(Int, String)] +type Style = [Text] +type Hyperlink = [(Int, Text)] data WriterState = WriterState{ - blockStyles :: Set.Set String - , inlineStyles :: Set.Set String + blockStyles :: Set.Set Text + , inlineStyles :: Set.Set Text , links :: Hyperlink , listDepth :: Int , maxListDepth :: Int @@ -61,14 +62,14 @@ defaultWriterState = WriterState{ } -- inline names (appear in InDesign's character styles pane) -emphName :: String -strongName :: String -strikeoutName :: String -superscriptName :: String -subscriptName :: String -smallCapsName :: String -codeName :: String -linkName :: String +emphName :: Text +strongName :: Text +strikeoutName :: Text +superscriptName :: Text +subscriptName :: Text +smallCapsName :: Text +codeName :: Text +linkName :: Text emphName = "Italic" strongName = "Bold" strikeoutName = "Strikeout" @@ -79,31 +80,31 @@ codeName = "Code" linkName = "Link" -- block element names (appear in InDesign's paragraph styles pane) -paragraphName :: String -figureName :: String -imgCaptionName :: String -codeBlockName :: String -blockQuoteName :: String -orderedListName :: String -bulletListName :: String -defListTermName :: String -defListDefName :: String -headerName :: String -tableName :: String -tableHeaderName :: String -tableCaptionName :: String -alignLeftName :: String -alignRightName :: String -alignCenterName :: String -firstListItemName :: String -beginsWithName :: String -lowerRomanName :: String -upperRomanName :: String -lowerAlphaName :: String -upperAlphaName :: String -subListParName :: String -footnoteName :: String -citeName :: String +paragraphName :: Text +figureName :: Text +imgCaptionName :: Text +codeBlockName :: Text +blockQuoteName :: Text +orderedListName :: Text +bulletListName :: Text +defListTermName :: Text +defListDefName :: Text +headerName :: Text +tableName :: Text +tableHeaderName :: Text +tableCaptionName :: Text +alignLeftName :: Text +alignRightName :: Text +alignCenterName :: Text +firstListItemName :: Text +beginsWithName :: Text +lowerRomanName :: Text +upperRomanName :: Text +lowerAlphaName :: Text +upperAlphaName :: Text +subListParName :: Text +footnoteName :: Text +citeName :: Text paragraphName = "Paragraph" figureName = "Figure" imgCaptionName = "Caption" @@ -153,9 +154,9 @@ writeICML opts (Pandoc meta blocks) = do Just tpl -> renderTemplate tpl context -- | Auxiliary functions for parStylesToDoc and charStylesToDoc. -contains :: String -> (String, (String, String)) -> [(String, String)] +contains :: Text -> (Text, (Text, Text)) -> [(Text, Text)] contains s rule = - [snd rule | (fst rule) `isInfixOf` s] + [snd rule | (fst rule) `Text.isInfixOf` s] -- | The monospaced font to use as default. monospacedFont :: Doc Text @@ -170,7 +171,7 @@ defaultListIndent :: Int defaultListIndent = 10 -- other constants -lineSeparator :: String +lineSeparator :: Text lineSeparator = "
" -- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles. @@ -178,7 +179,7 @@ parStylesToDoc :: WriterState -> Doc Text parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st where makeStyle s = - let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str) + let countSubStrs sub str = length $ Text.breakOnAll sub str attrs = concatMap (contains s) [ (defListTermName, ("BulletsAndNumberingListType", "BulletList")) , (defListTermName, ("FontStyle", "Bold")) @@ -186,14 +187,14 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st , (alignLeftName, ("Justification", "LeftAlign")) , (alignRightName, ("Justification", "RightAlign")) , (alignCenterName, ("Justification", "CenterAlign")) - , (headerName++"1", ("PointSize", "36")) - , (headerName++"2", ("PointSize", "30")) - , (headerName++"3", ("PointSize", "24")) - , (headerName++"4", ("PointSize", "18")) - , (headerName++"5", ("PointSize", "14")) + , (headerName<>"1", ("PointSize", "36")) + , (headerName<>"2", ("PointSize", "30")) + , (headerName<>"3", ("PointSize", "24")) + , (headerName<>"4", ("PointSize", "18")) + , (headerName<>"5", ("PointSize", "14")) ] -- what is the most nested list type, if any? - (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s + (isBulletList, isOrderedList) = findList $ reverse $ splitTextBy (==' ') s where findList [] = (False, False) findList (x:xs) | x == bulletListName = (True, False) @@ -201,23 +202,23 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st | otherwise = findList xs nBuls = countSubStrs bulletListName s nOrds = countSubStrs orderedListName s - attrs' = numbering ++ listType ++ indent ++ attrs + attrs' = numbering <> listType <> indent <> attrs where - numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)] + numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", tshow nOrds)] | otherwise = [] - listType | isOrderedList && not (subListParName `isInfixOf` s) + listType | isOrderedList && not (subListParName `Text.isInfixOf` s) = [("BulletsAndNumberingListType", "NumberedList")] - | isBulletList && not (subListParName `isInfixOf` s) + | isBulletList && not (subListParName `Text.isInfixOf` s) = [("BulletsAndNumberingListType", "BulletList")] | otherwise = [] - indent = [("LeftIndent", show indt)] + indent = [("LeftIndent", tshow indt)] where nBlockQuotes = countSubStrs blockQuoteName s nDefLists = countSubStrs defListDefName s indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists) props = inTags True "Properties" [] (basedOn $$ tabList $$ numbForm) where - font = if codeBlockName `isInfixOf` s + font = if codeBlockName `Text.isInfixOf` s then monospacedFont else empty basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font @@ -232,12 +233,12 @@ parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st ] else empty makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name) - numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..." - | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..." - | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..." - | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..." + numbForm | Text.isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..." + | Text.isInfixOf upperRomanName s = makeNumb "I, II, III, IV..." + | Text.isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..." + | Text.isInfixOf upperAlphaName s = makeNumb "A, B, C, D..." | otherwise = empty - in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props + in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"<>s), ("Name", s)] ++ attrs') props -- | Convert a WriterState with its inline styles to the ICML listing of Character Styles. charStylesToDoc :: WriterState -> Doc Text @@ -250,25 +251,25 @@ charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st , (subscriptName, ("Position", "Subscript")) , (smallCapsName, ("Capitalization", "SmallCaps")) ] - attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs - | isInfixOf strongName s = ("FontStyle", "Bold") : attrs - | isInfixOf emphName s = ("FontStyle", "Italic") : attrs - | otherwise = attrs + attrs' | Text.isInfixOf emphName s && Text.isInfixOf strongName s + = ("FontStyle", "Bold Italic") : attrs + | Text.isInfixOf strongName s = ("FontStyle", "Bold") : attrs + | Text.isInfixOf emphName s = ("FontStyle", "Italic") : attrs + | otherwise = attrs props = inTags True "Properties" [] $ inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font where font = - if codeName `isInfixOf` s + if codeName `Text.isInfixOf` s then monospacedFont else empty - in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props + in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"<>s), ("Name", s)] ++ attrs') props -- | Escape colon characters as %3a -escapeColons :: String -> String -escapeColons (x:xs) - | x == ':' = "%3a" ++ escapeColons xs - | otherwise = x : escapeColons xs -escapeColons [] = [] +escapeColons :: Text -> Text +escapeColons = Text.concatMap $ \x -> case x of + ':' -> "%3a" + _ -> Text.singleton x -- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks. hyperlinksToDoc :: Hyperlink -> Doc Text @@ -278,15 +279,15 @@ hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs hyp (ident, url) = hdest $$ hlink where hdest = selfClosingTag "HyperlinkURLDestination" - [("Self", "HyperlinkURLDestination/"++escapeColons url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 - hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), - ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] + [("Self", "HyperlinkURLDestination/"<>escapeColons url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] -- HyperlinkURLDestination with more than one colon crashes CS6 + hlink = inTags True "Hyperlink" [("Self","uf-"<>tshow ident), ("Name",url), + ("Source","htss-"<>tshow ident), ("Visible","true"), ("DestinationUniqueKey","1")] $ inTags True "Properties" [] $ inTags False "BorderColor" [("type","enumeration")] (text "Black") - $$ inTags False "Destination" [("type","object")] (text $ "HyperlinkURLDestination/"++escapeColons (escapeStringForXML url)) -- HyperlinkURLDestination with more than one colon crashes CS6 + $$ inTags False "Destination" [("type","object")] (literal $ "HyperlinkURLDestination/"<>escapeColons (escapeStringForXML url)) -- HyperlinkURLDestination with more than one colon crashes CS6 -- | Key for specifying user-defined styles -dynamicStyleKey :: String +dynamicStyleKey :: Text dynamicStyleKey = "custom-style" -- | Convert a list of Pandoc blocks to ICML. @@ -299,7 +300,7 @@ blocksToICML opts style lst = do blockToICML :: PandocMonad m => WriterOptions -> Style -> Block -> WS m (Doc Text) blockToICML opts style (Plain lst) = parStyle opts style lst -- title beginning with fig: indicates that the image is a figure -blockToICML opts style (Para img@[Image _ txt (_,'f':'i':'g':':':_)]) = do +blockToICML opts style (Para img@[Image _ txt (_,Text.stripPrefix "fig:" -> Just _)]) = do figure <- parStyle opts (figureName:style) img caption <- parStyle opts (imgCaptionName:style) txt return $ intersperseBrs [figure, caption] @@ -308,7 +309,7 @@ blockToICML opts style (LineBlock lns) = blockToICML opts style $ linesToPara lns blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) [Str str] blockToICML _ _ b@(RawBlock f str) - | f == Format "icml" = return $ text str + | f == Format "icml" = return $ literal str | otherwise = do report $ BlockNotRendered b return empty @@ -317,7 +318,7 @@ blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedL blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst blockToICML opts style (DefinitionList lst) = intersperseBrs `fmap` mapM (definitionListItemToICML opts style) lst blockToICML opts style (Header lvl (_, cls, _) lst) = - let stl = (headerName ++ show lvl ++ unnumbered):style + let stl = (headerName <> tshow lvl <> unnumbered):style unnumbered = if "unnumbered" `elem` cls then " (unnumbered)" else "" @@ -348,7 +349,7 @@ blockToICML opts style (Table caption aligns widths headers rows) = | otherwise = stl c <- blocksToICML opts stl' cell let cl = return $ inTags True "Cell" - [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c + [("Name", tshow colNr <>":"<> tshow rowNr), ("AppliedCellStyle","CellStyle/Cell")] c liftM2 ($$) cl $ colsToICML rest restAligns rowNr (colNr+1) in do let tabl = if noHeader @@ -356,14 +357,14 @@ blockToICML opts style (Table caption aligns widths headers rows) = else headers:rows cells <- rowsToICML tabl (0::Int) let colWidths w = - [("SingleColumnWidth",show $ 500 * w) | w > 0] - let tupToDoc tup = selfClosingTag "Column" $ ("Name",show $ fst tup) : colWidths (snd tup) + [("SingleColumnWidth",tshow $ 500 * w) | w > 0] + let tupToDoc tup = selfClosingTag "Column" $ ("Name",tshow $ fst tup) : colWidths (snd tup) let colDescs = vcat $ zipWith (curry tupToDoc) [0..nrCols-1] widths let tableDoc = return $ inTags True "Table" [ ("AppliedTableStyle","TableStyle/Table") , ("HeaderRowCount", nrHeaders) - , ("BodyRowCount", show nrRows) - , ("ColumnCount", show nrCols) + , ("BodyRowCount", tshow nrRows) + , ("ColumnCount", tshow nrCols) ] (colDescs $$ cells) liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption blockToICML opts style (Div (_, _, kvs) lst) = @@ -372,7 +373,7 @@ blockToICML opts style (Div (_, _, kvs) lst) = blockToICML _ _ Null = return empty -- | Convert a list of lists of blocks to ICML list items. -listItemsToICML :: PandocMonad m => WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS m (Doc Text) +listItemsToICML :: PandocMonad m => WriterOptions -> Text -> Style -> Maybe ListAttributes -> [[Block]] -> WS m (Doc Text) listItemsToICML _ _ _ _ [] = return empty listItemsToICML opts listType style attribs (first:rest) = do st <- get @@ -397,7 +398,7 @@ listItemToICML opts style isFirst attribs item = doN UpperAlpha = [upperAlphaName] doN _ = [] bw = - [beginsWithName ++ show beginsWith | beginsWith > 1] + [beginsWithName <> tshow beginsWith | beginsWith > 1] in doN numbStl ++ bw makeNumbStart Nothing = [] stl = if isFirst @@ -426,7 +427,7 @@ inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (merge -- | Convert an inline element to ICML. inlineToICML :: PandocMonad m => WriterOptions -> Style -> Inline -> WS m (Doc Text) -inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str +inlineToICML _ style (Str str) = charStyle style $ literal $ escapeStringForXML str inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst @@ -438,19 +439,19 @@ inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ mergeStrings opts $ [Str "“"] ++ lst ++ [Str "”"] inlineToICML opts style (Cite _ lst) = inlinesToICML opts (citeName:style) lst -inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str +inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ literal $ escapeStringForXML str inlineToICML _ style Space = charStyle style space inlineToICML opts style SoftBreak = case writerWrapText opts of WrapAuto -> charStyle style space WrapNone -> charStyle style space WrapPreserve -> charStyle style cr -inlineToICML _ style LineBreak = charStyle style $ text lineSeparator +inlineToICML _ style LineBreak = charStyle style $ literal lineSeparator inlineToICML opts style (Math mt str) = lift (texMathToInlines mt str) >>= (fmap mconcat . mapM (inlineToICML opts style)) inlineToICML _ _ il@(RawInline f str) - | f == Format "icml" = return $ text str + | f == Format "icml" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty @@ -462,7 +463,7 @@ inlineToICML opts style (Link _ lst (url, title)) = do else 1 + fst (head $ links st) newst = st{ links = (ident, url):links st } cont = inTags True "HyperlinkTextSource" - [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content + [("Self","htss-"<>tshow ident), ("Name",title), ("Hidden","false")] content in (cont, newst) inlineToICML opts style (Image attr _ target) = imageICML opts style attr target inlineToICML opts style (Note lst) = footnoteToICML opts style lst @@ -492,7 +493,7 @@ mergeStrings opts = mergeStrings' . map spaceToStr _ -> Str " " spaceToStr x = x - mergeStrings' (Str x : Str y : zs) = mergeStrings' (Str (x ++ y) : zs) + mergeStrings' (Str x : Str y : zs) = mergeStrings' (Str (x <> y) : zs) mergeStrings' (x : xs) = x : mergeStrings' xs mergeStrings' [] = [] @@ -503,20 +504,21 @@ intersperseBrs = vcat . intersperse (selfClosingTag "Br" []) . filter (not . isE -- | Wrap a list of inline elements in an ICML Paragraph Style parStyle :: PandocMonad m => WriterOptions -> Style -> [Inline] -> WS m (Doc Text) parStyle opts style lst = - let slipIn x y = if null y + let slipIn x y = if Text.null y then x - else x ++ " > " ++ y - stlStr = foldr slipIn [] $ reverse style - stl = if null stlStr + else x <> " > " <> y + stlStr = foldr slipIn "" $ reverse style + stl = if Text.null stlStr then "" - else "ParagraphStyle/" ++ stlStr + else "ParagraphStyle/" <> stlStr attrs = ("AppliedParagraphStyle", stl) attrs' = if firstListItemName `elem` style then let ats = attrs : [("NumberingContinue", "false")] - begins = filter (isPrefixOf beginsWithName) style + begins = filter (Text.isPrefixOf beginsWithName) style in if null begins then ats - else let i = fromMaybe "" $ stripPrefix beginsWithName $ head begins + else let i = fromMaybe "" $ Text.stripPrefix beginsWithName + $ head begins in ("NumberingStartAt", i) : ats else [attrs] in do @@ -531,18 +533,18 @@ charStyle style content = doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content in state $ \st -> - let styles = if null stlStr + let styles = if Text.null stlStr then st else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } in (doc, styles) -- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute. -styleToStrAttr :: Style -> (String, [(String, String)]) +styleToStrAttr :: Style -> (Text, [(Text, Text)]) styleToStrAttr style = - let stlStr = unwords $ Set.toAscList $ Set.fromList style + let stlStr = Text.unwords $ Set.toAscList $ Set.fromList style stl = if null style then "$ID/NormalCharacterStyle" - else "CharacterStyle/" ++ stlStr + else "CharacterStyle/" <> stlStr attrs = [("AppliedCharacterStyle", stl)] in (stlStr, attrs) @@ -557,35 +559,35 @@ imageICML opts style attr (src, _) = do report $ CouldNotDetermineImageSize src msg return def) (\e -> do - report $ CouldNotFetchResource src (show e) + report $ CouldNotFetchResource src $ tshow e return def) let (ow, oh) = sizeInPoints imgS (imgWidth, imgHeight) = desiredSizeInPoints opts attr imgS hw = showFl $ ow / 2 hh = showFl $ oh / 2 - scale = showFl (imgWidth / ow) ++ " 0 0 " ++ showFl (imgHeight / oh) - src' = if isURI src then src else "file:" ++ src + scale = showFl (imgWidth / ow) <> " 0 0 " <> showFl (imgHeight / oh) + src' = if isURI src then src else "file:" <> src (stlStr, attrs) = styleToStrAttr style props = inTags True "Properties" [] $ inTags True "PathGeometry" [] $ inTags True "GeometryPathType" [("PathOpen","false")] $ inTags True "PathPointArray" [] $ vcat [ - selfClosingTag "PathPointType" [("Anchor", "-"++hw++" -"++hh), - ("LeftDirection", "-"++hw++" -"++hh), ("RightDirection", "-"++hw++" -"++hh)] - , selfClosingTag "PathPointType" [("Anchor", "-"++hw++" "++hh), - ("LeftDirection", "-"++hw++" "++hh), ("RightDirection", "-"++hw++" "++hh)] - , selfClosingTag "PathPointType" [("Anchor", hw++" "++hh), - ("LeftDirection", hw++" "++hh), ("RightDirection", hw++" "++hh)] - , selfClosingTag "PathPointType" [("Anchor", hw++" -"++hh), - ("LeftDirection", hw++" -"++hh), ("RightDirection", hw++" -"++hh)] + selfClosingTag "PathPointType" [("Anchor", "-"<>hw<>" -"<>hh), + ("LeftDirection", "-"<>hw<>" -"<>hh), ("RightDirection", "-"<>hw<>" -"<>hh)] + , selfClosingTag "PathPointType" [("Anchor", "-"<>hw<>" "<>hh), + ("LeftDirection", "-"<>hw<>" "<>hh), ("RightDirection", "-"<>hw<>" "<>hh)] + , selfClosingTag "PathPointType" [("Anchor", hw<>" "<>hh), + ("LeftDirection", hw<>" "<>hh), ("RightDirection", hw<>" "<>hh)] + , selfClosingTag "PathPointType" [("Anchor", hw<>" -"<>hh), + ("LeftDirection", hw<>" -"<>hh), ("RightDirection", hw<>" -"<>hh)] ] image = inTags True "Image" - [("Self","ue6"), ("ItemTransform", scale++" -"++hw++" -"++hh)] + [("Self","ue6"), ("ItemTransform", scale<>" -"<>hw<>" -"<>hh)] $ vcat [ inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')] ] doc = inTags True "CharacterStyleRange" attrs $ inTags True "Rectangle" [("Self","uec"), ("StrokeWeight", "0"), - ("ItemTransform", scale++" "++hw++" -"++hh)] (props $$ image) + ("ItemTransform", scale<>" "<>hw<>" -"<>hh)] (props $$ image) state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } ) diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs index c58afed9d..75d3d8f9b 100644 --- a/src/Text/Pandoc/Writers/Ipynb.hs +++ b/src/Text/Pandoc/Writers/Ipynb.hs @@ -19,7 +19,6 @@ where import Prelude import Control.Monad.State import qualified Data.Map as M -import Data.Char (toLower) import Data.Maybe (catMaybes, fromMaybe) import Text.Pandoc.Options import Text.Pandoc.Definition @@ -30,6 +29,7 @@ import Text.Pandoc.Class import Text.Pandoc.Logging import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Data.Aeson as Aeson import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Shared (safeRead, isURI) @@ -94,8 +94,8 @@ addAttachment :: PandocMonad m addAttachment (Image attr lab (src,tit)) | not (isURI src) = do (img, mbmt) <- fetchItem src - let mt = maybe "application/octet-stream" (T.pack) mbmt - modify $ M.insert (T.pack src) + let mt = fromMaybe "application/octet-stream" mbmt + modify $ M.insert src (MimeBundle (M.insert mt (BinaryData img) mempty)) return $ Image attr lab ("attachment:" <> src, tit) addAttachment x = return x @@ -121,7 +121,7 @@ extractCells opts (Div (_id,classes,kvs) xs : bs) , "code" `elem` classes = do let (codeContent, rest) = case xs of - (CodeBlock _ t : ys) -> (T.pack t, ys) + (CodeBlock _ t : ys) -> (t, ys) ys -> (mempty, ys) let meta = pairsToJSONMeta kvs outputs <- catMaybes <$> mapM blockToOutput rest @@ -139,7 +139,7 @@ extractCells opts (Div (_id,classes,kvs) xs : bs) case consolidateAdjacentRawBlocks xs of [RawBlock (Format f) raw] -> do let format' = - case map toLower f of + case T.toLower f of "html" -> "text/html" "revealjs" -> "text/html" "latex" -> "text/latex" @@ -148,11 +148,11 @@ extractCells opts (Div (_id,classes,kvs) xs : bs) _ -> f (Cell{ cellType = Raw - , cellSource = Source $ breakLines $ T.pack raw + , cellSource = Source $ breakLines raw , cellMetadata = if format' == "ipynb" -- means no format given then mempty else M.insert "format" - (Aeson.String $ T.pack format') mempty + (Aeson.String format') mempty , cellAttachments = Nothing } :) <$> extractCells opts bs _ -> extractCells opts bs extractCells opts (CodeBlock (_id,classes,kvs) raw : bs) @@ -164,7 +164,7 @@ extractCells opts (CodeBlock (_id,classes,kvs) raw : bs) codeExecutionCount = exeCount , codeOutputs = [] } - , cellSource = Source $ breakLines $ T.pack raw + , cellSource = Source $ breakLines raw , cellMetadata = meta , cellAttachments = Nothing } :) <$> extractCells opts bs extractCells opts (b:bs) = do @@ -177,13 +177,13 @@ extractCells opts (b:bs) = do blockToOutput :: PandocMonad m => Block -> m (Maybe (Output a)) blockToOutput (Div (_,["output","stream",sname],_) (CodeBlock _ t:_)) = return $ Just - $ Stream{ streamName = T.pack sname - , streamText = Source (breakLines $ T.pack t) } + $ Stream{ streamName = sname + , streamText = Source (breakLines t) } blockToOutput (Div (_,["output","error"],kvs) (CodeBlock _ t:_)) = return $ Just - $ Err{ errName = maybe mempty T.pack (lookup "ename" kvs) - , errValue = maybe mempty T.pack (lookup "evalue" kvs) - , errTraceback = breakLines $ T.pack t } + $ Err{ errName = fromMaybe mempty (lookup "ename" kvs) + , errValue = fromMaybe mempty (lookup "evalue" kvs) + , errTraceback = breakLines t } blockToOutput (Div (_,["output","execute_result"],kvs) bs) = do (data', metadata') <- extractData bs return $ Just @@ -207,28 +207,28 @@ extractData bs = do (img, mbmt) <- fetchItem src case mbmt of Just mt -> return - (M.insert (T.pack mt) (BinaryData img) mmap, + (M.insert mt (BinaryData img) mmap, meta <> pairsToJSONMeta kvs) Nothing -> (mmap, meta) <$ report (BlockNotRendered b) go (mmap, meta) b@(CodeBlock (_,["json"],_) code) = - case decode (UTF8.fromStringLazy code) of + case decode (UTF8.fromTextLazy $ TL.fromStrict code) of Just v -> return (M.insert "application/json" (JsonData v) mmap, meta) Nothing -> (mmap, meta) <$ report (BlockNotRendered b) go (mmap, meta) (CodeBlock ("",[],[]) code) = - return (M.insert "text/plain" (TextualData (T.pack code)) mmap, meta) + return (M.insert "text/plain" (TextualData code) mmap, meta) go (mmap, meta) (RawBlock (Format "html") raw) = - return (M.insert "text/html" (TextualData (T.pack raw)) mmap, meta) + return (M.insert "text/html" (TextualData raw) mmap, meta) go (mmap, meta) (RawBlock (Format "latex") raw) = - return (M.insert "text/latex" (TextualData (T.pack raw)) mmap, meta) + return (M.insert "text/latex" (TextualData raw) mmap, meta) go (mmap, meta) (Div _ bs') = foldM go (mmap, meta) bs' go (mmap, meta) b = (mmap, meta) <$ report (BlockNotRendered b) -pairsToJSONMeta :: [(String, String)] -> JSONMeta +pairsToJSONMeta :: [(Text, Text)] -> JSONMeta pairsToJSONMeta kvs = - M.fromList [(T.pack k, case Aeson.decode (UTF8.fromStringLazy v) of + M.fromList [(k, case Aeson.decode (UTF8.fromTextLazy $ TL.fromStrict v) of Just val -> val - Nothing -> String (T.pack v)) + Nothing -> String v) | (k,v) <- kvs , k /= "execution_count" ] diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 44ddba9a0..14df21ea8 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.JATS Copyright : Copyright (C) 2017-2019 John MacFarlane @@ -18,9 +19,8 @@ module Text.Pandoc.Writers.JATS ( writeJATS ) where import Prelude import Control.Monad.Reader import Control.Monad.State -import Data.Char (toLower) import Data.Generics (everywhere, mkT) -import Data.List (partition, isPrefixOf) +import Data.List (partition) import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime) @@ -88,7 +88,7 @@ docToJATS opts (Pandoc meta blocks) = do case getField "date" metadata of Nothing -> NullVal Just (SimpleVal (x :: Doc Text)) -> - case parseDate (T.unpack $ render Nothing x) of + case parseDate (render Nothing x) of Nothing -> NullVal Just day -> let (y,m,d) = toGregorian day @@ -158,7 +158,7 @@ deflistItemToJATS opts term defs = do -- | Convert a list of lists of blocks to a list of JATS list items. listItemsToJATS :: PandocMonad m - => WriterOptions -> Maybe [String] -> [[Block]] -> JATS m (Doc Text) + => WriterOptions -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text) listItemsToJATS opts markers items = case markers of Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items @@ -166,41 +166,41 @@ listItemsToJATS opts markers items = -- | Convert a list of blocks into a JATS list item. listItemToJATS :: PandocMonad m - => WriterOptions -> Maybe String -> [Block] -> JATS m (Doc Text) + => WriterOptions -> Maybe Text -> [Block] -> JATS m (Doc Text) listItemToJATS opts mbmarker item = do contents <- wrappedBlocksToJATS (not . isParaOrList) opts (walk demoteHeaderAndRefs item) return $ inTagsIndented "list-item" $ - maybe empty (\lbl -> inTagsSimple "label" (text lbl)) mbmarker + maybe empty (\lbl -> inTagsSimple "label" (text $ T.unpack lbl)) mbmarker $$ contents -imageMimeType :: String -> [(String, String)] -> (String, String) +imageMimeType :: Text -> [(Text, Text)] -> (Text, Text) imageMimeType src kvs = - let mbMT = getMimeType src + let mbMT = getMimeType (T.unpack src) maintype = fromMaybe "image" $ lookup "mimetype" kvs `mplus` - (takeWhile (/='/') <$> mbMT) + (T.takeWhile (/='/') <$> mbMT) subtype = fromMaybe "" $ lookup "mime-subtype" kvs `mplus` - ((drop 1 . dropWhile (/='/')) <$> mbMT) + ((T.drop 1 . T.dropWhile (/='/')) <$> mbMT) in (maintype, subtype) -languageFor :: [String] -> String +languageFor :: [Text] -> Text languageFor classes = case langs of (l:_) -> escapeStringForXML l [] -> "" - where isLang l = map toLower l `elem` map (map toLower) languages + where isLang l = T.toLower l `elem` map T.toLower languages langsFrom s = if isLang s then [s] - else languagesByExtension . map toLower $ s + else languagesByExtension . T.toLower $ s langs = concatMap langsFrom classes -codeAttr :: Attr -> (String, [(String, String)]) +codeAttr :: Attr -> (Text, [(Text, Text)]) codeAttr (ident,classes,kvs) = (lang, attr) where - attr = [("id",ident) | not (null ident)] ++ - [("language",lang) | not (null lang)] ++ + attr = [("id",ident) | not (T.null ident)] ++ + [("language",lang) | not (T.null lang)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["code-type", "code-version", "executable", "language-version", "orientation", @@ -211,7 +211,7 @@ codeAttr (ident,classes,kvs) = (lang, attr) blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text) blockToJATS _ Null = return empty blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do - let idAttr = [("id", writerIdentifierPrefix opts ++ id') | not (null id')] + let idAttr = [("id", writerIdentifierPrefix opts <> id') | not (T.null id')] let otherAttrs = ["sec-type", "specific-use"] let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs] title' <- inlinesToJATS opts ils @@ -219,21 +219,21 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do return $ inTags True "sec" attribs $ inTagsSimple "title" title' $$ contents -- Bibliography reference: -blockToJATS opts (Div ('r':'e':'f':'-':_,_,_) [Para lst]) = +blockToJATS opts (Div (T.stripPrefix "ref-" -> Just _,_,_) [Para lst]) = inlinesToJATS opts lst blockToJATS opts (Div ("refs",_,_) xs) = do contents <- blocksToJATS opts xs return $ inTagsIndented "ref-list" contents blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] return $ inTags True cls attr contents blockToJATS opts (Div (ident,_,kvs) bs) = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] @@ -245,13 +245,13 @@ blockToJATS opts (Header _ _ title) = do blockToJATS opts (Plain lst) = blockToJATS opts (Para lst) -- title beginning with fig: indicates that the image is a figure blockToJATS opts (Para [Image (ident,_,kvs) txt - (src,'f':'i':'g':':':tit)]) = do + (src,T.stripPrefix "fig:" -> Just tit)]) = do alt <- inlinesToJATS opts txt let (maintype, subtype) = imageMimeType src kvs let capt = if null txt then empty else inTagsSimple "caption" $ inTagsSimple "p" alt - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation", "position", "specific-use"]] let graphicattr = [("mimetype",maintype), @@ -262,11 +262,11 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt capt $$ selfClosingTag "graphic" graphicattr blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do let (maintype, subtype) = imageMimeType src kvs - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ - [("xlink:title", tit) | not (null tit)] ++ + [("xlink:title", tit) | not (T.null tit)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift", "content-type", "specific-use", "xlink:actuate", "xlink:href", "xlink:role", "xlink:show", @@ -279,9 +279,9 @@ blockToJATS opts (LineBlock lns) = blockToJATS opts (BlockQuote blocks) = inTagsIndented "disp-quote" <$> blocksToJATS opts blocks blockToJATS _ (CodeBlock a str) = return $ - inTags False tag attr (flush (text (escapeStringForXML str))) + inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str))) where (lang, attr) = codeAttr a - tag = if null lang then "preformat" else "code" + tag = if T.null lang then "preformat" else "code" blockToJATS _ (BulletList []) = return empty blockToJATS opts (BulletList lst) = inTags True "list" [("list-type", "bullet")] <$> @@ -307,16 +307,16 @@ blockToJATS opts (OrderedList (start, numstyle, delimstyle) items) = do blockToJATS opts (DefinitionList lst) = inTags True "def-list" [] <$> deflistItemsToJATS opts lst blockToJATS _ b@(RawBlock f str) - | f == "jats" = return $ text str -- raw XML block + | f == "jats" = return $ text $ T.unpack str -- raw XML block | otherwise = do report $ BlockNotRendered b return empty blockToJATS _ HorizontalRule = return empty -- not semantic blockToJATS opts (Table [] aligns widths headers rows) = do - let percent w = show (truncate (100*w) :: Integer) ++ "*" + let percent w = tshow (truncate (100*w) :: Integer) <> "*" let coltags = vcat $ zipWith (\w al -> selfClosingTag "col" ([("width", percent w) | w > 0] ++ - [("align", alignmentToString al)])) widths aligns + [("align", alignmentToText al)])) widths aligns thead <- if all null headers then return empty else inTagsIndented "thead" <$> tableRowToJATS opts True headers @@ -328,8 +328,8 @@ blockToJATS opts (Table caption aligns widths headers rows) = do tbl <- blockToJATS opts (Table [] aligns widths headers rows) return $ inTags True "table-wrap" [] $ captionDoc $$ tbl -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of +alignmentToText :: Alignment -> Text +alignmentToText alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" @@ -364,7 +364,7 @@ inlinesToJATS opts lst = hcat <$> mapM (inlineToJATS opts) (fixCitations lst) x : Str (stringify ys) : fixCitations zs where needsFixing (RawInline (Format "jats") z) = - "<pub-id pub-id-type=" `isPrefixOf` z + "<pub-id pub-id-type=" `T.isPrefixOf` z needsFixing _ = False isRawInline (RawInline{}) = True isRawInline _ = False @@ -373,7 +373,7 @@ inlinesToJATS opts lst = hcat <$> mapM (inlineToJATS opts) (fixCitations lst) -- | Convert an inline element to JATS. inlineToJATS :: PandocMonad m => WriterOptions -> Inline -> JATS m (Doc Text) -inlineToJATS _ (Str str) = return $ text $ escapeStringForXML str +inlineToJATS _ (Str str) = return $ text $ T.unpack $ escapeStringForXML str inlineToJATS opts (Emph lst) = inTagsSimple "italic" <$> inlinesToJATS opts lst inlineToJATS opts (Strong lst) = @@ -393,11 +393,11 @@ inlineToJATS opts (Quoted DoubleQuote lst) = do contents <- inlinesToJATS opts lst return $ char '“' <> contents <> char '”' inlineToJATS _ (Code a str) = - return $ inTags False tag attr $ text (escapeStringForXML str) + return $ inTags False tag attr $ literal (escapeStringForXML str) where (lang, attr) = codeAttr a - tag = if null lang then "monospace" else "code" + tag = if T.null lang then "monospace" else "code" inlineToJATS _ il@(RawInline f x) - | f == "jats" = return $ text x + | f == "jats" = return $ literal x | otherwise = do report $ InlineNotRendered il return empty @@ -412,12 +412,12 @@ inlineToJATS opts (Note contents) = do let notenum = case notes of (n, _):_ -> n + 1 [] -> 1 - thenote <- inTags True "fn" [("id","fn" ++ show notenum)] + thenote <- inTags True "fn" [("id","fn" <> tshow notenum)] <$> wrappedBlocksToJATS (not . isPara) opts (walk demoteHeaderAndRefs contents) modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes } return $ inTags False "xref" [("ref-type", "fn"), - ("rid", "fn" ++ show notenum)] + ("rid", "fn" <> tshow notenum)] $ text (show notenum) inlineToJATS opts (Cite _ lst) = -- TODO revisit this after examining the jats.csl pipeline @@ -425,7 +425,7 @@ inlineToJATS opts (Cite _ lst) = inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils inlineToJATS opts (Span (ident,_,kvs) ils) = do contents <- inlinesToJATS opts ils - let attr = [("id",ident) | not (null ident)] ++ + let attr = [("id",ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs , k `elem` ["content-type", "rationale", @@ -447,7 +447,7 @@ inlineToJATS _ (Math t str) = do InlineMath -> "inline-formula" let rawtex = inTagsSimple "tex-math" $ text "<![CDATA[" <> - text str <> + literal str <> text "]]>" return $ inTagsSimple tagtype $ case res of @@ -455,11 +455,11 @@ inlineToJATS _ (Math t str) = do cr <> rawtex $$ text (Xml.ppcElement conf $ fixNS r) Left _ -> rawtex -inlineToJATS _ (Link _attr [Str t] ('m':'a':'i':'l':'t':'o':':':email, _)) +inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _)) | escapeURI t == email = - return $ inTagsSimple "email" $ text (escapeStringForXML email) -inlineToJATS opts (Link (ident,_,kvs) txt ('#':src, _)) = do - let attr = [("id", ident) | not (null ident)] ++ + return $ inTagsSimple "email" $ literal (escapeStringForXML email) +inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do + let attr = [("id", ident) | not (T.null ident)] ++ [("alt", stringify txt) | not (null txt)] ++ [("rid", src)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] @@ -469,10 +469,10 @@ inlineToJATS opts (Link (ident,_,kvs) txt ('#':src, _)) = do contents <- inlinesToJATS opts txt return $ inTags False "xref" attr contents inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do - let attr = [("id", ident) | not (null ident)] ++ + let attr = [("id", ident) | not (T.null ident)] ++ [("ext-link-type", "uri"), ("xlink:href", src)] ++ - [("xlink:title", tit) | not (null tit)] ++ + [("xlink:title", tit) | not (T.null tit)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["assigning-authority", "specific-use", "xlink:actuate", "xlink:role", "xlink:show", @@ -480,18 +480,18 @@ inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do contents <- inlinesToJATS opts txt return $ inTags False "ext-link" attr contents inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do - let mbMT = getMimeType src + let mbMT = getMimeType (T.unpack src) let maintype = fromMaybe "image" $ lookup "mimetype" kvs `mplus` - (takeWhile (/='/') <$> mbMT) + (T.takeWhile (/='/') <$> mbMT) let subtype = fromMaybe "" $ lookup "mime-subtype" kvs `mplus` - ((drop 1 . dropWhile (/='/')) <$> mbMT) - let attr = [("id", ident) | not (null ident)] ++ + ((T.drop 1 . T.dropWhile (/='/')) <$> mbMT) + let attr = [("id", ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ - [("xlink:title", tit) | not (null tit)] ++ + [("xlink:title", tit) | not (T.null tit)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift", "content-type", "specific-use", "xlink:actuate", "xlink:href", "xlink:role", "xlink:show", @@ -517,8 +517,8 @@ demoteHeaderAndRefs (Div ("refs",cls,kvs) bs) = Div ("",cls,kvs) bs demoteHeaderAndRefs x = x -parseDate :: String -> Maybe Day -parseDate s = msum (map (\fs -> parsetimeWith fs s) formats) :: Maybe Day +parseDate :: Text -> Maybe Day +parseDate s = msum (map (\fs -> parsetimeWith fs $ T.unpack s) formats) :: Maybe Day where parsetimeWith = parseTimeM True defaultTimeLocale formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", "%e %B %Y", "%b. %e, %Y", "%B %e, %Y", diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index b610dd8bf..d26dae4c7 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -17,7 +17,6 @@ JIRA: module Text.Pandoc.Writers.Jira ( writeJira ) where import Prelude import Control.Monad.State.Strict -import Data.Char (toLower) import Data.Foldable (find) import Data.Text (Text, pack) import Text.Pandoc.Class (PandocMonad, report) @@ -97,7 +96,7 @@ anchor :: Attr -> Text anchor (ident,_,_) = if ident == "" then "" - else "{anchor:" <> pack ident <> "}" + else "{anchor:" <> ident <> "}" -- | Append a newline character unless we are in a list. appendNewlineUnlessInList :: PandocMonad m @@ -130,7 +129,7 @@ blockToJira opts (LineBlock lns) = blockToJira _ b@(RawBlock f str) = if f == Format "jira" - then return (pack str) + then return str else "" <$ report (BlockNotRendered b) blockToJira _ HorizontalRule = return "----\n" @@ -141,14 +140,14 @@ blockToJira opts (Header level attr inlines) = do return $ prefix <> anchor attr <> contents <> "\n" blockToJira _ (CodeBlock attr@(_,classes,_) str) = do - let lang = find (\c -> map toLower c `elem` knownLanguages) classes + let lang = find (\c -> T.toLower c `elem` knownLanguages) classes let start = case lang of Nothing -> "{code}" - Just l -> "{code:" <> pack l <> "}" + Just l -> "{code:" <> l <> "}" let anchorMacro = anchor attr appendNewlineUnlessInList . T.intercalate "\n" $ (if anchorMacro == "" then id else (anchorMacro :)) - [start, pack str, "{code}"] + [start, str, "{code}"] blockToJira opts (BlockQuote [p@(Para _)]) = do contents <- blockToJira opts p @@ -274,9 +273,9 @@ inlineToJira opts (Quoted DoubleQuote lst) = do inlineToJira opts (Cite _ lst) = inlineListToJira opts lst inlineToJira _ (Code attr str) = - return (anchor attr <> "{{" <> pack str <> "}}") + return (anchor attr <> "{{" <> str <> "}}") -inlineToJira _ (Str str) = return $ escapeStringForJira (pack str) +inlineToJira _ (Str str) = return $ escapeStringForJira str inlineToJira opts (Math InlineMath str) = lift (texMathToInlines InlineMath str) >>= inlineListToJira opts @@ -288,7 +287,7 @@ inlineToJira opts (Math DisplayMath str) = do inlineToJira _opts il@(RawInline f str) = if f == Format "jira" - then return (pack str) + then return str else "" <$ report (InlineNotRendered il) inlineToJira _ LineBreak = return "\n" @@ -302,12 +301,12 @@ inlineToJira opts (Link _attr txt (src, _title)) = do return $ T.concat [ "[" , if null txt then "" else linkText <> "|" - , pack src + , src , "]" ] inlineToJira _opts (Image attr _alt (src, _title)) = - return . T.concat $ [anchor attr, "!", pack src, "!"] + return . T.concat $ [anchor attr, "!", src, "!"] inlineToJira opts (Note contents) = do curNotes <- gets stNotes @@ -318,7 +317,7 @@ inlineToJira opts (Note contents) = do return $ "[" <> pack (show newnum) <> "]" -- | Language codes recognized by jira -knownLanguages :: [String] +knownLanguages :: [Text] knownLanguages = [ "actionscript", "ada", "applescript", "bash", "c", "c#", "c++" , "css", "erlang", "go", "groovy", "haskell", "html", "javascript" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f56b3a657..8b46edfef 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -2,6 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.LaTeX Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -22,9 +23,8 @@ import Control.Applicative ((<|>)) import Control.Monad.State.Strict import Data.Monoid (Any(..)) import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, - isPunctuation, ord, toLower) -import Data.List (foldl', intercalate, intersperse, nubBy, - stripPrefix, (\\), uncons) + isPunctuation, ord) +import Data.List (foldl', intersperse, nubBy, (\\), uncons) import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) import qualified Data.Map as M import Data.Text (Text) @@ -70,7 +70,7 @@ data WriterState = , stCsquotes :: Bool -- true if document uses csquotes , stHighlighting :: Bool -- true if document has highlighted code , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit - , stInternalLinks :: [String] -- list of internal link targets + , stInternalLinks :: [Text] -- list of internal link targets , stBeamer :: Bool -- produce beamer , stEmptyLine :: Bool -- true if no content on line , stHasCslRefs :: Bool -- has a Div with class refs @@ -132,8 +132,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do _ -> blocks else blocks -- see if there are internal links - let isInternalLink (Link _ _ ('#':xs,_)) = [xs] - isInternalLink _ = [] + let isInternalLink (Link _ _ (s,_)) + | Just ('#', xs) <- T.uncons s = [xs] + isInternalLink _ = [] modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options @@ -149,7 +150,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do let documentClass = case (lookupContext "documentclass" (writerVariables options)) `mplus` - (T.pack . stringify <$> lookupMeta "documentclass" meta) of + (stringify <$> lookupMeta "documentclass" meta) of Just x -> x Nothing | beamer -> "beamer" | otherwise -> case writerTopLevelDivision options of @@ -188,8 +189,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do ] let toPolyObj :: Lang -> Val Text toPolyObj lang = MapVal $ Context $ - M.fromList [ ("name" , SimpleVal $ text name) - , ("options" , SimpleVal $ text opts) ] + M.fromList [ ("name" , SimpleVal $ literal name) + , ("options" , SimpleVal $ literal opts) ] where (name, opts) = toPolyglossia lang mblang <- toLang $ case getLang options meta of @@ -201,15 +202,15 @@ pandocToLaTeX options (Pandoc meta blocks) = do let dirs = query (extract "dir") blocks let context = defField "toc" (writerTableOfContents options) $ - defField "toc-depth" (T.pack . show $ + defField "toc-depth" (tshow $ (writerTOCDepth options - if stHasChapters st then 1 else 0)) $ defField "body" main $ - defField "title-meta" (T.pack titleMeta) $ + defField "title-meta" titleMeta $ defField "author-meta" - (T.pack $ intercalate "; " authorsMeta) $ + (T.intercalate "; " authorsMeta) $ defField "documentclass" documentClass $ defField "verbatim-in-note" (stVerbInNote st) $ defField "tables" (stTable st) $ @@ -245,42 +246,42 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "csl-refs" (stHasCslRefs st) $ defField "csl-hanging-indent" (stCslHangingIndent st) $ defField "geometry" geometryFromMargins $ - (case T.unpack . render Nothing <$> + (case T.uncons . render Nothing <$> getField "papersize" metadata of - -- uppercase a4, a5, etc. - Just (('A':d:ds) :: String) - | all isDigit (d:ds) -> resetField "papersize" - (T.pack ('a':d:ds)) - _ -> id) + -- uppercase a4, a5, etc. + Just (Just ('A', ds)) + | not (T.null ds) && T.all isDigit ds + -> resetField "papersize" ("a" <> ds) + _ -> id) metadata let context' = -- note: lang is used in some conditionals in the template, -- so we need to set it if we have any babel/polyglossia: maybe id (\l -> defField "lang" - ((text $ renderLang l) :: Doc Text)) mblang + (literal $ renderLang l)) mblang $ maybe id (\l -> defField "babel-lang" - ((text $ toBabel l) :: Doc Text)) mblang + (literal $ toBabel l)) mblang $ defField "babel-otherlangs" - (map ((text . toBabel) :: Lang -> Doc Text) docLangs) + (map (literal . toBabel) docLangs) $ defField "babel-newcommands" (vcat $ - map (\(poly, babel) -> (text :: String -> Doc Text) $ + map (\(poly, babel) -> literal $ -- \textspanish and \textgalician are already used by babel -- save them as \oritext... and let babel use that if poly `elem` ["spanish", "galician"] - then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++ - "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++ - "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext" - ++ poly ++ "}}\n" ++ - "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++ - "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" - ++ poly ++ "}{##2}}}" + then "\\let\\oritext" <> poly <> "\\text" <> poly <> "\n" <> + "\\AddBabelHook{" <> poly <> "}{beforeextras}" <> + "{\\renewcommand{\\text" <> poly <> "}{\\oritext" + <> poly <> "}}\n" <> + "\\AddBabelHook{" <> poly <> "}{afterextras}" <> + "{\\renewcommand{\\text" <> poly <> "}[2][]{\\foreignlanguage{" + <> poly <> "}{##2}}}" else (if poly == "latin" -- see #4161 then "\\providecommand{\\textlatin}{}\n\\renewcommand" - else "\\newcommand") ++ "{\\text" ++ poly ++ - "}[2][]{\\foreignlanguage{" ++ babel ++ "}{#2}}\n" ++ - "\\newenvironment{" ++ poly ++ - "}[2][]{\\begin{otherlanguage}{" ++ - babel ++ "}}{\\end{otherlanguage}}" + else "\\newcommand") <> "{\\text" <> poly <> + "}[2][]{\\foreignlanguage{" <> babel <> "}{#2}}\n" <> + "\\newenvironment{" <> poly <> + "}[2][]{\\begin{otherlanguage}{" <> + babel <> "}}{\\end{otherlanguage}}" ) -- eliminate duplicates that have same polyglossia name $ nubBy (\a b -> fst a == fst b) @@ -305,15 +306,16 @@ data StringContext = TextString deriving (Eq) -- escape things as needed for LaTeX -stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String +stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text stringToLaTeX context zs = do opts <- gets stOptions - return $ - foldr (go opts context) mempty $ + return $ T.pack $ + foldr (go opts context) mempty $ T.unpack $ if writerPreferAscii opts - then T.unpack $ Normalize.normalize Normalize.NFD $ T.pack zs + then Normalize.normalize Normalize.NFD zs else zs where + go :: WriterOptions -> StringContext -> Char -> String -> String go opts ctx x xs = let ligatures = isEnabled Ext_smart opts && ctx == TextString isUrl = ctx == URLString @@ -324,12 +326,12 @@ stringToLaTeX context zs = do emits s = case mbAccentCmd of Just cmd -> - cmd ++ "{" ++ s ++ "}" ++ drop 1 xs -- drop combining accent - Nothing -> s ++ xs + cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent + Nothing -> s <> xs emitc c = case mbAccentCmd of Just cmd -> - cmd ++ "{" ++ [c] ++ "}" ++ drop 1 xs -- drop combining accent + cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent Nothing -> c : xs emitcseq cs = do case xs of @@ -434,17 +436,17 @@ accents = M.fromList , ('\8413', "\\textcircled") ] -toLabel :: PandocMonad m => String -> LW m String +toLabel :: PandocMonad m => Text -> LW m Text toLabel z = go `fmap` stringToLaTeX URLString z - where go [] = "" - go (x:xs) - | (isLetter x || isDigit x) && isAscii x = x:go xs - | x `elem` ("_-+=:;." :: String) = x:go xs - | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs + where + go = T.concatMap $ \x -> case x of + _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x + | x `elemText` "_-+=:;." -> T.singleton x + | otherwise -> T.pack $ "ux" <> printf "%x" (ord x) -- | Puts contents into LaTeX command. -inCmd :: String -> Doc Text -> Doc Text -inCmd cmd contents = char '\\' <> text cmd <> braces contents +inCmd :: Text -> Doc Text -> Doc Text +inCmd cmd contents = char '\\' <> literal cmd <> braces contents toSlides :: PandocMonad m => [Block] -> LW m [Block] toSlides bs = do @@ -475,10 +477,10 @@ blockToLaTeX :: PandocMonad m blockToLaTeX Null = return empty blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do ref <- toLabel identifier - let anchor = if null identifier + let anchor = if T.null identifier then empty else cr <> "\\protect\\hypertarget" <> - braces (text ref) <> braces empty + braces (literal ref) <> braces empty title' <- inlineListToLaTeX ils contents <- blockListToLaTeX bs wrapDiv attr $ ("\\begin{block}" <> braces title' <> anchor) $$ @@ -502,23 +504,23 @@ blockToLaTeX (Div (identifier,"slide":dclasses,dkvs) , isNothing (lookup "fragile" kvs) , "fragile" `notElem` classes] ++ [k | k <- classes, k `elem` frameoptions] ++ - [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions] + [k <> "=" <> v | (k,v) <- kvs, k `elem` frameoptions] let options = if null optionslist then empty - else brackets (text (intercalate "," optionslist)) + else brackets (literal (T.intercalate "," optionslist)) slideTitle <- if ils == [Str "\0"] -- marker for hrule then return empty else braces <$> inlineListToLaTeX ils ref <- toLabel identifier - let slideAnchor = if null identifier + let slideAnchor = if T.null identifier then empty else cr <> "\\protect\\hypertarget" <> - braces (text ref) <> braces empty + braces (literal ref) <> braces empty contents <- blockListToLaTeX bs >>= wrapDiv (identifier,classes,kvs) return $ ("\\begin{frame}" <> options <> slideTitle <> slideAnchor) $$ contents $$ "\\end{frame}" -blockToLaTeX (Div (identifier@(_:_),dclasses,dkvs) +blockToLaTeX (Div (identifier@(T.uncons -> Just (_,_)),dclasses,dkvs) (Header lvl ("",hclasses,hkvs) ils : bs)) = do -- move identifier from div to header blockToLaTeX (Div ("",dclasses,dkvs) @@ -557,21 +559,23 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do blockToLaTeX (Plain lst) = inlineListToLaTeX lst -- title beginning with fig: indicates that the image is a figure -blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,'f':'i':'g':':':tit)]) = do - (capt, captForLof, footnotes) <- getCaption True txt - lab <- labelFor ident - let caption = "\\caption" <> captForLof <> braces capt <> lab - img <- inlineToLaTeX (Image attr txt (src,tit)) - innards <- hypertarget True ident $ - "\\centering" $$ img $$ caption <> cr - let figure = cr <> "\\begin{figure}" $$ innards $$ "\\end{figure}" - st <- get - return $ (if stInMinipage st - -- can't have figures in notes or minipage (here, table cell) - -- http://www.tex.ac.uk/FAQ-ouparmd.html - then cr <> "\\begin{center}" $$ img $+$ capt $$ - "\\end{center}" - else figure) $$ footnotes +blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt + = do + (capt, captForLof, footnotes) <- getCaption True txt + lab <- labelFor ident + let caption = "\\caption" <> captForLof <> braces capt <> lab + img <- inlineToLaTeX (Image attr txt (src,tit)) + innards <- hypertarget True ident $ + "\\centering" $$ img $$ caption <> cr + let figure = cr <> "\\begin{figure}" $$ innards $$ "\\end{figure}" + st <- get + return $ (if stInMinipage st + -- can't have figures in notes or minipage (here, table cell) + -- http://www.tex.ac.uk/FAQ-ouparmd.html + then cr <> "\\begin{center}" $$ img $+$ capt $$ + "\\end{center}" + else figure) $$ footnotes -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do beamer <- gets stBeamer @@ -606,7 +610,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do else linkAnchor' <> "%" let lhsCodeBlock = do modify $ \s -> s{ stLHS = True } - return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ + return $ flush (linkAnchor $$ "\\begin{code}" $$ literal str $$ "\\end{code}") $$ cr let rawCodeBlock = do st <- get @@ -614,41 +618,41 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do then modify (\s -> s{ stVerbInNote = True }) >> return "Verbatim" else return "verbatim" - return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ - text str $$ text ("\\end{" ++ env ++ "}")) <> cr + return $ flush (linkAnchor $$ literal ("\\begin{" <> env <> "}") $$ + literal str $$ literal ("\\end{" <> env <> "}")) <> cr let listingsCodeBlock = do st <- get ref <- toLabel identifier let params = if writerListings (stOptions st) then (case getListingsLanguage classes of - Just l -> [ "language=" ++ mbBraced l ] + Just l -> [ "language=" <> mbBraced l ] Nothing -> []) ++ [ "numbers=left" | "numberLines" `elem` classes || "number" `elem` classes || "number-lines" `elem` classes ] ++ [ (if key == "startFrom" then "firstnumber" - else key) ++ "=" ++ mbBraced attr | + else key) <> "=" <> mbBraced attr | (key,attr) <- keyvalAttr, key `notElem` ["exports", "tangle", "results"] -- see #4889 ] ++ (if identifier == "" then [] - else [ "label=" ++ ref ]) + else [ "label=" <> ref ]) else [] printParams | null params = empty | otherwise = brackets $ hcat (intersperse ", " - (map text params)) - return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ + (map literal params)) + return $ flush ("\\begin{lstlisting}" <> printParams $$ literal str $$ "\\end{lstlisting}") $$ cr let highlightedCodeBlock = case highlight (writerSyntaxMap opts) formatLaTeXBlock ("",classes,keyvalAttr) str of Left msg -> do - unless (null msg) $ + unless (T.null msg) $ report $ CouldNotHighlight msg rawCodeBlock Right h -> do @@ -667,7 +671,7 @@ blockToLaTeX b@(RawBlock f x) = do beamer <- gets stBeamer if (f == Format "latex" || f == Format "tex" || (f == Format "beamer" && beamer)) - then return $ text x + then return $ literal x else do report $ BlockNotRendered b return empty @@ -680,7 +684,7 @@ blockToLaTeX (BulletList lst) = do let spacing = if isTightList lst then text "\\tightlist" else empty - return $ text ("\\begin{itemize}" ++ inc) $$ spacing $$ vcat items $$ + return $ text ("\\begin{itemize}" <> inc) $$ spacing $$ vcat items $$ "\\end{itemize}" blockToLaTeX (OrderedList _ []) = return empty -- otherwise latex error blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do @@ -712,7 +716,7 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do LowerAlpha -> "a" Example -> "1" DefaultStyle -> "1" - let enum = text $ "enum" ++ map toLower (toRomanNumeral oldlevel) + let enum = literal $ "enum" <> T.toLower (toRomanNumeral oldlevel) let stylecommand | numstyle == DefaultStyle && numdelim == DefaultDelim = empty | beamer && numstyle == Decimal && numdelim == Period = empty @@ -726,7 +730,7 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do let spacing = if isTightList lst then text "\\tightlist" else empty - return $ text ("\\begin{enumerate}" ++ inc) + return $ text ("\\begin{enumerate}" <> inc) $$ stylecommand $$ resetcounter $$ spacing @@ -741,7 +745,7 @@ blockToLaTeX (DefinitionList lst) = do let spacing = if all isTightList (map snd lst) then text "\\tightlist" else empty - return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ + return $ text ("\\begin{description}" <> inc) $$ spacing $$ vcat items $$ "\\end{description}" blockToLaTeX HorizontalRule = return @@ -771,7 +775,7 @@ blockToLaTeX (Table caption aligns widths heads rows) = do else "\\caption" <> captForLof <> braces captionText <> "\\tabularnewline" rows' <- mapM (tableRowToLaTeX False aligns widths) rows - let colDescriptors = text $ concatMap toColDescriptor aligns + let colDescriptors = literal $ T.concat $ map toColDescriptor aligns modify $ \s -> s{ stTable = True } notes <- notesToLaTeX <$> gets stNotes return $ "\\begin{longtable}[]" <> @@ -806,7 +810,7 @@ getCaption externalNotes txt = do else return empty return (capt, captForLof, footnotes) -toColDescriptor :: Alignment -> String +toColDescriptor :: Alignment -> Text toColDescriptor align = case align of AlignLeft -> "l" @@ -853,9 +857,9 @@ fixLineBreaks' ils = case splitBy (== LineBreak) ils of [] -> [] [xs] -> xs chunks -> RawInline "tex" "\\vtop{" : - concatMap tohbox chunks ++ + concatMap tohbox chunks <> [RawInline "tex" "}"] - where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++ + where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <> [RawInline "tex" "}"] -- We also change display math to inline math, since display @@ -933,8 +937,9 @@ defListItemToLaTeX (term, defs) = do modify $ \s -> s{stInItem = False} -- put braces around term if it contains an internal link, -- since otherwise we get bad bracket interactions: \item[\hyperref[..] - let isInternalLink (Link _ _ ('#':_,_)) = True - isInternalLink _ = False + let isInternalLink (Link _ _ (src,_)) + | Just ('#', _) <- T.uncons src = True + isInternalLink _ = False let term'' = if any isInternalLink term then braces term' else term' @@ -949,8 +954,8 @@ defListItemToLaTeX (term, defs) = do -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: PandocMonad m - => [String] -- classes - -> [Char] + => [Text] -- classes + -> Text -> Int -> [Inline] -> LW m (Doc Text) @@ -958,9 +963,9 @@ sectionHeader classes ident level lst = do let unnumbered = "unnumbered" `elem` classes let unlisted = "unlisted" `elem` classes txt <- inlineListToLaTeX lst - plain <- stringToLaTeX TextString $ concatMap stringify lst + plain <- stringToLaTeX TextString $ T.concat $ map stringify lst let removeInvalidInline (Note _) = [] - removeInvalidInline (Span (id', _, _) _) | not (null id') = [] + removeInvalidInline (Span (id', _, _) _) | not (T.null id') = [] removeInvalidInline Image{} = [] removeInvalidInline x = [x] let lstNoNotes = foldr (mappend . (\x -> walkM removeInvalidInline x)) mempty lst @@ -972,11 +977,11 @@ sectionHeader classes ident level lst = do then return empty else return $ brackets txtNoNotes - let contents = if render Nothing txt == T.pack plain + let contents = if render Nothing txt == plain then braces txt else braces (text "\\texorpdfstring" <> braces txt - <> braces (text plain)) + <> braces (literal plain)) book <- gets stHasChapters opts <- gets stOptions let topLevelDivision = if book && writerTopLevelDivision opts == TopLevelDefault @@ -1036,45 +1041,45 @@ wrapDiv (_,classes,kvs) t = do then \contents -> let w = maybe "0.48" fromPct (lookup "width" kvs) in inCmd "begin" "column" <> - braces (text w <> "\\textwidth") + braces (literal w <> "\\textwidth") $$ contents $$ inCmd "end" "column" else id fromPct xs = - case reverse xs of - '%':ds -> case safeRead (reverse ds) of - Just digits -> showFl (digits / 100 :: Double) - Nothing -> xs - _ -> xs + case T.unsnoc xs of + Just (ds, '%') -> case safeRead ds of + Just digits -> showFl (digits / 100 :: Double) + Nothing -> xs + _ -> xs wrapDir = case lookup "dir" kvs of Just "rtl" -> align "RTL" Just "ltr" -> align "LTR" _ -> id wrapLang txt = case lang of Just lng -> let (l, o) = toPolyglossiaEnv lng - ops = if null o + ops = if T.null o then "" - else brackets $ text o - in inCmd "begin" (text l) <> ops + else brackets $ literal o + in inCmd "begin" (literal l) <> ops $$ blankline <> txt <> blankline - $$ inCmd "end" (text l) + $$ inCmd "end" (literal l) Nothing -> txt return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t -hypertarget :: PandocMonad m => Bool -> String -> Doc Text -> LW m (Doc Text) +hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text) hypertarget _ "" x = return x hypertarget addnewline ident x = do - ref <- text `fmap` toLabel ident + ref <- literal `fmap` toLabel ident return $ text "\\hypertarget" <> braces ref <> braces ((if addnewline && not (isEmpty x) then ("%" <> cr) else empty) <> x) -labelFor :: PandocMonad m => String -> LW m (Doc Text) +labelFor :: PandocMonad m => Text -> LW m (Doc Text) labelFor "" = return empty labelFor ident = do - ref <- text `fmap` toLabel ident + ref <- literal `fmap` toLabel ident return $ text "\\label" <> braces ref -- | Convert list of inline elements to LaTeX. @@ -1088,11 +1093,12 @@ inlineListToLaTeX lst = -- so we turn nbsps after hard breaks to \hspace commands. -- this is mostly used in verse. where fixLineInitialSpaces [] = [] - fixLineInitialSpaces (LineBreak : Str s@('\160':_) : xs) = - LineBreak : fixNbsps s ++ fixLineInitialSpaces xs + fixLineInitialSpaces (LineBreak : Str s : xs) + | Just ('\160', _) <- T.uncons s + = LineBreak : fixNbsps s <> fixLineInitialSpaces xs fixLineInitialSpaces (x:xs) = x : fixLineInitialSpaces xs - fixNbsps s = let (ys,zs) = span (=='\160') s - in replicate (length ys) hspace ++ [Str zs] + fixNbsps s = let (ys,zs) = T.span (=='\160') s + in replicate (T.length ys) hspace <> [Str zs] hspace = RawInline "latex" "\\hspace*{0.333em}" -- We need \hfill\break for a line break at the start -- of a paragraph. See #5591. @@ -1119,11 +1125,11 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do ["LR" | ("dir", "ltr") `elem` kvs] ++ (case lang of Just lng -> let (l, o) = toPolyglossia lng - ops = if null o then "" else ("[" ++ o ++ "]") - in ["text" ++ l ++ ops] + ops = if T.null o then "" else ("[" <> o <> "]") + in ["text" <> l <> ops] Nothing -> []) contents <- inlineListToLaTeX ils - return $ (if null id' + return $ (if T.null id' then empty else "\\protect" <> linkAnchor) <> (if null cmds @@ -1167,13 +1173,13 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do , k `notElem` ["exports","tangle","results"]] let listingsopt = if null listingsopts then "" - else "[" ++ - intercalate ", " - (map (\(k,v) -> k ++ "=" ++ v) - listingsopts) ++ "]" + else "[" <> + T.intercalate ", " + (map (\(k,v) -> k <> "=" <> v) + listingsopts) <> "]" inNote <- gets stInNote when inNote $ modify $ \s -> s{ stVerbInNote = True } - let chr = case "!\"'()*,-./:;?@" \\ str of + let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of (c:_) -> c [] -> '!' let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#") str @@ -1181,16 +1187,17 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do -- (defined in the default template) so that we don't have -- to change the way we escape characters depending on whether -- the lstinline is inside another command. See #1629: - return $ text $ "\\passthrough{\\lstinline" ++ listingsopt ++ [chr] ++ str' ++ [chr] ++ "}" - let rawCode = liftM (text . (\s -> "\\texttt{" ++ escapeSpaces s ++ "}")) + return $ literal $ "\\passthrough{\\lstinline" <> + listingsopt <> T.singleton chr <> str' <> T.singleton chr <> "}" + let rawCode = liftM (literal . (\s -> "\\texttt{" <> escapeSpaces s <> "}")) $ stringToLaTeX CodeString str - where escapeSpaces = concatMap - (\c -> if c == ' ' then "\\ " else [c]) + where escapeSpaces = T.concatMap + (\c -> if c == ' ' then "\\ " else T.singleton c) let highlightCode = case highlight (writerSyntaxMap opts) formatLaTeXInline ("",classes,[]) str of Left msg -> do - unless (null msg) $ report $ CouldNotHighlight msg + unless (T.null msg) $ report $ CouldNotHighlight msg rawCode Right h -> modify (\st -> st{ stHighlighting = True }) >> return (text (T.unpack h)) @@ -1225,20 +1232,20 @@ inlineToLaTeX (Quoted qt lst) = do else char '\x2018' <> inner <> char '\x2019' inlineToLaTeX (Str str) = do setEmptyLine False - liftM text $ stringToLaTeX TextString str + liftM literal $ stringToLaTeX TextString str inlineToLaTeX (Math InlineMath str) = do setEmptyLine False - return $ "\\(" <> text (handleMathComment str) <> "\\)" + return $ "\\(" <> literal (handleMathComment str) <> "\\)" inlineToLaTeX (Math DisplayMath str) = do setEmptyLine False - return $ "\\[" <> text (handleMathComment str) <> "\\]" + return $ "\\[" <> literal (handleMathComment str) <> "\\]" inlineToLaTeX il@(RawInline f str) = do beamer <- gets stBeamer if (f == Format "latex" || f == Format "tex" || (f == Format "beamer" && beamer)) then do setEmptyLine False - return $ text str + return $ literal str else do report $ InlineNotRendered il return empty @@ -1253,30 +1260,33 @@ inlineToLaTeX SoftBreak = do WrapNone -> return space WrapPreserve -> return cr inlineToLaTeX Space = return space -inlineToLaTeX (Link _ txt ('#':ident, _)) = do - contents <- inlineListToLaTeX txt - lab <- toLabel ident - return $ text "\\protect\\hyperlink" <> braces (text lab) <> braces contents -inlineToLaTeX (Link _ txt (src, _)) = +inlineToLaTeX (Link _ txt (src,_)) + | Just ('#', ident) <- T.uncons src + = do + contents <- inlineListToLaTeX txt + lab <- toLabel ident + return $ text "\\protect\\hyperlink" <> braces (literal lab) <> braces contents + | otherwise = case txt of - [Str x] | unEscapeString x == unEscapeString src -> -- autolink + [Str x] | unEscapeString (T.unpack x) == unEscapeString (T.unpack src) -> -- autolink do modify $ \s -> s{ stUrl = True } src' <- stringToLaTeX URLString (escapeURI src) - return $ text $ "\\url{" ++ src' ++ "}" - [Str x] | Just rest <- stripPrefix "mailto:" src, - unEscapeString x == unEscapeString rest -> -- email autolink + return $ literal $ "\\url{" <> src' <> "}" + [Str x] | Just rest <- T.stripPrefix "mailto:" src, + unEscapeString (T.unpack x) == unEscapeString (T.unpack rest) -> -- email autolink do modify $ \s -> s{ stUrl = True } src' <- stringToLaTeX URLString (escapeURI src) contents <- inlineListToLaTeX txt - return $ "\\href" <> braces (text src') <> + return $ "\\href" <> braces (literal src') <> braces ("\\nolinkurl" <> braces contents) _ -> do contents <- inlineListToLaTeX txt src' <- stringToLaTeX URLString (escapeURI src) - return $ text ("\\href{" ++ src' ++ "}{") <> + return $ literal ("\\href{" <> src' <> "}{") <> contents <> char '}' -inlineToLaTeX il@(Image _ _ ('d':'a':'t':'a':':':_, _)) = do - report $ InlineNotRendered il - return empty +inlineToLaTeX il@(Image _ _ (src, _)) + | Just _ <- T.stripPrefix "data:" src = do + report $ InlineNotRendered il + return empty inlineToLaTeX (Image attr _ (source, _)) = do setEmptyLine False modify $ \s -> s{ stGraphics = True } @@ -1284,9 +1294,9 @@ inlineToLaTeX (Image attr _ (source, _)) = do let showDim dir = let d = text (show dir) <> "=" in case dimension dir attr of Just (Pixel a) -> - [d <> text (showInInch opts (Pixel a)) <> "in"] + [d <> literal (showInInch opts (Pixel a)) <> "in"] Just (Percent a) -> - [d <> text (showFl (a / 100)) <> + [d <> literal (showFl (a / 100)) <> case dir of Width -> "\\textwidth" Height -> "\\textheight" @@ -1300,18 +1310,18 @@ inlineToLaTeX (Image attr _ (source, _)) = do Height | isJust (dimension Width attr) -> [d <> "\\textheight"] _ -> [] - dimList = showDim Width ++ showDim Height + dimList = showDim Width <> showDim Height dims = if null dimList then empty else brackets $ mconcat (intersperse "," dimList) source' = if isURI source then source - else unEscapeString source + else T.pack $ unEscapeString $ T.unpack source source'' <- stringToLaTeX URLString source' inHeading <- gets stInHeading return $ (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <> - dims <> braces (text source'') + dims <> braces (literal source'') inlineToLaTeX (Note contents) = do setEmptyLine False externalNotes <- gets stExternalNotes @@ -1336,13 +1346,14 @@ inlineToLaTeX (Note contents) = do -- A comment at the end of math needs to be followed by a newline, -- or the closing delimiter gets swallowed. -handleMathComment :: String -> String +handleMathComment :: Text -> Text handleMathComment s = - let (_, ys) = break (\c -> c == '\n' || c == '%') $ reverse s - in case ys of - '%':'\\':_ -> s - '%':_ -> s ++ "\n" - _ -> s + let (_, ys) = T.break (\c -> c == '\n' || c == '%') $ T.reverse s -- no T.breakEnd + in case T.uncons ys of + Just ('%', ys') -> case T.uncons ys' of + Just ('\\', _) -> s + _ -> s <> "\n" + _ -> s protectCode :: Inline -> [Inline] protectCode x@(Code _ _) = [ltx "\\mbox{" , x , ltx "}"] @@ -1379,7 +1390,7 @@ citationsToNatbib cits head cits s = citationSuffix $ last cits - ks = intercalate ", " $ map citationId cits + ks = T.intercalate ", " $ map citationId cits citationsToNatbib (c:cs) | citationMode c == AuthorInText = do author <- citeCommand "citeauthor" [] [] (citationId c) @@ -1403,31 +1414,34 @@ citationsToNatbib cits = do NormalCitation -> citeCommand "citealp" p s k citeCommand :: PandocMonad m - => String -> [Inline] -> [Inline] -> String -> LW m (Doc Text) + => Text -> [Inline] -> [Inline] -> Text -> LW m (Doc Text) citeCommand c p s k = do args <- citeArguments p s k - return $ text ("\\" ++ c) <> args + return $ literal ("\\" <> c) <> args citeArguments :: PandocMonad m - => [Inline] -> [Inline] -> String -> LW m (Doc Text) + => [Inline] -> [Inline] -> Text -> LW m (Doc Text) citeArguments p s k = do let s' = stripLocatorBraces $ case s of - (Str - [x] : r) | isPunctuation x -> dropWhile (== Space) r - (Str (x:xs) : r) | isPunctuation x -> Str xs : r - _ -> s + (Str t : r) -> case T.uncons t of + Just (x, xs) + | T.null xs + , isPunctuation x -> dropWhile (== Space) r + | isPunctuation x -> Str xs : r + _ -> s + _ -> s pdoc <- inlineListToLaTeX p sdoc <- inlineListToLaTeX s' let optargs = case (isEmpty pdoc, isEmpty sdoc) of (True, True ) -> empty (True, False) -> brackets sdoc (_ , _ ) -> brackets pdoc <> brackets sdoc - return $ optargs <> braces (text k) + return $ optargs <> braces (literal k) -- strip off {} used to define locator in pandoc-citeproc; see #5722 stripLocatorBraces :: [Inline] -> [Inline] stripLocatorBraces = walk go - where go (Str xs) = Str $ filter (\c -> c /= '{' && c /= '}') xs + where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs go x = x citationsToBiblatex :: PandocMonad m => [Citation] -> LW m (Doc Text) @@ -1453,7 +1467,7 @@ citationsToBiblatex (c:cs) AuthorInText -> "\\textcite" NormalCitation -> "\\autocite" return $ text cmd <> - braces (text (intercalate "," (map citationId (c:cs)))) + braces (literal (T.intercalate "," (map citationId (c:cs)))) | otherwise = do let cmd = case citationMode c of SuppressAuthor -> "\\autocites*" @@ -1470,17 +1484,17 @@ citationsToBiblatex (c:cs) citationsToBiblatex _ = return empty -- Determine listings language from list of class attributes. -getListingsLanguage :: [String] -> Maybe String +getListingsLanguage :: [Text] -> Maybe Text getListingsLanguage xs = foldr ((<|>) . toListingsLanguage) Nothing xs -mbBraced :: String -> String -mbBraced x = if not (all isAlphaNum x) +mbBraced :: Text -> Text +mbBraced x = if not (T.all isAlphaNum x) then "{" <> x <> "}" else x -- Extract a key from divs and spans -extract :: String -> Block -> [String] +extract :: Text -> Block -> [Text] extract key (Div attr _) = lookKey key attr extract key (Plain ils) = query (extractInline key) ils extract key (Para ils) = query (extractInline key) ils @@ -1488,16 +1502,16 @@ extract key (Header _ _ ils) = query (extractInline key) ils extract _ _ = [] -- Extract a key from spans -extractInline :: String -> Inline -> [String] +extractInline :: Text -> Inline -> [Text] extractInline key (Span attr _) = lookKey key attr extractInline _ _ = [] -- Look up a key in an attribute and give a list of its values -lookKey :: String -> Attr -> [String] -lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs +lookKey :: Text -> Attr -> [Text] +lookKey key (_,_,kvs) = maybe [] T.words $ lookup key kvs -- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: Lang -> (String, String) +toPolyglossiaEnv :: Lang -> (Text, Text) toPolyglossiaEnv l = case toPolyglossia l of ("arabic", o) -> ("Arabic", o) @@ -1506,7 +1520,7 @@ toPolyglossiaEnv l = -- Takes a list of the constituents of a BCP 47 language code and -- converts it to a Polyglossia (language, options) tuple -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: Lang -> (String, String) +toPolyglossia :: Lang -> (Text, Text) toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") @@ -1546,7 +1560,7 @@ toPolyglossia x = (commonFromBcp47 x, "") -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf -- List of supported languages (slightly outdated): -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: Lang -> String +toBabel :: Lang -> Text toBabel (Lang "de" _ "AT" vars) | "1901" `elem` vars = "austrian" | otherwise = "naustrian" @@ -1578,7 +1592,7 @@ toBabel x = commonFromBcp47 x -- Takes a list of the constituents of a BCP 47 language code -- and converts it to a string shared by Babel and Polyglossia. -- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: Lang -> String +commonFromBcp47 :: Lang -> Text commonFromBcp47 (Lang "pt" _ "BR" _) = "brazil" -- Note: documentation says "brazilian" works too, but it doesn't seem to work -- on some systems. See #2953. diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index f8c895e3c..d9eeb3bfa 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Man Copyright : Copyright (C) 2007-2019 John MacFarlane @@ -12,10 +13,10 @@ Conversion of 'Pandoc' documents to roff man page format. -} -module Text.Pandoc.Writers.Man ( writeMan) where +module Text.Pandoc.Writers.Man ( writeMan ) where import Prelude import Control.Monad.State.Strict -import Data.List (intersperse, stripPrefix) +import Data.List (intersperse) import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -73,13 +74,13 @@ pandocToMan opts (Pandoc meta blocks) = do $ setFieldsFromTitle $ defField "has-tables" hasTables $ defField "hyphenate" True - $ defField "pandoc-version" (T.pack pandocVersion) metadata + $ defField "pandoc-version" pandocVersion metadata return $ render colwidth $ case writerTemplate opts of Nothing -> main Just tpl -> renderTemplate tpl context -escString :: WriterOptions -> String -> String +escString :: WriterOptions -> Text -> Text escString _ = escapeString AsciiOnly -- for better portability -- | Return man representation of notes. @@ -117,30 +118,30 @@ blockToMan opts (Para inlines) = do blockToMan opts (LineBlock lns) = blockToMan opts $ linesToPara lns blockToMan _ b@(RawBlock f str) - | f == Format "man" = return $ text str + | f == Format "man" = return $ literal str | otherwise = do report $ BlockNotRendered b return empty -blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" +blockToMan _ HorizontalRule = return $ literal ".PP" $$ literal " * * * * *" blockToMan opts (Header level _ inlines) = do contents <- inlineListToMan opts inlines let heading = case level of 1 -> ".SH " _ -> ".SS " - return $ nowrap $ text heading <> contents + return $ nowrap $ literal heading <> contents blockToMan opts (CodeBlock _ str) = return $ - text ".IP" $$ - text ".nf" $$ - text "\\f[C]" $$ - ((case str of - '.':_ -> text "\\&" - _ -> mempty) <> - text (escString opts str)) $$ - text "\\f[R]" $$ - text ".fi" + literal ".IP" $$ + literal ".nf" $$ + literal "\\f[C]" $$ + ((case T.uncons str of + Just ('.',_) -> literal "\\&" + _ -> mempty) <> + literal (escString opts str)) $$ + literal "\\f[R]" $$ + literal ".fi" blockToMan opts (BlockQuote blocks) = do contents <- blockListToMan opts blocks - return $ text ".RS" $$ contents $$ text ".RE" + return $ literal ".RS" $$ contents $$ literal ".RE" blockToMan opts (Table caption alignments widths headers rows) = let aligncode AlignLeft = "l" aligncode AlignRight = "r" @@ -151,24 +152,24 @@ blockToMan opts (Table caption alignments widths headers rows) = modify $ \st -> st{ stHasTables = True } let iwidths = if all (== 0) widths then repeat "" - else map (printf "w(%0.1fn)" . (70 *)) widths + else map (T.pack . printf "w(%0.1fn)" . (70 *)) widths -- 78n default width - 8n indent = 70n - let coldescriptions = text $ unwords - (zipWith (\align width -> aligncode align ++ width) - alignments iwidths) ++ "." + let coldescriptions = literal $ T.unwords + (zipWith (\align width -> aligncode align <> width) + alignments iwidths) <> "." colheadings <- mapM (blockListToMan opts) headers - let makeRow cols = text "T{" $$ - vcat (intersperse (text "T}@T{") cols) $$ - text "T}" + let makeRow cols = literal "T{" $$ + vcat (intersperse (literal "T}@T{") cols) $$ + literal "T}" let colheadings' = if all null headers then empty else makeRow colheadings $$ char '_' body <- mapM (\row -> do cols <- mapM (blockListToMan opts) row return $ makeRow cols) rows - return $ text ".PP" $$ caption' $$ - text ".TS" $$ text "tab(@);" $$ coldescriptions $$ - colheadings' $$ vcat body $$ text ".TE" + return $ literal ".PP" $$ caption' $$ + literal ".TS" $$ literal "tab(@);" $$ coldescriptions $$ + colheadings' $$ vcat body $$ literal ".TE" blockToMan opts (BulletList items) = do contents <- mapM (bulletListItemToMan opts) items @@ -176,7 +177,7 @@ blockToMan opts (BulletList items) = do blockToMan opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs let indent = 1 + - maximum (map length markers) + maximum (map T.length markers) contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ zip markers items return (vcat contents) @@ -192,20 +193,20 @@ bulletListItemToMan opts (Para first:rest) = bulletListItemToMan opts (Plain first:rest) = do first' <- blockToMan opts (Plain first) rest' <- blockListToMan opts rest - let first'' = text ".IP \\[bu] 2" $$ first' + let first'' = literal ".IP \\[bu] 2" $$ first' let rest'' = if null rest then empty - else text ".RS 2" $$ rest' $$ text ".RE" + else literal ".RS 2" $$ rest' $$ literal ".RE" return (first'' $$ rest'') bulletListItemToMan opts (first:rest) = do first' <- blockToMan opts first rest' <- blockListToMan opts rest - return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" + return $ literal "\\[bu] .RS 2" $$ first' $$ rest' $$ literal ".RE" -- | Convert ordered list item (a list of blocks) to man. orderedListItemToMan :: PandocMonad m => WriterOptions -- ^ options - -> String -- ^ order marker for list item + -> Text -- ^ order marker for list item -> Int -- ^ number of spaces to indent -> [Block] -- ^ list item (list of blocks) -> StateT WriterState m (Doc Text) @@ -216,10 +217,10 @@ orderedListItemToMan opts num indent (first:rest) = do first' <- blockToMan opts first rest' <- blockListToMan opts rest let num' = printf ("%" ++ show (indent - 1) ++ "s") num - let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' + let first'' = literal (".IP \"" <> T.pack num' <> "\" " <> tshow indent) $$ first' let rest'' = if null rest then empty - else text ".RS 4" $$ rest' $$ text ".RE" + else literal ".RS 4" $$ rest' $$ literal ".RE" return $ first'' $$ rest'' -- | Convert definition list item (label, list of blocks) to man. @@ -245,9 +246,9 @@ definitionListItemToMan opts (label, defs) = do return $ first' $$ if null xs then empty - else text ".RS" $$ rest' $$ text ".RE" + else literal ".RS" $$ rest' $$ literal ".RE" [] -> return empty - return $ text ".TP" $$ nowrap labelText $$ contents + return $ literal ".TP" $$ nowrap labelText $$ contents makeCodeBold :: [Inline] -> [Inline] makeCodeBold = walk go @@ -275,7 +276,7 @@ inlineToMan opts (Strong lst) = withFontFeature 'B' (inlineListToMan opts lst) inlineToMan opts (Strikeout lst) = do contents <- inlineListToMan opts lst - return $ text "[STRIKEOUT:" <> contents <> char ']' + return $ literal "[STRIKEOUT:" <> contents <> char ']' inlineToMan opts (Superscript lst) = do contents <- inlineListToMan opts lst return $ char '^' <> contents <> char '^' @@ -288,48 +289,48 @@ inlineToMan opts (Quoted SingleQuote lst) = do return $ char '`' <> contents <> char '\'' inlineToMan opts (Quoted DoubleQuote lst) = do contents <- inlineListToMan opts lst - return $ text "\\[lq]" <> contents <> text "\\[rq]" + return $ literal "\\[lq]" <> contents <> literal "\\[rq]" inlineToMan opts (Cite _ lst) = inlineListToMan opts lst inlineToMan opts (Code _ str) = - withFontFeature 'C' (return (text $ escString opts str)) -inlineToMan opts (Str str@('.':_)) = - return $ afterBreak "\\&" <> text (escString opts str) -inlineToMan opts (Str str) = return $ text $ escString opts str + withFontFeature 'C' (return (literal $ escString opts str)) +inlineToMan opts (Str str@(T.uncons -> Just ('.',_))) = + return $ afterBreak "\\&" <> literal (escString opts str) +inlineToMan opts (Str str) = return $ literal $ escString opts str inlineToMan opts (Math InlineMath str) = lift (texMathToInlines InlineMath str) >>= inlineListToMan opts inlineToMan opts (Math DisplayMath str) = do contents <- lift (texMathToInlines DisplayMath str) >>= inlineListToMan opts - return $ cr <> text ".RS" $$ contents $$ text ".RE" + return $ cr <> literal ".RS" $$ contents $$ literal ".RE" inlineToMan _ il@(RawInline f str) - | f == Format "man" = return $ text str + | f == Format "man" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty inlineToMan _ LineBreak = return $ - cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr + cr <> literal ".PD 0" $$ literal ".P" $$ literal ".PD" <> cr inlineToMan _ SoftBreak = return space inlineToMan _ Space = return space inlineToMan opts (Link _ txt (src, _)) | not (isURI src) = inlineListToMan opts txt -- skip relative links | otherwise = do linktext <- inlineListToMan opts txt - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) return $ case txt of [Str s] | escapeURI s == srcSuffix -> - char '<' <> text srcSuffix <> char '>' - _ -> linktext <> text " (" <> text src <> char ')' + char '<' <> literal srcSuffix <> char '>' + _ -> linktext <> literal " (" <> literal src <> char ')' inlineToMan opts (Image attr alternate (source, tit)) = do let txt = if null alternate || (alternate == [Str ""]) || (alternate == [Str source]) -- to prevent autolinks then [Str "image"] else alternate linkPart <- inlineToMan opts (Link attr txt (source, tit)) - return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' + return $ char '[' <> literal "IMAGE: " <> linkPart <> char ']' inlineToMan _ (Note contents) = do -- add to notes in state modify $ \st -> st{ stNotes = contents : stNotes st } notes <- gets stNotes - let ref = show (length notes) - return $ char '[' <> text ref <> char ']' + let ref = tshow (length notes) + return $ char '[' <> literal ref <> char ']' diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 06b6da3a5..0d89c0004 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,8 +1,9 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Markdown Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -22,8 +23,7 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Char (isSpace, isAlphaNum) import Data.Default -import Data.List (find, group, intersperse, sortBy, stripPrefix, transpose, - isPrefixOf) +import Data.List (find, intersperse, sortBy, transpose) import qualified Data.Map as M import Data.Maybe (fromMaybe, catMaybes) import Data.Ord (comparing) @@ -48,7 +48,7 @@ import Text.Pandoc.Writers.Math (texMathToInlines) import Text.Pandoc.XML (toHtml5Entities) type Notes = [[Block]] -type Ref = (String, Target, Attr) +type Ref = (Text, Target, Attr) type Refs = [Ref] type MD m = ReaderT WriterEnv (StateT WriterState m) @@ -77,7 +77,7 @@ data WriterState = WriterState { stNotes :: Notes , stKeys :: M.Map Key (M.Map (Target, Attr) Int) , stLastIdx :: Int - , stIds :: Set.Set String + , stIds :: Set.Set Text , stNoteNum :: Int } @@ -246,11 +246,11 @@ keyToMarkdown :: PandocMonad m -> Ref -> MD m (Doc Text) keyToMarkdown opts (label', (src, tit), attr) = do - let tit' = if null tit + let tit' = if T.null tit then empty - else space <> "\"" <> text tit <> "\"" + else space <> "\"" <> literal tit <> "\"" return $ nest 2 $ hang 2 - ("[" <> text label' <> "]:" <> space) (text src <> tit') + ("[" <> literal label' <> "]:" <> space) (literal src <> tit') <+> linkAttributes opts attr -- | Return markdown representation of notes. @@ -265,24 +265,24 @@ notesToMarkdown opts notes = do noteToMarkdown :: PandocMonad m => WriterOptions -> Int -> [Block] -> MD m (Doc Text) noteToMarkdown opts num blocks = do contents <- blockListToMarkdown opts blocks - let num' = text $ writerIdentifierPrefix opts ++ show num + let num' = literal $ writerIdentifierPrefix opts <> tshow num let marker = if isEnabled Ext_footnotes opts - then text "[^" <> num' <> text "]:" - else text "[" <> num' <> text "]" + then literal "[^" <> num' <> literal "]:" + else literal "[" <> num' <> literal "]" let markerSize = 4 + offset num' let spacer = case writerTabStop opts - markerSize of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " + n | n > 0 -> literal $ T.replicate n " " + _ -> literal " " return $ if isEnabled Ext_footnotes opts then hang (writerTabStop opts) (marker <> spacer) contents else marker <> spacer <> contents -- | Escape special characters for Markdown. -escapeString :: WriterOptions -> String -> String -escapeString opts = +escapeText :: WriterOptions -> Text -> Text +escapeText opts = (if writerPreferAscii opts - then T.unpack . toHtml5Entities . T.pack - else id) . go + then toHtml5Entities + else id) . T.pack . go . T.unpack where go [] = [] go (c:cs) = @@ -321,12 +321,12 @@ escapeString opts = attrsToMarkdown :: Attr -> Doc Text attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] where attribId = case attribs of - ([],_,_) -> empty + ("",_,_) -> empty (i,_,_) -> "#" <> escAttr i attribClasses = case attribs of (_,[],_) -> empty (_,cs,_) -> hsep $ - map (escAttr . ('.':)) + map (escAttr . ("."<>)) cs attribKeys = case attribs of (_,_,[]) -> empty @@ -334,10 +334,10 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] map (\(k,v) -> escAttr k <> "=\"" <> escAttr v <> "\"") ks - escAttr = mconcat . map escAttrChar - escAttrChar '"' = text "\\\"" - escAttrChar '\\' = text "\\\\" - escAttrChar c = text [c] + escAttr = mconcat . map escAttrChar . T.unpack + escAttrChar '"' = literal "\\\"" + escAttrChar '\\' = literal "\\\\" + escAttrChar c = literal $ T.singleton c linkAttributes :: WriterOptions -> Attr -> Doc Text linkAttributes opts attr = @@ -346,7 +346,7 @@ linkAttributes opts attr = else empty -- | Ordered list start parser for use in Para below. -olMarker :: Parser [Char] ParserState Char +olMarker :: Parser Text ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && @@ -355,9 +355,9 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker else spaceChar -- | True if string begins with an ordered list marker -beginsWithOrderedListMarker :: String -> Bool +beginsWithOrderedListMarker :: Text -> Bool beginsWithOrderedListMarker str = - case runParser olMarker defaultParserState "para start" (take 10 str) of + case runParser olMarker defaultParserState "para start" (T.take 10 str) of Left _ -> False Right _ -> True @@ -403,9 +403,9 @@ blockToMarkdown' opts (Div attrs ils) = do case () of _ | isEnabled Ext_fenced_divs opts && attrs /= nullAttr -> - nowrap (text ":::" <+> attrsToMarkdown attrs) $$ + nowrap (literal ":::" <+> attrsToMarkdown attrs) $$ chomp contents $$ - text ":::" <> blankline + literal ":::" <> blankline | isEnabled Ext_native_divs opts || (isEnabled Ext_raw_html opts && isEnabled Ext_markdown_in_html_blocks opts) -> @@ -425,38 +425,36 @@ blockToMarkdown' opts (Plain inlines) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - let rendered = T.unpack $ render colwidth contents - let escapeMarker (x:xs) | x `elem` (".()" :: String) = '\\':x:xs - | otherwise = x : escapeMarker xs - escapeMarker [] = [] + let rendered = render colwidth contents + let escapeMarker = T.concatMap $ \x -> if x `elemText` ".()" + then T.pack ['\\', x] + else T.singleton x + let spaceOrNothing = (not isPlain &&) . maybe True (isSpace . fst) . T.uncons let contents' = - case rendered of - '%':_ | isEnabled Ext_pandoc_title_block opts && - isEnabled Ext_all_symbols_escapable opts -> - "\\" <> contents - '+':s:_ | not isPlain && isSpace s -> "\\" <> contents - '*':s:_ | not isPlain && isSpace s -> "\\" <> contents - '-':s:_ | not isPlain && isSpace s -> "\\" <> contents - '+':[] | not isPlain -> "\\" <> contents - '*':[] | not isPlain -> "\\" <> contents - '-':[] | not isPlain -> "\\" <> contents - '|':_ | (isEnabled Ext_line_blocks opts || - isEnabled Ext_pipe_tables opts) - && isEnabled Ext_all_symbols_escapable opts - -> "\\" <> contents - _ | not isPlain && beginsWithOrderedListMarker rendered - && isEnabled Ext_all_symbols_escapable opts - -> text $ escapeMarker rendered - | otherwise -> contents + case T.uncons rendered of + Just ('%', _) + | isEnabled Ext_pandoc_title_block opts && + isEnabled Ext_all_symbols_escapable opts -> "\\" <> contents + Just ('+', s) | spaceOrNothing s -> "\\" <> contents + Just ('*', s) | spaceOrNothing s -> "\\" <> contents + Just ('-', s) | spaceOrNothing s -> "\\" <> contents + Just ('|', _) | (isEnabled Ext_line_blocks opts || + isEnabled Ext_pipe_tables opts) + && isEnabled Ext_all_symbols_escapable opts + -> "\\" <> contents + _ | not isPlain && beginsWithOrderedListMarker rendered + && isEnabled Ext_all_symbols_escapable opts + -> literal $ escapeMarker rendered + | otherwise -> contents return $ contents' <> cr -- title beginning with fig: indicates figure -blockToMarkdown' opts (Para [Image attr alt (src,'f':'i':'g':':':tit)]) +blockToMarkdown' opts (Para [Image attr alt (src,tgt@(T.stripPrefix "fig:" -> Just tit))]) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - ((<> blankline) . text . T.unpack . T.strip) <$> + ((<> blankline) . literal . T.strip) <$> writeHtml5String opts{ writerTemplate = Nothing } - (Pandoc nullMeta [Para [Image attr alt (src,"fig:" ++ tit)]]) + (Pandoc nullMeta [Para [Image attr alt (src,tgt)]]) | otherwise = blockToMarkdown opts (Para [Image attr alt (src,tit)]) blockToMarkdown' opts (Para inlines) = (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) @@ -464,39 +462,39 @@ blockToMarkdown' opts (LineBlock lns) = if isEnabled Ext_line_blocks opts then do mdLines <- mapM (inlineListToMarkdown opts) lns - return $ (vcat $ map (hang 2 (text "| ")) mdLines) <> blankline + return $ (vcat $ map (hang 2 (literal "| ")) mdLines) <> blankline else blockToMarkdown opts $ linesToPara lns blockToMarkdown' opts b@(RawBlock f str) = do plain <- asks envPlain let Format fmt = f let rawAttribBlock = return $ - (text "```{=" <> text fmt <> "}") $$ - text str $$ - (text "```" <> text "\n") + (literal "```{=" <> literal fmt <> "}") $$ + literal str $$ + (literal "```" <> literal "\n") let renderEmpty = mempty <$ report (BlockNotRendered b) case () of _ | plain -> renderEmpty | isEnabled Ext_raw_attribute opts -> rawAttribBlock | f `elem` ["markdown", "markdown_github", "markdown_phpextra", "markdown_mmd", "markdown_strict"] -> - return $ text str <> text "\n" + return $ literal str <> literal "\n" | f `elem` ["html", "html5", "html4"] -> case () of _ | isEnabled Ext_markdown_attribute opts -> return $ - text (addMarkdownAttribute str) <> text "\n" + literal (addMarkdownAttribute str) <> literal "\n" | isEnabled Ext_raw_html opts -> return $ - text str <> text "\n" + literal str <> literal "\n" | isEnabled Ext_raw_attribute opts -> rawAttribBlock | otherwise -> renderEmpty | f `elem` ["latex", "tex"] -> case () of _ | isEnabled Ext_raw_tex opts -> return $ - text str <> text "\n" + literal str <> literal "\n" | isEnabled Ext_raw_attribute opts -> rawAttribBlock | otherwise -> renderEmpty | otherwise -> renderEmpty blockToMarkdown' opts HorizontalRule = do - return $ blankline <> text (replicate (writerColumns opts) '-') <> blankline + return $ blankline <> literal (T.replicate (writerColumns opts) "-") <> blankline blockToMarkdown' opts (Header level attr inlines) = do -- first, if we're putting references at the end of a section, we -- put them here. @@ -516,7 +514,7 @@ blockToMarkdown' opts (Header level attr inlines) = do (id',[],[]) | isEnabled Ext_auto_identifiers opts && id' == autoId -> empty (id',_,_) | isEnabled Ext_mmd_header_identifiers opts -> - space <> brackets (text id') + space <> brackets (literal id') _ | isEnabled Ext_header_attributes opts -> space <> attrsToMarkdown attr | otherwise -> empty @@ -533,44 +531,44 @@ blockToMarkdown' opts (Header level attr inlines) = do then blanklines 3 <> contents <> blanklines 2 else contents <> blankline | setext -> - contents <> attr' <> cr <> text (replicate (offset contents) '=') <> + contents <> attr' <> cr <> literal (T.replicate (offset contents) "=") <> blankline 2 | plain -> if isEnabled Ext_gutenberg opts then blanklines 2 <> contents <> blankline else contents <> blankline | setext -> - contents <> attr' <> cr <> text (replicate (offset contents) '-') <> + contents <> attr' <> cr <> literal (T.replicate (offset contents) "-") <> blankline -- ghc interprets '#' characters in column 1 as linenum specifiers. _ | plain || isEnabled Ext_literate_haskell opts -> contents <> blankline - _ -> text (replicate level '#') <> space <> contents <> attr' <> blankline + _ -> literal (T.replicate level "#") <> space <> contents <> attr' <> blankline return $ refs <> hdr blockToMarkdown' opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes && "literate" `elem` classes && isEnabled Ext_literate_haskell opts = - return $ prefixed "> " (text str) <> blankline + return $ prefixed "> " (literal str) <> blankline blockToMarkdown' opts (CodeBlock attribs str) = return $ case attribs == nullAttr of False | isEnabled Ext_backtick_code_blocks opts -> - backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline + backticks <> attrs <> cr <> literal str <> cr <> backticks <> blankline | isEnabled Ext_fenced_code_blocks opts -> - tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline - _ -> nest (writerTabStop opts) (text str) <> blankline - where endline c = text $ case [length ln - | ln <- map trim (lines str) - , [c,c,c] `isPrefixOf` ln - , all (== c) ln] of - [] -> replicate 3 c - xs -> replicate (maximum xs + 1) c + tildes <> attrs <> cr <> literal str <> cr <> tildes <> blankline + _ -> nest (writerTabStop opts) (literal str) <> blankline + where endline c = literal $ case [T.length ln + | ln <- map trim (T.lines str) + , T.pack [c,c,c] `T.isPrefixOf` ln + , T.all (== c) ln] of + [] -> T.replicate 3 $ T.singleton c + xs -> T.replicate (maximum xs + 1) $ T.singleton c backticks = endline '`' tildes = endline '~' attrs = if isEnabled Ext_fenced_code_attributes opts then nowrap $ " " <> attrsToMarkdown attribs else case attribs of - (_,(cls:_),_) -> " " <> text cls + (_,(cls:_),_) -> " " <> literal cls _ -> empty blockToMarkdown' opts (BlockQuote blocks) = do plain <- asks envPlain @@ -635,9 +633,9 @@ blockToMarkdown' opts t@(Table caption aligns widths headers rows) = do rows (id,) <$> pipeTable (all null headers) aligns' rawHeaders rawRows | isEnabled Ext_raw_html opts -> fmap (id,) $ - (text . T.unpack) <$> + literal <$> (writeHtml5String opts{ writerTemplate = Nothing } $ Pandoc nullMeta [t]) - | otherwise -> return $ (id, text "[TABLE]") + | otherwise -> return $ (id, literal "[TABLE]") return $ nst (tbl $$ caption'') $$ blankline blockToMarkdown' opts (BulletList items) = do contents <- inList $ mapM (bulletListItemToMarkdown opts) items @@ -648,8 +646,8 @@ blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim let attribs = (start', sty', delim') let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' + let markers' = map (\m -> if T.length m < 3 + then m <> T.replicate (3 - T.length m) " " else m) markers contents <- inList $ mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ @@ -662,7 +660,7 @@ blockToMarkdown' opts (DefinitionList items) = do inList :: Monad m => MD m a -> MD m a inList p = local (\env -> env {envInList = True}) p -addMarkdownAttribute :: String -> String +addMarkdownAttribute :: Text -> Text addMarkdownAttribute s = case span isTagText $ reverse $ parseTags s of (xs,(TagOpen t attrs:rest)) -> @@ -675,29 +673,29 @@ pipeTable :: PandocMonad m => Bool -> [Alignment] -> [Doc Text] -> [[Doc Text]] -> MD m (Doc Text) pipeTable headless aligns rawHeaders rawRows = do - let sp = text " " + let sp = literal " " let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty blockFor AlignRight x y = rblock (x + 2) (sp <> y) <> lblock 0 empty blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows) - let torow cs = nowrap $ text "|" <> - hcat (intersperse (text "|") $ + let torow cs = nowrap $ literal "|" <> + hcat (intersperse (literal "|") $ zipWith3 blockFor aligns widths (map chomp cs)) - <> text "|" - let toborder (a, w) = text $ case a of - AlignLeft -> ':':replicate (w + 1) '-' - AlignCenter -> ':':replicate w '-' ++ ":" - AlignRight -> replicate (w + 1) '-' ++ ":" - AlignDefault -> replicate (w + 2) '-' + <> literal "|" + let toborder (a, w) = literal $ case a of + AlignLeft -> ":" <> T.replicate (w + 1) "-" + AlignCenter -> ":" <> T.replicate w "-" <> ":" + AlignRight -> T.replicate (w + 1) "-" <> ":" + AlignDefault -> T.replicate (w + 2) "-" -- note: pipe tables can't completely lack a -- header; for a headerless table, we need a header of empty cells. -- see jgm/pandoc#1996. let header = if headless then torow (replicate (length aligns) empty) else torow rawHeaders - let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ - map toborder $ zip aligns widths) <> text "|" + let border = nowrap $ literal "|" <> hcat (intersperse (literal "|") $ + map toborder $ zip aligns widths) <> literal "|" let body = vcat $ map torow rawRows return $ header $$ border $$ body @@ -729,15 +727,15 @@ pandocTable opts multiline headless aligns widths rawHeaders rawRows = do let widthsInChars | isSimple = map numChars columns | otherwise = zipWith relWidth widths columns - let makeRow = hcat . intersperse (lblock 1 (text " ")) . + let makeRow = hcat . intersperse (lblock 1 (literal " ")) . (zipWith3 alignHeader aligns widthsInChars) let rows' = map makeRow rawRows let head' = makeRow rawHeaders - let underline = mconcat $ intersperse (text " ") $ - map (\width -> text (replicate width '-')) widthsInChars + let underline = mconcat $ intersperse (literal " ") $ + map (\width -> literal (T.replicate width "-")) widthsInChars let border = if multiline - then text (replicate (sum widthsInChars + - length widthsInChars - 1) '-') + then literal (T.replicate (sum widthsInChars + + length widthsInChars - 1) "-") else if headless then underline else empty @@ -767,8 +765,8 @@ bulletListItemToMarkdown :: PandocMonad m => WriterOptions -> [Block] -> MD m (D bulletListItemToMarkdown opts bs = do let exts = writerExtensions opts contents <- blockListToMarkdown opts $ taskListItemToAscii exts bs - let sps = replicate (writerTabStop opts - 2) ' ' - let start = text ('-' : ' ' : sps) + let sps = T.replicate (writerTabStop opts - 2) " " + let start = literal $ "- " <> sps -- remove trailing blank line if item ends with a tight list let contents' = if itemEndsWithTightList bs then chomp contents <> cr @@ -778,19 +776,19 @@ bulletListItemToMarkdown opts bs = do -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: PandocMonad m => WriterOptions -- ^ options - -> String -- ^ list item marker + -> Text -- ^ list item marker -> [Block] -- ^ list item (list of blocks) -> MD m (Doc Text) orderedListItemToMarkdown opts marker bs = do let exts = writerExtensions opts contents <- blockListToMarkdown opts $ taskListItemToAscii exts bs - let sps = case writerTabStop opts - length marker of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " + let sps = case writerTabStop opts - T.length marker of + n | n > 0 -> literal $ T.replicate n " " + _ -> literal " " let ind = if isEnabled Ext_four_space_rule opts then writerTabStop opts - else max (writerTabStop opts) (length marker + 1) - let start = text marker <> sps + else max (writerTabStop opts) (T.length marker + 1) + let start = literal marker <> sps -- remove trailing blank line if item ends with a tight list let contents' = if itemEndsWithTightList bs then chomp contents <> cr @@ -811,8 +809,8 @@ definitionListItemToMarkdown opts (label, defs) = do isPlain <- asks envPlain let leader = if isPlain then " " else ": " let sps = case writerTabStop opts - 3 of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " + n | n > 0 -> literal $ T.replicate n " " + _ -> literal " " let isTight = case defs of ((Plain _ : _): _) -> True _ -> False @@ -828,7 +826,7 @@ definitionListItemToMarkdown opts (label, defs) = do return $ blankline <> nowrap labelText $$ (if isTight then empty else blankline) <> contents <> blankline else do - return $ nowrap (chomp labelText <> text " " <> cr) <> + return $ nowrap (chomp labelText <> literal " " <> cr) <> vsep (map vsep defs') <> blankline -- | Convert list of Pandoc block elements to markdown. @@ -860,12 +858,12 @@ blockListToMarkdown opts blocks = do fixBlocks (Plain ils : bs) = Para ils : fixBlocks bs fixBlocks (r@(RawBlock f raw) : b : bs) - | not (null raw) - , last raw /= '\n' = + | not (T.null raw) + , T.last raw /= '\n' = case b of Plain{} -> r : fixBlocks (b:bs) RawBlock{} -> r : fixBlocks (b:bs) - _ -> RawBlock f (raw ++ "\n") : fixBlocks (b:bs) -- #4629 + _ -> RawBlock f (raw <> "\n") : fixBlocks (b:bs) -- #4629 fixBlocks (x : xs) = x : fixBlocks xs fixBlocks [] = [] isListBlock (BulletList _) = True @@ -880,10 +878,10 @@ blockListToMarkdown opts blocks = do mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . mconcat getKey :: Doc Text -> Key -getKey = toKey . T.unpack . render Nothing +getKey = toKey . render Nothing -findUsableIndex :: [String] -> Int -> Int -findUsableIndex lbls i = if (show i) `elem` lbls +findUsableIndex :: [Text] -> Int -> Int +findUsableIndex lbls i = if (tshow i) `elem` lbls then findUsableIndex lbls (i + 1) else i @@ -897,7 +895,7 @@ getNextIndex = do -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. -getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m String +getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text getReference attr label target = do refs <- gets stRefs case find (\(_,t,a) -> t == target && a == attr) refs of @@ -910,9 +908,9 @@ getReference attr label target = do then do i <- getNextIndex modify $ \s -> s{ stLastIdx = i } - return (show i, i) + return (tshow i, i) else - return (T.unpack (render Nothing label), 0) + return (render Nothing label, 0) modify (\s -> s{ stRefs = (lab', target, attr) : refs, stKeys = M.insert (getKey label) @@ -923,10 +921,10 @@ getReference attr label target = do Just km -> do -- we have refs with this label case M.lookup (target, attr) km of Just i -> do - let lab' = T.unpack $ render Nothing $ + let lab' = render Nothing $ label <> if i == 0 then mempty - else text (show i) + else literal (tshow i) -- make sure it's in stRefs; it may be -- a duplicate that was printed in a previous -- block: @@ -937,7 +935,7 @@ getReference attr label target = do Nothing -> do -- but this one is to a new target i <- getNextIndex modify $ \s -> s{ stLastIdx = i } - let lab' = show i + let lab' = tshow i modify (\s -> s{ stRefs = (lab', target, attr) : refs, stKeys = M.insert (getKey label) @@ -955,28 +953,28 @@ inlineListToMarkdown opts lst = do (Link _ _ _) -> case is of -- If a link is followed by another link, or '[', '(' or ':' -- then we don't shortcut - (Link _ _ _):_ -> unshortcutable - Space:(Link _ _ _):_ -> unshortcutable - Space:(Str('[':_)):_ -> unshortcutable - Space:(RawInline _ ('[':_)):_ -> unshortcutable - Space:(Cite _ _):_ -> unshortcutable - SoftBreak:(Link _ _ _):_ -> unshortcutable - SoftBreak:(Str('[':_)):_ -> unshortcutable - SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable - SoftBreak:(Cite _ _):_ -> unshortcutable - LineBreak:(Link _ _ _):_ -> unshortcutable - LineBreak:(Str('[':_)):_ -> unshortcutable - LineBreak:(RawInline _ ('[':_)):_ -> unshortcutable - LineBreak:(Cite _ _):_ -> unshortcutable - (Cite _ _):_ -> unshortcutable - Str ('[':_):_ -> unshortcutable - Str ('(':_):_ -> unshortcutable - Str (':':_):_ -> unshortcutable - (RawInline _ ('[':_)):_ -> unshortcutable - (RawInline _ ('(':_)):_ -> unshortcutable - (RawInline _ (':':_)):_ -> unshortcutable - (RawInline _ (' ':'[':_)):_ -> unshortcutable - _ -> shortcutable + (Link _ _ _):_ -> unshortcutable + Space:(Link _ _ _):_ -> unshortcutable + Space:(Str(thead -> Just '[')):_ -> unshortcutable + Space:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + SoftBreak:(Link _ _ _):_ -> unshortcutable + SoftBreak:(Str(thead -> Just '[')):_ -> unshortcutable + SoftBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + SoftBreak:(Cite _ _):_ -> unshortcutable + LineBreak:(Link _ _ _):_ -> unshortcutable + LineBreak:(Str(thead -> Just '[')):_ -> unshortcutable + LineBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + LineBreak:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str (thead -> Just '['):_ -> unshortcutable + Str (thead -> Just '('):_ -> unshortcutable + Str (thead -> Just ':'):_ -> unshortcutable + (RawInline _ (thead -> Just '[')):_ -> unshortcutable + (RawInline _ (thead -> Just '(')):_ -> unshortcutable + (RawInline _ (thead -> Just ':')):_ -> unshortcutable + (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable + _ -> shortcutable _ -> shortcutable where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) unshortcutable = do @@ -984,6 +982,7 @@ inlineListToMarkdown opts lst = do (\env -> env { envRefShortcutable = False }) (inlineToMarkdown opts i) fmap (iMark <>) (go is) + thead = fmap fst . T.uncons isSp :: Inline -> Bool isSp Space = True @@ -992,22 +991,22 @@ isSp _ = False avoidBadWrapsInList :: [Inline] -> [Inline] avoidBadWrapsInList [] = [] -avoidBadWrapsInList (s:Str ('>':cs):xs) | isSp s = - Str (' ':'>':cs) : avoidBadWrapsInList xs -avoidBadWrapsInList (s:Str [c]:[]) - | isSp s && c `elem` ['-','*','+'] = Str [' ', c] : [] -avoidBadWrapsInList (s:Str [c]:Space:xs) - | isSp s && c `elem` ['-','*','+'] = - Str [' ', c] : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s = + Str (" >" <> cs) : avoidBadWrapsInList xs +avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):[]) + | T.null cs && isSp s && c `elem` ['-','*','+'] = Str (T.pack [' ', c]) : [] +avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs) + | T.null cs && isSp s && c `elem` ['-','*','+'] = + Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs avoidBadWrapsInList (s:Str cs:Space:xs) | isSp s && isOrderedListMarker cs = - Str (' ':cs) : Space : avoidBadWrapsInList xs + Str (" " <> cs) : Space : avoidBadWrapsInList xs avoidBadWrapsInList (s:Str cs:[]) - | isSp s && isOrderedListMarker cs = Str (' ':cs) : [] + | isSp s && isOrderedListMarker cs = Str (" " <> cs) : [] avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs -isOrderedListMarker :: String -> Bool -isOrderedListMarker xs = not (null xs) && (last xs `elem` ['.',')']) && +isOrderedListMarker :: Text -> Bool +isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) && isRight (runParser (anyOrderedListMarker >> eof) defaultParserState "" xs) @@ -1020,7 +1019,7 @@ inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text) inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = do case lookup "data-emoji" kvs of Just emojiname | isEnabled Ext_emoji opts -> - return $ ":" <> text emojiname <> ":" + return $ ":" <> literal emojiname <> ":" _ -> inlineToMarkdown opts (Str s) inlineToMarkdown opts (Span attrs ils) = do plain <- asks envPlain @@ -1035,7 +1034,7 @@ inlineToMarkdown opts (Span attrs ils) = do in "[" <> contents <> "]" <> attrs' | isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts -> - tagWithAttrs "span" attrs <> contents <> text "</span>" + tagWithAttrs "span" attrs <> contents <> literal "</span>" | otherwise -> contents inlineToMarkdown _ (Emph []) = return empty inlineToMarkdown opts (Emph lst) = do @@ -1074,10 +1073,10 @@ inlineToMarkdown opts (Superscript lst) = else if isEnabled Ext_raw_html opts then "<sup>" <> contents <> "</sup>" else - let rendered = T.unpack $ render Nothing contents - in case mapM toSuperscript rendered of - Just r -> text r - Nothing -> text $ "^(" ++ rendered ++ ")" + let rendered = render Nothing contents + in case mapM toSuperscript (T.unpack rendered) of + Just r -> literal $ T.pack r + Nothing -> literal $ "^(" <> rendered <> ")" inlineToMarkdown _ (Subscript []) = return empty inlineToMarkdown opts (Subscript lst) = local (\env -> env {envEscapeSpaces = True}) $ do @@ -1087,10 +1086,10 @@ inlineToMarkdown opts (Subscript lst) = else if isEnabled Ext_raw_html opts then "<sub>" <> contents <> "</sub>" else - let rendered = T.unpack $ render Nothing contents - in case mapM toSubscript rendered of - Just r -> text r - Nothing -> text $ "_(" ++ rendered ++ ")" + let rendered = render Nothing contents + in case mapM toSubscript (T.unpack rendered) of + Just r -> literal $ T.pack r + Nothing -> literal $ "_(" <> rendered <> ")" inlineToMarkdown opts (SmallCaps lst) = do plain <- asks envPlain if not plain && @@ -1114,19 +1113,19 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do then "“" <> contents <> "”" else "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = do - let tickGroups = filter (\s -> '`' `elem` s) $ group str + let tickGroups = filter (T.any (== '`')) $ T.group str let longest = if null tickGroups then 0 - else maximum $ map length tickGroups - let marker = replicate (longest + 1) '`' + else maximum $ map T.length tickGroups + let marker = T.replicate (longest + 1) "`" let spacer = if (longest == 0) then "" else " " let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr then attrsToMarkdown attr else empty plain <- asks envPlain if plain - then return $ text str - else return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs + then return $ literal str + else return $ literal (marker <> spacer <> str <> spacer <> marker) <> attrs inlineToMarkdown opts (Str str) = do isPlain <- asks envPlain let str' = (if isEnabled Ext_smart opts @@ -1134,18 +1133,18 @@ inlineToMarkdown opts (Str str) = do else id) $ if isPlain then str - else escapeString opts str - return $ text str' + else escapeText opts str + return $ literal str' inlineToMarkdown opts (Math InlineMath str) = case writerHTMLMathMethod opts of WebTeX url -> inlineToMarkdown opts - (Image nullAttr [Str str] (url ++ urlEncode str, str)) + (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$" <> text str <> "$" + return $ "$" <> literal str <> "$" | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\(" <> text str <> "\\)" + return $ "\\(" <> literal str <> "\\)" | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\(" <> text str <> "\\\\)" + return $ "\\\\(" <> literal str <> "\\\\)" | otherwise -> do plain <- asks envPlain texMathToInlines InlineMath str >>= @@ -1155,40 +1154,40 @@ inlineToMarkdown opts (Math DisplayMath str) = case writerHTMLMathMethod opts of WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` inlineToMarkdown opts (Image nullAttr [Str str] - (url ++ urlEncode str, str)) + (url <> T.pack (urlEncode $ T.unpack str), str)) _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$$" <> text str <> "$$" + return $ "$$" <> literal str <> "$$" | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\[" <> text str <> "\\]" + return $ "\\[" <> literal str <> "\\]" | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\[" <> text str <> "\\\\]" + return $ "\\\\[" <> literal str <> "\\\\]" | otherwise -> (\x -> cr <> x <> cr) `fmap` (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) inlineToMarkdown opts il@(RawInline f str) = do - let tickGroups = filter (\s -> '`' `elem` s) $ group str + let tickGroups = filter (T.any (== '`')) $ T.group str let numticks = if null tickGroups then 1 - else 1 + maximum (map length tickGroups) + else 1 + maximum (map T.length tickGroups) plain <- asks envPlain let Format fmt = f let rawAttribInline = return $ - text (replicate numticks '`') <> text str <> - text (replicate numticks '`') <> text "{=" <> text fmt <> text "}" + literal (T.replicate numticks "`") <> literal str <> + literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}" let renderEmpty = mempty <$ report (InlineNotRendered il) case () of _ | plain -> renderEmpty | f `elem` ["markdown", "markdown_github", "markdown_phpextra", "markdown_mmd", "markdown_strict"] -> - return $ text str + return $ literal str | isEnabled Ext_raw_attribute opts -> rawAttribInline | f `elem` ["html", "html5", "html4"] -> case () of - _ | isEnabled Ext_raw_html opts -> return $ text str + _ | isEnabled Ext_raw_html opts -> return $ literal str | isEnabled Ext_raw_attribute opts -> rawAttribInline | otherwise -> renderEmpty | f `elem` ["latex", "tex"] -> case () of - _ | isEnabled Ext_raw_tex opts -> return $ text str + _ | isEnabled Ext_raw_tex opts -> return $ literal str | isEnabled Ext_raw_attribute opts -> rawAttribInline | otherwise -> renderEmpty | otherwise -> renderEmpty @@ -1220,12 +1219,12 @@ inlineToMarkdown opts (Cite (c:cs) lst) rest <- mapM convertOne cs let inbr = suffs <+> joincits rest br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ text ("@" ++ citationId c) <+> br + return $ literal ("@" <> citationId c) <+> br else do cits <- mapM convertOne (c:cs) - return $ text "[" <> joincits cits <> text "]" + return $ literal "[" <> joincits cits <> literal "]" where - joincits = hcat . intersperse (text "; ") . filter (not . isEmpty) + joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty) convertOne Citation { citationId = k , citationPrefix = pinlines , citationSuffix = sinlines @@ -1233,9 +1232,9 @@ inlineToMarkdown opts (Cite (c:cs) lst) = do pdoc <- inlineListToMarkdown opts pinlines sdoc <- inlineListToMarkdown opts sinlines - let k' = text (modekey m ++ "@" ++ k) + let k' = literal (modekey m <> "@" <> k) r = case sinlines of - Str (y:_):_ | y `elem` (",;]@" :: String) -> k' <> sdoc + Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc _ -> k' <+> sdoc return $ pdoc <+> r modekey SuppressAuthor = "-" @@ -1244,15 +1243,15 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . T.unpack . T.strip) <$> + (literal . T.strip) <$> writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [lnk]]) | otherwise = do plain <- asks envPlain linktext <- inlineListToMarkdown opts txt - let linktitle = if null tit + let linktitle = if T.null tit then empty - else text $ " \"" ++ tit ++ "\"" - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) + else literal $ " \"" <> tit <> "\"" + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True @@ -1262,12 +1261,12 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) let useShortcutRefLinks = shortcutable && isEnabled Ext_shortcut_reference_links opts reftext <- if useRefLinks - then text <$> getReference attr linktext (src, tit) + then literal <$> getReference attr linktext (src, tit) else return mempty return $ if useAuto then if plain - then text srcSuffix - else "<" <> text srcSuffix <> ">" + then literal srcSuffix + else "<" <> literal srcSuffix <> ">" else if useRefLinks then let first = "[" <> linktext <> "]" second = if getKey linktext == getKey reftext @@ -1279,13 +1278,13 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) else if plain then linktext else "[" <> linktext <> "](" <> - text src <> linktitle <> ")" <> + literal src <> linktitle <> ")" <> linkAttributes opts attr inlineToMarkdown opts img@(Image attr alternate (source, tit)) | isEnabled Ext_raw_html opts && not (isEnabled Ext_link_attributes opts) && attr /= nullAttr = -- use raw HTML - (text . T.unpack . T.strip) <$> + (literal . T.strip) <$> writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]]) | otherwise = do plain <- asks envPlain @@ -1300,7 +1299,7 @@ inlineToMarkdown opts img@(Image attr alternate (source, tit)) inlineToMarkdown opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get - let ref = text $ writerIdentifierPrefix opts ++ show (stNoteNum st + (length $ stNotes st) - 1) + let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + (length $ stNotes st) - 1) if isEnabled Ext_footnotes opts then return $ "[^" <> ref <> "]" else return $ "[" <> ref <> "]" diff --git a/src/Text/Pandoc/Writers/Math.hs b/src/Text/Pandoc/Writers/Math.hs index 3905a3abc..feb4b6dea 100644 --- a/src/Text/Pandoc/Writers/Math.hs +++ b/src/Text/Pandoc/Writers/Math.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Writers.Math ( texMathToInlines , convertMath @@ -8,6 +9,7 @@ module Text.Pandoc.Writers.Math where import Prelude +import qualified Data.Text as T import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Logging @@ -19,7 +21,7 @@ import Text.Pandoc.Options (defaultMathJaxURL, defaultKaTeXURL) -- can't be converted. texMathToInlines :: PandocMonad m => MathType - -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> T.Text -- ^ String to parse (assumes @'\n'@ line endings) -> m [Inline] texMathToInlines mt inp = do res <- convertMath writePandoc mt inp @@ -30,8 +32,8 @@ texMathToInlines mt inp = do return [mkFallback mt inp] Left il -> return [il] -mkFallback :: MathType -> String -> Inline -mkFallback mt str = Str (delim ++ str ++ delim) +mkFallback :: MathType -> T.Text -> Inline +mkFallback mt str = Str (delim <> str <> delim) where delim = case mt of DisplayMath -> "$$" InlineMath -> "$" @@ -40,7 +42,7 @@ mkFallback mt str = Str (delim ++ str ++ delim) -- issuing a warning and producing a fallback (a raw string) -- on failure. convertMath :: PandocMonad m - => (DisplayType -> [Exp] -> a) -> MathType -> String + => (DisplayType -> [Exp] -> a) -> MathType -> T.Text -> m (Either Inline a) convertMath writer mt str = case writer dt <$> readTeX str of diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index dc7b2575e..ad292200c 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.MediaWiki Copyright : Copyright (C) 2008-2019 John MacFarlane @@ -16,9 +18,10 @@ module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki, highlightingLangs ) where import Prelude import Control.Monad.Reader import Control.Monad.State.Strict -import Data.List (intercalate) +import Data.Maybe (fromMaybe) import qualified Data.Set as Set -import Data.Text (Text, pack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -37,7 +40,7 @@ data WriterState = WriterState { data WriterReader = WriterReader { options :: WriterOptions -- Writer options - , listLevel :: String -- String at beginning of list items, e.g. "**" + , listLevel :: [Char] -- String at beginning of list items, e.g. "**" , useTags :: Bool -- True if we should use HTML tags because we're in a complex list } @@ -55,15 +58,15 @@ pandocToMediaWiki :: PandocMonad m => Pandoc -> MediaWikiWriter m Text pandocToMediaWiki (Pandoc meta blocks) = do opts <- asks options metadata <- metaToContext opts - (fmap (literal . pack . trimr) . blockListToMediaWiki) - (fmap (literal . pack . trimr) . inlineListToMediaWiki) + (fmap (literal . trimr) . blockListToMediaWiki) + (fmap (literal . trimr) . inlineListToMediaWiki) meta body <- blockListToMediaWiki blocks notesExist <- gets stNotes let notes = if notesExist then "\n<references />" else "" - let main = pack $ body ++ notes + let main = body <> notes let context = defField "body" main $ defField "toc" (writerTableOfContents opts) metadata return $ @@ -72,43 +75,43 @@ pandocToMediaWiki (Pandoc meta blocks) = do Just tpl -> render Nothing $ renderTemplate tpl context -- | Escape special characters for MediaWiki. -escapeString :: String -> String -escapeString = escapeStringForXML +escapeText :: Text -> Text +escapeText = escapeStringForXML -- | Convert Pandoc block element to MediaWiki. blockToMediaWiki :: PandocMonad m => Block -- ^ Block element - -> MediaWikiWriter m String + -> MediaWikiWriter m Text blockToMediaWiki Null = return "" blockToMediaWiki (Div attrs bs) = do contents <- blockListToMediaWiki bs - return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++ - contents ++ "\n\n" ++ "</div>" + return $ render Nothing (tagWithAttrs "div" attrs) <> "\n\n" <> + contents <> "\n\n" <> "</div>" blockToMediaWiki (Plain inlines) = inlineListToMediaWiki inlines -- title beginning with fig: indicates that the image is a figure -blockToMediaWiki (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToMediaWiki (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do capt <- inlineListToMediaWiki txt img <- imageToMediaWiki attr - let opt = if null tit + let opt = if T.null tit then - if null capt + if T.null capt then "" - else "alt=" ++ capt - else "alt=" ++ tit - return $ "[[" ++ - intercalate "|" - (filter (not . null) ["File:" ++ src + else "alt=" <> capt + else "alt=" <> tit + return $ "[[" <> + T.intercalate "|" + (filter (not . T.null) ["File:" <> src , "thumb" , "none" , img , opt , capt - ]) ++ + ]) <> "]]\n" blockToMediaWiki (Para inlines) = do @@ -116,8 +119,8 @@ blockToMediaWiki (Para inlines) = do lev <- asks listLevel contents <- inlineListToMediaWiki inlines return $ if tags - then "<p>" ++ contents ++ "</p>" - else contents ++ if null lev then "\n" else "" + then "<p>" <> contents <> "</p>" + else contents <> if null lev then "\n" else "" blockToMediaWiki (LineBlock lns) = blockToMediaWiki $ linesToPara lns @@ -131,109 +134,109 @@ blockToMediaWiki HorizontalRule = return "\n-----\n" blockToMediaWiki (Header level _ inlines) = do contents <- inlineListToMediaWiki inlines - let eqs = replicate level '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" + let eqs = T.replicate level "=" + return $ eqs <> " " <> contents <> " " <> eqs <> "\n" blockToMediaWiki (CodeBlock (_,classes,_) str) = do let at = Set.fromList classes `Set.intersection` highlightingLangs return $ case Set.toList at of - [] -> "<pre" ++ (if null classes + [] -> "<pre" <> (if null classes then ">" - else " class=\"" ++ unwords classes ++ "\">") ++ - escapeString str ++ "</pre>" - (l:_) -> "<source lang=\"" ++ l ++ "\">" ++ str ++ "</source>" + else " class=\"" <> T.unwords classes <> "\">") <> + escapeText str <> "</pre>" + (l:_) -> "<source lang=\"" <> l <> "\">" <> str <> "</source>" -- note: no escape! even for <! blockToMediaWiki (BlockQuote blocks) = do contents <- blockListToMediaWiki blocks - return $ "<blockquote>" ++ contents ++ "</blockquote>" + return $ "<blockquote>" <> contents <> "</blockquote>" blockToMediaWiki (Table capt aligns widths headers rows') = do caption <- if null capt then return "" else do c <- inlineListToMediaWiki capt - return $ "|+ " ++ trimr c ++ "\n" + return $ "|+ " <> trimr c <> "\n" let headless = all null headers let allrows = if headless then rows' else headers:rows' - tableBody <- intercalate "|-\n" `fmap` + tableBody <- T.intercalate "|-\n" `fmap` mapM (tableRowToMediaWiki headless aligns widths) (zip [1..] allrows) - return $ "{|\n" ++ caption ++ tableBody ++ "|}\n" + return $ "{|\n" <> caption <> tableBody <> "|}\n" blockToMediaWiki x@(BulletList items) = do tags <- fmap (|| not (isSimpleList x)) $ asks useTags if tags then do contents <- local (\ s -> s { useTags = True }) $ mapM listItemToMediaWiki items - return $ "<ul>\n" ++ vcat contents ++ "</ul>\n" + return $ "<ul>\n" <> vcat contents <> "</ul>\n" else do lev <- asks listLevel - contents <- local (\s -> s { listLevel = listLevel s ++ "*" }) $ mapM listItemToMediaWiki items - return $ vcat contents ++ if null lev then "\n" else "" + contents <- local (\s -> s { listLevel = listLevel s <> "*" }) $ mapM listItemToMediaWiki items + return $ vcat contents <> if null lev then "\n" else "" blockToMediaWiki x@(OrderedList attribs items) = do tags <- fmap (|| not (isSimpleList x)) $ asks useTags if tags then do contents <- local (\s -> s { useTags = True }) $ mapM listItemToMediaWiki items - return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ "</ol>\n" + return $ "<ol" <> listAttribsToText attribs <> ">\n" <> vcat contents <> "</ol>\n" else do lev <- asks listLevel - contents <- local (\s -> s { listLevel = listLevel s ++ "#" }) $ mapM listItemToMediaWiki items - return $ vcat contents ++ if null lev then "\n" else "" + contents <- local (\s -> s { listLevel = listLevel s <> "#" }) $ mapM listItemToMediaWiki items + return $ vcat contents <> if null lev then "\n" else "" blockToMediaWiki x@(DefinitionList items) = do tags <- fmap (|| not (isSimpleList x)) $ asks useTags if tags then do contents <- local (\s -> s { useTags = True }) $ mapM definitionListItemToMediaWiki items - return $ "<dl>\n" ++ vcat contents ++ "</dl>\n" + return $ "<dl>\n" <> vcat contents <> "</dl>\n" else do lev <- asks listLevel - contents <- local (\s -> s { listLevel = listLevel s ++ ";" }) $ mapM definitionListItemToMediaWiki items - return $ vcat contents ++ if null lev then "\n" else "" + contents <- local (\s -> s { listLevel = listLevel s <> ";" }) $ mapM definitionListItemToMediaWiki items + return $ vcat contents <> if null lev then "\n" else "" -- Auxiliary functions for lists: -- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String -listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle +listAttribsToText :: ListAttributes -> Text +listAttribsToText (startnum, numstyle, _) = + let numstyle' = camelCaseToHyphenated $ tshow numstyle in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ + then " start=\"" <> tshow startnum <> "\"" + else "") <> (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + then " style=\"list-style-type: " <> numstyle' <> ";\"" else "") -- | Convert bullet or ordered list item (list of blocks) to MediaWiki. -listItemToMediaWiki :: PandocMonad m => [Block] -> MediaWikiWriter m String +listItemToMediaWiki :: PandocMonad m => [Block] -> MediaWikiWriter m Text listItemToMediaWiki items = do contents <- blockListToMediaWiki items tags <- asks useTags if tags - then return $ "<li>" ++ contents ++ "</li>" + then return $ "<li>" <> contents <> "</li>" else do marker <- asks listLevel - return $ marker ++ " " ++ contents + return $ T.pack marker <> " " <> contents -- | Convert definition list item (label, list of blocks) to MediaWiki. definitionListItemToMediaWiki :: PandocMonad m => ([Inline],[[Block]]) - -> MediaWikiWriter m String + -> MediaWikiWriter m Text definitionListItemToMediaWiki (label, items) = do labelText <- inlineListToMediaWiki label contents <- mapM blockListToMediaWiki items tags <- asks useTags if tags - then return $ "<dt>" ++ labelText ++ "</dt>\n" ++ - intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents) + then return $ "<dt>" <> labelText <> "</dt>\n" <> + T.intercalate "\n" (map (\d -> "<dd>" <> d <> "</dd>") contents) else do marker <- asks listLevel - return $ marker ++ " " ++ labelText ++ "\n" ++ - intercalate "\n" (map (\d -> init marker ++ ": " ++ d) contents) + return $ T.pack marker <> " " <> labelText <> "\n" <> + T.intercalate "\n" (map (\d -> T.pack (init marker) <> ": " <> d) contents) -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. isSimpleList :: Block -> Bool @@ -271,8 +274,8 @@ isPlainOrPara (Para _) = True isPlainOrPara _ = False -- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat = intercalate "\n" +vcat :: [Text] -> Text +vcat = T.intercalate "\n" -- Auxiliary functions for tables: @@ -281,119 +284,119 @@ tableRowToMediaWiki :: PandocMonad m -> [Alignment] -> [Double] -> (Int, [[Block]]) - -> MediaWikiWriter m String + -> MediaWikiWriter m Text tableRowToMediaWiki headless alignments widths (rownum, cells) = do cells' <- mapM (tableCellToMediaWiki headless rownum) $ zip3 alignments widths cells - return $ unlines cells' + return $ T.unlines cells' tableCellToMediaWiki :: PandocMonad m => Bool -> Int -> (Alignment, Double, [Block]) - -> MediaWikiWriter m String + -> MediaWikiWriter m Text tableCellToMediaWiki headless rownum (alignment, width, bs) = do contents <- blockListToMediaWiki bs let marker = if rownum == 1 && not headless then "!" else "|" - let percent w = show (truncate (100*w) :: Integer) ++ "%" - let attrs = ["align=" ++ show (alignmentToString alignment) | - alignment /= AlignDefault && alignment /= AlignLeft] ++ - ["width=\"" ++ percent width ++ "\"" | + let percent w = tshow (truncate (100*w) :: Integer) <> "%" + let attrs = ["align=" <> tshow (alignmentToText alignment) | + alignment /= AlignDefault && alignment /= AlignLeft] <> + ["width=\"" <> percent width <> "\"" | width /= 0.0 && rownum == 1] let attr = if null attrs then "" - else unwords attrs ++ "|" + else T.unwords attrs <> "|" let sep = case bs of [Plain _] -> " " [Para _] -> " " [] -> "" _ -> "\n" - return $ marker ++ attr ++ sep ++ trimr contents + return $ marker <> attr <> sep <> trimr contents -alignmentToString :: Alignment -> String -alignmentToString alignment = case alignment of +alignmentToText :: Alignment -> Text +alignmentToText alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" AlignDefault -> "left" -imageToMediaWiki :: PandocMonad m => Attr -> MediaWikiWriter m String +imageToMediaWiki :: PandocMonad m => Attr -> MediaWikiWriter m Text imageToMediaWiki attr = do opts <- gets stOptions let (_, cls, _) = attr toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing checkPct maybeDim = maybeDim - go (Just w) Nothing = w ++ "px" - go (Just w) (Just h) = w ++ "x" ++ h ++ "px" - go Nothing (Just h) = "x" ++ h ++ "px" + go (Just w) Nothing = w <> "px" + go (Just w) (Just h) = w <> "x" <> h <> "px" + go Nothing (Just h) = "x" <> h <> "px" go Nothing Nothing = "" dims = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) classes = if null cls then "" - else "class=" ++ unwords cls - return $ intercalate "|" $ filter (not . null) [dims, classes] + else "class=" <> T.unwords cls + return $ T.intercalate "|" $ filter (not . T.null) [dims, classes] -- | Convert list of Pandoc block elements to MediaWiki. blockListToMediaWiki :: PandocMonad m => [Block] -- ^ List of block elements - -> MediaWikiWriter m String + -> MediaWikiWriter m Text blockListToMediaWiki blocks = fmap vcat $ mapM blockToMediaWiki blocks -- | Convert list of Pandoc inline elements to MediaWiki. -inlineListToMediaWiki :: PandocMonad m => [Inline] -> MediaWikiWriter m String +inlineListToMediaWiki :: PandocMonad m => [Inline] -> MediaWikiWriter m Text inlineListToMediaWiki lst = - fmap concat $ mapM inlineToMediaWiki lst + fmap T.concat $ mapM inlineToMediaWiki lst -- | Convert Pandoc inline element to MediaWiki. -inlineToMediaWiki :: PandocMonad m => Inline -> MediaWikiWriter m String +inlineToMediaWiki :: PandocMonad m => Inline -> MediaWikiWriter m Text inlineToMediaWiki (Span attrs ils) = do contents <- inlineListToMediaWiki ils - return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "</span>" + return $ render Nothing (tagWithAttrs "span" attrs) <> contents <> "</span>" inlineToMediaWiki (Emph lst) = do contents <- inlineListToMediaWiki lst - return $ "''" ++ contents ++ "''" + return $ "''" <> contents <> "''" inlineToMediaWiki (Strong lst) = do contents <- inlineListToMediaWiki lst - return $ "'''" ++ contents ++ "'''" + return $ "'''" <> contents <> "'''" inlineToMediaWiki (Strikeout lst) = do contents <- inlineListToMediaWiki lst - return $ "<s>" ++ contents ++ "</s>" + return $ "<s>" <> contents <> "</s>" inlineToMediaWiki (Superscript lst) = do contents <- inlineListToMediaWiki lst - return $ "<sup>" ++ contents ++ "</sup>" + return $ "<sup>" <> contents <> "</sup>" inlineToMediaWiki (Subscript lst) = do contents <- inlineListToMediaWiki lst - return $ "<sub>" ++ contents ++ "</sub>" + return $ "<sub>" <> contents <> "</sub>" inlineToMediaWiki (SmallCaps lst) = inlineListToMediaWiki lst inlineToMediaWiki (Quoted SingleQuote lst) = do contents <- inlineListToMediaWiki lst - return $ "\8216" ++ contents ++ "\8217" + return $ "\8216" <> contents <> "\8217" inlineToMediaWiki (Quoted DoubleQuote lst) = do contents <- inlineListToMediaWiki lst - return $ "\8220" ++ contents ++ "\8221" + return $ "\8220" <> contents <> "\8221" inlineToMediaWiki (Cite _ lst) = inlineListToMediaWiki lst inlineToMediaWiki (Code _ str) = - return $ "<code>" ++ escapeString str ++ "</code>" + return $ "<code>" <> escapeText str <> "</code>" -inlineToMediaWiki (Str str) = return $ escapeString str +inlineToMediaWiki (Str str) = return $ escapeText str inlineToMediaWiki (Math mt str) = return $ - "<math display=\"" ++ - (if mt == DisplayMath then "block" else "inline") ++ - "\">" ++ str ++ "</math>" + "<math display=\"" <> + (if mt == DisplayMath then "block" else "inline") <> + "\">" <> str <> "</math>" -- note: str should NOT be escaped inlineToMediaWiki il@(RawInline f str) @@ -420,35 +423,34 @@ inlineToMediaWiki (Link _ txt (src, _)) = do case txt of [Str s] | isURI src && escapeURI s == src -> return src _ -> return $ if isURI src - then "[" ++ src ++ " " ++ label ++ "]" - else "[[" ++ src' ++ "|" ++ label ++ "]]" - where src' = case src of - '/':xs -> xs -- with leading / it's a - _ -> src -- link to a help page + then "[" <> src <> " " <> label <> "]" + else "[[" <> src' <> "|" <> label <> "]]" + -- with leading / it's a link to a help page + where src' = fromMaybe src $ T.stripPrefix "/" src inlineToMediaWiki (Image attr alt (source, tit)) = do img <- imageToMediaWiki attr alt' <- inlineListToMediaWiki alt - let txt = if null alt' - then if null tit + let txt = if T.null alt' + then if T.null tit then "" else tit else alt' - return $ "[[" ++ - intercalate "|" - (filter (not . null) - [ "File:" ++ source + return $ "[[" <> + T.intercalate "|" + (filter (not . T.null) + [ "File:" <> source , img , txt - ]) ++ "]]" + ]) <> "]]" inlineToMediaWiki (Note contents) = do contents' <- blockListToMediaWiki contents modify (\s -> s { stNotes = True }) - return $ "<ref>" ++ stripTrailingNewlines contents' ++ "</ref>" + return $ "<ref>" <> stripTrailingNewlines contents' <> "</ref>" -- note - does not work for notes with multiple blocks -highlightingLangs :: Set.Set String +highlightingLangs :: Set.Set Text highlightingLangs = Set.fromList [ "abap", "abl", diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 634255604..7e0a58134 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Ms Copyright : Copyright (C) 2007-2019 John MacFarlane @@ -21,7 +23,7 @@ TODO: module Text.Pandoc.Writers.Ms ( writeMs ) where import Prelude import Control.Monad.State.Strict -import Data.Char (isLower, isUpper, toUpper, ord) +import Data.Char (isLower, isUpper, ord) import Data.List (intercalate, intersperse) import qualified Data.Map as Map import Data.Maybe (catMaybes, fromMaybe) @@ -75,32 +77,33 @@ pandocToMs opts (Pandoc meta blocks) = do let context = defField "body" main $ defField "has-inline-math" hasInlineMath $ defField "hyphenate" True - $ defField "pandoc-version" (T.pack pandocVersion) + $ defField "pandoc-version" pandocVersion $ defField "toc" (writerTableOfContents opts) - $ defField "title-meta" (T.pack titleMeta) - $ defField "author-meta" (T.pack $ intercalate "; " authorsMeta) + $ defField "title-meta" titleMeta + $ defField "author-meta" (T.intercalate "; " authorsMeta) $ defField "highlighting-macros" highlightingMacros metadata return $ render colwidth $ case writerTemplate opts of Nothing -> main Just tpl -> renderTemplate tpl context -escapeStr :: WriterOptions -> String -> String +escapeStr :: WriterOptions -> Text -> Text escapeStr opts = escapeString (if writerPreferAscii opts then AsciiOnly else AllowUTF8) -escapeUri :: String -> String -escapeUri = escapeURIString (\c -> c /= '@' && isAllowedInURI c) +escapeUri :: Text -> Text +escapeUri = T.pack . escapeURIString (\c -> c /= '@' && isAllowedInURI c) . T.unpack -toSmallCaps :: WriterOptions -> String -> String -toSmallCaps _ [] = [] -toSmallCaps opts (c:cs) - | isLower c = let (lowers,rest) = span isLower (c:cs) - in "\\s-2" ++ escapeStr opts (map toUpper lowers) ++ - "\\s0" ++ toSmallCaps opts rest - | isUpper c = let (uppers,rest) = span isUpper (c:cs) - in escapeStr opts uppers ++ toSmallCaps opts rest - | otherwise = escapeStr opts [c] ++ toSmallCaps opts cs +toSmallCaps :: WriterOptions -> Text -> Text +toSmallCaps opts s = case T.uncons s of + Nothing -> "" + Just (c, cs) + | isLower c -> let (lowers,rest) = T.span isLower s + in "\\s-2" <> escapeStr opts (T.toUpper lowers) <> + "\\s0" <> toSmallCaps opts rest + | isUpper c -> let (uppers,rest) = T.span isUpper s + in escapeStr opts uppers <> toSmallCaps opts rest + | otherwise -> escapeStr opts (T.singleton c) <> toSmallCaps opts cs -- We split inline lists into sentences, and print one sentence per -- line. roff treats the line-ending period differently. @@ -112,11 +115,11 @@ blockToMs :: PandocMonad m -> MS m (Doc Text) blockToMs _ Null = return empty blockToMs opts (Div (ident,_,_) bs) = do - let anchor = if null ident + let anchor = if T.null ident then empty else nowrap $ - text ".pdfhref M " - <> doubleQuotes (text (toAscii ident)) + literal ".pdfhref M " + <> doubleQuotes (literal (toAscii ident)) setFirstPara res <- blockListToMs opts bs setFirstPara @@ -124,38 +127,38 @@ blockToMs opts (Div (ident,_,_) bs) = do blockToMs opts (Plain inlines) = liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines blockToMs opts (Para [Image attr alt (src,_tit)]) - | let ext = takeExtension src in (ext == ".ps" || ext == ".eps") = do + | let ext = takeExtension (T.unpack src) in (ext == ".ps" || ext == ".eps") = do let (mbW,mbH) = (inPoints opts <$> dimension Width attr, inPoints opts <$> dimension Height attr) let sizeAttrs = case (mbW, mbH) of (Just wp, Nothing) -> space <> doubleQuotes - (text (show (floor wp :: Int) ++ "p")) + (literal (tshow (floor wp :: Int) <> "p")) (Just wp, Just hp) -> space <> doubleQuotes - (text (show (floor wp :: Int) ++ "p")) <> + (literal (tshow (floor wp :: Int) <> "p")) <> space <> - doubleQuotes (text (show (floor hp :: Int))) + doubleQuotes (literal (tshow (floor hp :: Int))) _ -> empty capt <- inlineListToMs' opts alt - return $ nowrap (text ".PSPIC -C " <> - doubleQuotes (text (escapeStr opts src)) <> + return $ nowrap (literal ".PSPIC -C " <> + doubleQuotes (literal (escapeStr opts src)) <> sizeAttrs) $$ - text ".ce 1000" $$ + literal ".ce 1000" $$ capt $$ - text ".ce 0" + literal ".ce 0" blockToMs opts (Para inlines) = do firstPara <- gets stFirstPara resetFirstPara contents <- liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines - return $ text (if firstPara then ".LP" else ".PP") $$ contents + return $ literal (if firstPara then ".LP" else ".PP") $$ contents blockToMs _ b@(RawBlock f str) - | f == Format "ms" = return $ text str + | f == Format "ms" = return $ literal str | otherwise = do report $ BlockNotRendered b return empty blockToMs _ HorizontalRule = do resetFirstPara - return $ text ".HLINE" + return $ literal ".HLINE" blockToMs opts (Header level (ident,classes,_) inlines) = do setFirstPara modify $ \st -> st{ stInHeader = True } @@ -165,33 +168,33 @@ blockToMs opts (Header level (ident,classes,_) inlines) = do "unnumbered" `notElem` classes then (".NH", "\\*[SN]") else (".SH", "") - let anchor = if null ident + let anchor = if T.null ident then empty else nowrap $ - text ".pdfhref M " - <> doubleQuotes (text (toAscii ident)) - let bookmark = text ".pdfhref O " <> text (show level ++ " ") <> - doubleQuotes (text $ secnum ++ - (if null secnum + literal ".pdfhref M " + <> doubleQuotes (literal (toAscii ident)) + let bookmark = literal ".pdfhref O " <> literal (tshow level <> " ") <> + doubleQuotes (literal $ secnum <> + (if T.null secnum then "" - else " ") ++ + else " ") <> escapeStr opts (stringify inlines)) - let backlink = nowrap (text ".pdfhref L -D " <> - doubleQuotes (text (toAscii ident)) <> space <> text "\\") <> cr <> - text " -- " + let backlink = nowrap (literal ".pdfhref L -D " <> + doubleQuotes (literal (toAscii ident)) <> space <> literal "\\") <> cr <> + literal " -- " let tocEntry = if writerTableOfContents opts && level <= writerTOCDepth opts - then text ".XS" + then literal ".XS" $$ backlink <> doubleQuotes ( - nowrap (text (replicate level '\t') <> - (if null secnum + nowrap (literal (T.replicate level "\t") <> + (if T.null secnum then empty - else text secnum <> text "\\~\\~") + else literal secnum <> literal "\\~\\~") <> contents)) - $$ text ".XE" + $$ literal ".XE" else empty modify $ \st -> st{ stFirstPara = True } - return $ (text heading <> space <> text (show level)) $$ + return $ (literal heading <> space <> literal (tshow level)) $$ contents $$ bookmark $$ anchor $$ @@ -200,12 +203,12 @@ blockToMs opts (CodeBlock attr str) = do hlCode <- highlightCode opts attr str setFirstPara return $ - text ".IP" $$ - text ".nf" $$ - text "\\f[C]" $$ + literal ".IP" $$ + literal ".nf" $$ + literal "\\f[C]" $$ hlCode $$ - text "\\f[]" $$ - text ".fi" + literal "\\f[]" $$ + literal ".fi" blockToMs opts (LineBlock ls) = do setFirstPara -- use .LP, see #5588 blockToMs opts $ Para $ intercalate [LineBreak] ls @@ -213,7 +216,7 @@ blockToMs opts (BlockQuote blocks) = do setFirstPara contents <- blockListToMs opts blocks setFirstPara - return $ text ".RS" $$ contents $$ text ".RE" + return $ literal ".RS" $$ contents $$ literal ".RE" blockToMs opts (Table caption alignments widths headers rows) = let aligncode AlignLeft = "l" aligncode AlignRight = "r" @@ -223,15 +226,15 @@ blockToMs opts (Table caption alignments widths headers rows) = caption' <- inlineListToMs' opts caption let iwidths = if all (== 0) widths then repeat "" - else map (printf "w(%0.1fn)" . (70 *)) widths + else map (T.pack . printf "w(%0.1fn)" . (70 *)) widths -- 78n default width - 8n indent = 70n - let coldescriptions = text $ unwords - (zipWith (\align width -> aligncode align ++ width) - alignments iwidths) ++ "." + let coldescriptions = literal $ T.unwords + (zipWith (\align width -> aligncode align <> width) + alignments iwidths) <> "." colheadings <- mapM (blockListToMs opts) headers - let makeRow cols = text "T{" $$ - vcat (intersperse (text "T}\tT{") cols) $$ - text "T}" + let makeRow cols = literal "T{" $$ + vcat (intersperse (literal "T}\tT{") cols) $$ + literal "T}" let colheadings' = if all null headers then empty else makeRow colheadings $$ char '_' @@ -239,9 +242,9 @@ blockToMs opts (Table caption alignments widths headers rows) = cols <- mapM (blockListToMs opts) row return $ makeRow cols) rows setFirstPara - return $ text ".PP" $$ caption' $$ - text ".TS" $$ text "delim(@@) tab(\t);" $$ coldescriptions $$ - colheadings' $$ vcat body $$ text ".TE" + return $ literal ".PP" $$ caption' $$ + literal ".TS" $$ literal "delim(@@) tab(\t);" $$ coldescriptions $$ + colheadings' $$ vcat body $$ literal ".TE" blockToMs opts (BulletList items) = do contents <- mapM (bulletListItemToMs opts) items @@ -250,7 +253,7 @@ blockToMs opts (BulletList items) = do blockToMs opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs let indent = 2 + - maximum (map length markers) + maximum (map T.length markers) contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $ zip markers items setFirstPara @@ -268,20 +271,20 @@ bulletListItemToMs opts (Para first:rest) = bulletListItemToMs opts (Plain first:rest) = do first' <- blockToMs opts (Plain first) rest' <- blockListToMs opts rest - let first'' = text ".IP \\[bu] 3" $$ first' + let first'' = literal ".IP \\[bu] 3" $$ first' let rest'' = if null rest then empty - else text ".RS 3" $$ rest' $$ text ".RE" + else literal ".RS 3" $$ rest' $$ literal ".RE" return (first'' $$ rest'') bulletListItemToMs opts (first:rest) = do first' <- blockToMs opts first rest' <- blockListToMs opts rest - return $ text "\\[bu] .RS 3" $$ first' $$ rest' $$ text ".RE" + return $ literal "\\[bu] .RS 3" $$ first' $$ rest' $$ literal ".RE" -- | Convert ordered list item (a list of blocks) to ms. orderedListItemToMs :: PandocMonad m => WriterOptions -- ^ options - -> String -- ^ order marker for list item + -> Text -- ^ order marker for list item -> Int -- ^ number of spaces to indent -> [Block] -- ^ list item (list of blocks) -> MS m (Doc Text) @@ -291,12 +294,12 @@ orderedListItemToMs opts num indent (Para first:rest) = orderedListItemToMs opts num indent (first:rest) = do first' <- blockToMs opts first rest' <- blockListToMs opts rest - let num' = printf ("%" ++ show (indent - 1) ++ "s") num - let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' + let num' = T.pack $ printf ("%" <> show (indent - 1) <> "s") num + let first'' = literal (".IP \"" <> num' <> "\" " <> tshow indent) $$ first' let rest'' = if null rest then empty - else text ".RS " <> text (show indent) $$ - rest' $$ text ".RE" + else literal ".RS " <> literal (tshow indent) $$ + rest' $$ literal ".RE" return $ first'' $$ rest'' -- | Convert definition list item (label, list of blocks) to ms. @@ -317,8 +320,8 @@ definitionListItemToMs opts (label, defs) = do rest' <- liftM vcat $ mapM (\item -> blockToMs opts item) rest first' <- blockToMs opts first - return $ first' $$ text ".RS" $$ rest' $$ text ".RE" - return $ nowrap (text ".IP " <> doubleQuotes labelText) $$ contents + return $ first' $$ literal ".RS" $$ rest' $$ literal ".RE" + return $ nowrap (literal ".IP " <> doubleQuotes labelText) $$ contents -- | Convert list of Pandoc block elements to ms. blockListToMs :: PandocMonad m @@ -353,13 +356,13 @@ inlineToMs opts (Strikeout lst) = do contents <- inlineListToMs opts lst -- we use grey color instead of strikeout, which seems quite -- hard to do in roff for arbitrary bits of text - return $ text "\\m[strikecolor]" <> contents <> text "\\m[]" + return $ literal "\\m[strikecolor]" <> contents <> literal "\\m[]" inlineToMs opts (Superscript lst) = do contents <- inlineListToMs opts lst - return $ text "\\*{" <> contents <> text "\\*}" + return $ literal "\\*{" <> contents <> literal "\\*}" inlineToMs opts (Subscript lst) = do contents <- inlineListToMs opts lst - return $ text "\\*<" <> contents <> text "\\*>" + return $ literal "\\*<" <> contents <> literal "\\*>" inlineToMs opts (SmallCaps lst) = do -- see https://lists.gnu.org/archive/html/groff/2015-01/msg00016.html modify $ \st -> st{ stSmallCaps = not (stSmallCaps st) } @@ -371,40 +374,40 @@ inlineToMs opts (Quoted SingleQuote lst) = do return $ char '`' <> contents <> char '\'' inlineToMs opts (Quoted DoubleQuote lst) = do contents <- inlineListToMs opts lst - return $ text "\\[lq]" <> contents <> text "\\[rq]" + return $ literal "\\[lq]" <> contents <> literal "\\[rq]" inlineToMs opts (Cite _ lst) = inlineListToMs opts lst inlineToMs opts (Code attr str) = do hlCode <- highlightCode opts attr str withFontFeature 'C' (return hlCode) inlineToMs opts (Str str) = do - let shim = case str of - '.':_ -> afterBreak (T.pack "\\&") - _ -> empty + let shim = case T.uncons str of + Just ('.',_) -> afterBreak "\\&" + _ -> empty smallcaps <- gets stSmallCaps if smallcaps - then return $ shim <> text (toSmallCaps opts str) - else return $ shim <> text (escapeStr opts str) + then return $ shim <> literal (toSmallCaps opts str) + else return $ shim <> literal (escapeStr opts str) inlineToMs opts (Math InlineMath str) = do modify $ \st -> st{ stHasInlineMath = True } res <- convertMath writeEqn InlineMath str case res of Left il -> inlineToMs opts il - Right r -> return $ text "@" <> text r <> text "@" + Right r -> return $ literal "@" <> literal r <> literal "@" inlineToMs opts (Math DisplayMath str) = do res <- convertMath writeEqn InlineMath str case res of Left il -> do contents <- inlineToMs opts il - return $ cr <> text ".RS" $$ contents $$ text ".RE" + return $ cr <> literal ".RS" $$ contents $$ literal ".RE" Right r -> return $ - cr <> text ".EQ" $$ text r $$ text ".EN" <> cr + cr <> literal ".EQ" $$ literal r $$ literal ".EN" <> cr inlineToMs _ il@(RawInline f str) - | f == Format "ms" = return $ text str + | f == Format "ms" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty -inlineToMs _ LineBreak = return $ cr <> text ".br" <> cr +inlineToMs _ LineBreak = return $ cr <> literal ".br" <> cr inlineToMs opts SoftBreak = handleNotes opts $ case writerWrapText opts of @@ -412,27 +415,27 @@ inlineToMs opts SoftBreak = WrapNone -> space WrapPreserve -> cr inlineToMs opts Space = handleNotes opts space -inlineToMs opts (Link _ txt ('#':ident, _)) = do +inlineToMs opts (Link _ txt (T.uncons -> Just ('#',ident), _)) = do -- internal link contents <- inlineListToMs' opts $ map breakToSpace txt - return $ text "\\c" <> cr <> nowrap (text ".pdfhref L -D " <> - doubleQuotes (text (toAscii ident)) <> text " -A " <> - doubleQuotes (text "\\c") <> space <> text "\\") <> cr <> - text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" + return $ literal "\\c" <> cr <> nowrap (literal ".pdfhref L -D " <> + doubleQuotes (literal (toAscii ident)) <> literal " -A " <> + doubleQuotes (literal "\\c") <> space <> literal "\\") <> cr <> + literal " -- " <> doubleQuotes (nowrap contents) <> cr <> literal "\\&" inlineToMs opts (Link _ txt (src, _)) = do -- external link contents <- inlineListToMs' opts $ map breakToSpace txt - return $ text "\\c" <> cr <> nowrap (text ".pdfhref W -D " <> - doubleQuotes (text (escapeUri src)) <> text " -A " <> - doubleQuotes (text "\\c") <> space <> text "\\") <> cr <> - text " -- " <> doubleQuotes (nowrap contents) <> cr <> text "\\&" + return $ literal "\\c" <> cr <> nowrap (literal ".pdfhref W -D " <> + doubleQuotes (literal (escapeUri src)) <> literal " -A " <> + doubleQuotes (literal "\\c") <> space <> literal "\\") <> cr <> + literal " -- " <> doubleQuotes (nowrap contents) <> cr <> literal "\\&" inlineToMs opts (Image _ alternate (_, _)) = - return $ char '[' <> text "IMAGE: " <> - text (escapeStr opts (stringify alternate)) + return $ char '[' <> literal "IMAGE: " <> + literal (escapeStr opts (stringify alternate)) <> char ']' inlineToMs _ (Note contents) = do modify $ \st -> st{ stNotes = contents : stNotes st } - return $ text "\\**" + return $ literal "\\**" handleNotes :: PandocMonad m => WriterOptions -> Doc Text -> MS m (Doc Text) handleNotes opts fallback = do @@ -451,7 +454,7 @@ handleNote opts bs = do (Para ils : rest) -> Plain ils : rest _ -> bs contents <- blockListToMs opts bs' - return $ cr <> text ".FS" $$ contents $$ text ".FE" <> cr + return $ cr <> literal ".FS" $$ contents $$ literal ".FE" <> cr setFirstPara :: PandocMonad m => MS m () setFirstPara = modify $ \st -> st{ stFirstPara = True } @@ -467,38 +470,38 @@ breakToSpace x = x -- Highlighting styleToMs :: Style -> Doc Text -styleToMs sty = vcat $ colordefs ++ map (toMacro sty) alltoktypes +styleToMs sty = vcat $ colordefs <> map (toMacro sty) alltoktypes where alltoktypes = enumFromTo KeywordTok NormalTok colordefs = map toColorDef allcolors - toColorDef c = text (".defcolor " ++ - hexColor c ++ " rgb #" ++ hexColor c) + toColorDef c = literal (".defcolor " <> + hexColor c <> " rgb #" <> hexColor c) allcolors = catMaybes $ ordNub $ [defaultColor sty, backgroundColor sty, - lineNumberColor sty, lineNumberBackgroundColor sty] ++ + lineNumberColor sty, lineNumberBackgroundColor sty] <> concatMap (colorsForToken. snd) (Map.toList (tokenStyles sty)) colorsForToken ts = [tokenColor ts, tokenBackground ts] -hexColor :: Color -> String -hexColor (RGB r g b) = printf "%02x%02x%02x" r g b +hexColor :: Color -> Text +hexColor (RGB r g b) = T.pack $ printf "%02x%02x%02x" r g b toMacro :: Style -> TokenType -> Doc Text toMacro sty toktype = - nowrap (text ".ds " <> text (show toktype) <> text " " <> + nowrap (literal ".ds " <> literal (tshow toktype) <> literal " " <> setbg <> setcolor <> setfont <> - text "\\\\$1" <> + literal "\\\\$1" <> resetfont <> resetcolor <> resetbg) where setcolor = maybe empty fgcol tokCol - resetcolor = maybe empty (const $ text "\\\\m[]") tokCol + resetcolor = maybe empty (const $ literal "\\\\m[]") tokCol setbg = empty -- maybe empty bgcol tokBg resetbg = empty -- maybe empty (const $ text "\\\\M[]") tokBg - fgcol c = text $ "\\\\m[" ++ hexColor c ++ "]" - -- bgcol c = text $ "\\\\M[" ++ hexColor c ++ "]" + fgcol c = literal $ "\\\\m[" <> hexColor c <> "]" + -- bgcol c = literal $ "\\\\M[" <> hexColor c <> "]" setfont = if tokBold || tokItalic - then text $ "\\\\f[C" ++ ['B' | tokBold] ++ - ['I' | tokItalic] ++ "]" + then literal $ T.pack $ "\\\\f[C" <> ['B' | tokBold] <> + ['I' | tokItalic] <> "]" else empty resetfont = if tokBold || tokItalic - then text "\\\\f[C]" + then literal "\\\\f[C]" else empty tokSty = Map.lookup toktype (tokenStyles sty) tokCol = (tokSty >>= tokenColor) `mplus` defaultColor sty @@ -513,24 +516,24 @@ msFormatter :: WriterOptions -> FormatOptions -> [SourceLine] -> Doc Text msFormatter opts _fmtopts = vcat . map fmtLine where fmtLine = hcat . map fmtToken - fmtToken (toktype, tok) = text "\\*" <> - brackets (text (show toktype) <> text " \"" - <> text (escapeStr opts (T.unpack tok)) <> text "\"") + fmtToken (toktype, tok) = literal "\\*" <> + brackets (literal (tshow toktype) <> literal " \"" + <> literal (escapeStr opts tok) <> literal "\"") -highlightCode :: PandocMonad m => WriterOptions -> Attr -> String -> MS m (Doc Text) +highlightCode :: PandocMonad m => WriterOptions -> Attr -> Text -> MS m (Doc Text) highlightCode opts attr str = case highlight (writerSyntaxMap opts) (msFormatter opts) attr str of Left msg -> do - unless (null msg) $ report $ CouldNotHighlight msg - return $ text (escapeStr opts str) + unless (T.null msg) $ report $ CouldNotHighlight msg + return $ literal (escapeStr opts str) Right h -> do modify (\st -> st{ stHighlighting = True }) return h -- This is used for PDF anchors. -toAscii :: String -> String -toAscii = concatMap +toAscii :: Text -> Text +toAscii = T.concatMap (\c -> case toAsciiChar c of - Nothing -> '_':'u':show (ord c) ++ "_" - Just '/' -> '_':'u':show (ord c) ++ "_" -- see #4515 - Just c' -> [c']) + Nothing -> "_u" <> tshow (ord c) <> "_" + Just '/' -> "_u" <> tshow (ord c) <> "_" -- see #4515 + Just c' -> T.singleton c') diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index c6ff70f5b..b70345b3a 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Muse Copyright : Copyright (C) 2017-2019 Alexander Krotov @@ -31,7 +32,7 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace) import Data.Default -import Data.List (intersperse, isInfixOf, transpose) +import Data.List (intersperse, transpose) import qualified Data.Set as Set import qualified Data.Text as T import Data.Text (Text) @@ -66,7 +67,7 @@ data WriterEnv = data WriterState = WriterState { stNotes :: Notes , stNoteNum :: Int - , stIds :: Set.Set String + , stIds :: Set.Set Text , stUseTags :: Bool -- ^ Use tags for emphasis, for example because previous character is a letter } @@ -161,7 +162,7 @@ simpleTable caption headers rows = do rows' <- mapM (mapM blockListToMuse) rows let widthsInChars = maximum . map offset <$> transpose (headers' : rows') let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks - where sep' = lblock (length sep) $ text sep + where sep' = lblock (T.length sep) $ literal sep let makeRow sep = hpipeBlocks sep . zipWith lblock widthsInChars let head' = makeRow " || " headers' rows'' <- mapM (\row -> makeRow rowSeparator <$> mapM blockListToMuse row) rows @@ -192,12 +193,12 @@ blockToMuse (Para inlines) = do return $ contents <> blankline blockToMuse (LineBlock lns) = do lns' <- local (\env -> env { envOneLine = True }) $ mapM inlineListToMuse lns - return $ nowrap $ vcat (map (text "> " <>) lns') <> blankline + return $ nowrap $ vcat (map (literal "> " <>) lns') <> blankline blockToMuse (CodeBlock (_,_,_) str) = - return $ "<example>" $$ text str $$ "</example>" $$ blankline + return $ "<example>" $$ literal str $$ "</example>" $$ blankline blockToMuse (RawBlock (Format format) str) = - return $ blankline $$ "<literal style=\"" <> text format <> "\">" $$ - text str $$ "</literal>" $$ blankline + return $ blankline $$ "<literal style=\"" <> literal format <> "\">" $$ + literal str $$ "</literal>" $$ blankline blockToMuse (BlockQuote blocks) = do contents <- flatBlockListToMuse blocks return $ blankline @@ -212,10 +213,10 @@ blockToMuse (OrderedList (start, style, _) items) = do topLevel <- asks envTopLevel return $ (if topLevel then nest 1 else id) (vcat contents) $$ blankline where orderedListItemToMuse :: PandocMonad m - => String -- ^ marker for list item + => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> Muse m (Doc Text) - orderedListItemToMuse marker item = hang (length marker + 1) (text marker <> space) + orderedListItemToMuse marker item = hang (T.length marker + 1) (literal marker <> space) <$> blockListToMuse item blockToMuse (BulletList items) = do contents <- mapM bulletListItemToMuse items @@ -253,10 +254,10 @@ blockToMuse (Header level (ident,_,_) inlines) = do let autoId = uniqueIdent (writerExtensions opts) inlines ids modify $ \st -> st{ stIds = Set.insert autoId ids } - let attr' = if null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) + let attr' = if T.null ident || (isEnabled Ext_auto_identifiers opts && ident == autoId) then empty - else "#" <> text ident <> cr - let header' = if topLevel then text (replicate level '*') <> space else mempty + else "#" <> literal ident <> cr + let header' = if topLevel then literal (T.replicate level "*") <> space else mempty return $ blankline <> attr' $$ nowrap (header' <> contents) <> blankline -- https://www.gnu.org/software/emacs-muse/manual/muse.html#Horizontal-Rules-and-Anchors blockToMuse HorizontalRule = return $ blankline $$ "----" $$ blankline @@ -297,14 +298,14 @@ noteToMuse :: PandocMonad m -> [Block] -> Muse m (Doc Text) noteToMuse num note = do - res <- hang (length marker) (text marker) <$> + res <- hang (T.length marker) (literal marker) <$> local (\env -> env { envInsideBlock = True , envInlineStart = True , envAfterSpace = True }) (blockListToMuse note) return $ res <> blankline where - marker = "[" ++ show num ++ "] " + marker = "[" <> tshow num <> "] " -- | Return Muse representation of block and accumulated notes. blockToMuseWithNotes :: PandocMonad m @@ -330,30 +331,26 @@ blockToMuseWithNotes blk = do else return b -- | Escape special characters for Muse. -escapeString :: String -> String -escapeString s = - "<verbatim>" ++ - substitute "</verbatim>" "<</verbatim><verbatim>/verbatim>" s ++ +escapeText :: Text -> Text +escapeText s = + "<verbatim>" <> + T.replace "</verbatim>" "<</verbatim><verbatim>/verbatim>" s <> "</verbatim>" -- | Replace newlines with spaces -replaceNewlines :: String -> String -replaceNewlines ('\n':xs) = ' ':replaceNewlines xs -replaceNewlines (x:xs) = x:replaceNewlines xs -replaceNewlines [] = [] - -startsWithMarker :: (Char -> Bool) -> String -> Bool -startsWithMarker f (' ':xs) = startsWithMarker f xs -startsWithMarker f (x:xs) = - f x && (startsWithMarker f xs || startsWithDot xs) +replaceNewlines :: Text -> Text +replaceNewlines = T.map $ \c -> + if c == '\n' then ' ' else c + +startsWithMarker :: (Char -> Bool) -> Text -> Bool +startsWithMarker f t = case T.uncons $ T.dropWhile f' t of + Just ('.', xs) -> T.null xs || isSpace (T.head xs) + _ -> False where - startsWithDot ['.'] = True - startsWithDot ('.':c:_) = isSpace c - startsWithDot _ = False -startsWithMarker _ [] = False + f' c = c == ' ' || f c -containsNotes :: Char -> Char -> String -> Bool -containsNotes left right = p +containsNotes :: Char -> Char -> Text -> Bool +containsNotes left right = p . T.unpack -- This ought to be a parser where p (left':xs) | left' == left = q xs || p xs | otherwise = p xs @@ -370,29 +367,29 @@ containsNotes left right = p s [] = False -- | Return True if string should be escaped with <verbatim> tags -shouldEscapeString :: PandocMonad m - => String +shouldEscapeText :: PandocMonad m + => Text -> Muse m Bool -shouldEscapeString s = do +shouldEscapeText s = do insideLink <- asks envInsideLinkDescription - return $ null s || - any (`elem` ("#*<=|" :: String)) s || - "::" `isInfixOf` s || - "~~" `isInfixOf` s || - "[[" `isInfixOf` s || - ">>>" `isInfixOf` s || - ("]" `isInfixOf` s && insideLink) || + return $ T.null s || + T.any (`elem` ("#*<=|" :: String)) s || + "::" `T.isInfixOf` s || + "~~" `T.isInfixOf` s || + "[[" `T.isInfixOf` s || + ">>>" `T.isInfixOf` s || + ("]" `T.isInfixOf` s && insideLink) || containsNotes '[' ']' s || containsNotes '{' '}' s -- | Escape special characters for Muse if needed. -conditionalEscapeString :: PandocMonad m - => String - -> Muse m String -conditionalEscapeString s = do - shouldEscape <- shouldEscapeString s +conditionalEscapeText :: PandocMonad m + => Text + -> Muse m Text +conditionalEscapeText s = do + shouldEscape <- shouldEscapeText s return $ if shouldEscape - then escapeString s + then escapeText s else s -- Expand Math and Cite before normalizing inline list @@ -425,23 +422,23 @@ normalizeInlineList (Str "" : xs) normalizeInlineList (x : Str "" : xs) = normalizeInlineList (x:xs) normalizeInlineList (Str x1 : Str x2 : xs) - = normalizeInlineList $ Str (x1 ++ x2) : xs + = normalizeInlineList $ Str (x1 <> x2) : xs normalizeInlineList (Emph x1 : Emph x2 : ils) - = normalizeInlineList $ Emph (x1 ++ x2) : ils + = normalizeInlineList $ Emph (x1 <> x2) : ils normalizeInlineList (Strong x1 : Strong x2 : ils) - = normalizeInlineList $ Strong (x1 ++ x2) : ils + = normalizeInlineList $ Strong (x1 <> x2) : ils normalizeInlineList (Strikeout x1 : Strikeout x2 : ils) - = normalizeInlineList $ Strikeout (x1 ++ x2) : ils + = normalizeInlineList $ Strikeout (x1 <> x2) : ils normalizeInlineList (Superscript x1 : Superscript x2 : ils) - = normalizeInlineList $ Superscript (x1 ++ x2) : ils + = normalizeInlineList $ Superscript (x1 <> x2) : ils normalizeInlineList (Subscript x1 : Subscript x2 : ils) - = normalizeInlineList $ Subscript (x1 ++ x2) : ils + = normalizeInlineList $ Subscript (x1 <> x2) : ils normalizeInlineList (SmallCaps x1 : SmallCaps x2 : ils) - = normalizeInlineList $ SmallCaps (x1 ++ x2) : ils + = normalizeInlineList $ SmallCaps (x1 <> x2) : ils normalizeInlineList (Code _ x1 : Code _ x2 : ils) - = normalizeInlineList $ Code nullAttr (x1 ++ x2) : ils + = normalizeInlineList $ Code nullAttr (x1 <> x2) : ils normalizeInlineList (RawInline f1 x1 : RawInline f2 x2 : ils) | f1 == f2 - = normalizeInlineList $ RawInline f1 (x1 ++ x2) : ils + = normalizeInlineList $ RawInline f1 (x1 <> x2) : ils -- Do not join Span's during normalization normalizeInlineList (x:xs) = x : normalizeInlineList xs normalizeInlineList [] = [] @@ -461,33 +458,41 @@ startsWithSpace _ = False endsWithSpace :: [Inline] -> Bool endsWithSpace [Space] = True endsWithSpace [SoftBreak] = True -endsWithSpace [Str s] = stringStartsWithSpace $ reverse s +endsWithSpace [Str s] = stringEndsWithSpace s endsWithSpace (_:xs) = endsWithSpace xs endsWithSpace [] = False -urlEscapeBrackets :: String -> String -urlEscapeBrackets (']':xs) = '%':'5':'D':urlEscapeBrackets xs -urlEscapeBrackets (x:xs) = x:urlEscapeBrackets xs -urlEscapeBrackets [] = [] +urlEscapeBrackets :: Text -> Text +urlEscapeBrackets = T.concatMap $ \c -> case c of + ']' -> "%5D" + _ -> T.singleton c -isHorizontalRule :: String -> Bool -isHorizontalRule s = length s >= 4 && all (== '-') s +isHorizontalRule :: Text -> Bool +isHorizontalRule s = T.length s >= 4 && T.all (== '-') s -stringStartsWithSpace :: String -> Bool -stringStartsWithSpace (x:_) = isSpace x -stringStartsWithSpace "" = False +stringStartsWithSpace :: Text -> Bool +stringStartsWithSpace = maybe False (isSpace . fst) . T.uncons + +stringEndsWithSpace :: Text -> Bool +stringEndsWithSpace = maybe False (isSpace . snd) . T.unsnoc fixOrEscape :: Bool -> Inline -> Bool -fixOrEscape sp (Str "-") = sp -fixOrEscape sp (Str s@('-':x:_)) = (sp && isSpace x) || isHorizontalRule s -fixOrEscape sp (Str ";") = not sp -fixOrEscape sp (Str (';':x:_)) = not sp && isSpace x -fixOrEscape _ (Str ">") = True -fixOrEscape _ (Str ('>':x:_)) = isSpace x -fixOrEscape sp (Str s) = (sp && (startsWithMarker isDigit s || - startsWithMarker isAsciiLower s || - startsWithMarker isAsciiUpper s)) - || stringStartsWithSpace s +fixOrEscape b (Str s) = fixOrEscapeStr b s + where + fixOrEscapeStr sp t = case T.uncons t of + Just ('-', xs) + | T.null xs -> sp + | otherwise -> (sp && isSpace (T.head xs)) || isHorizontalRule t + Just (';', xs) + | T.null xs -> not sp + | otherwise -> not sp && isSpace (T.head xs) + Just ('>', xs) + | T.null xs -> True + | otherwise -> isSpace (T.head xs) + _ -> (sp && (startsWithMarker isDigit s || + startsWithMarker isAsciiLower s || + startsWithMarker isAsciiUpper s)) + || stringStartsWithSpace s fixOrEscape _ Space = True fixOrEscape _ SoftBreak = True fixOrEscape _ _ = False @@ -496,8 +501,8 @@ inlineListStartsWithAlnum :: PandocMonad m => [Inline] -> Muse m Bool inlineListStartsWithAlnum (Str s:_) = do - esc <- shouldEscapeString s - return $ esc || isAlphaNum (head s) + esc <- shouldEscapeText s + return $ esc || isAlphaNum (T.head s) inlineListStartsWithAlnum _ = return False -- | Convert list of Pandoc inline elements to Muse @@ -527,7 +532,7 @@ renderInlineList (x:xs) = do , envNearAsterisks = False }) $ renderInlineList xs if start && fixOrEscape afterSpace x - then pure (text "<verbatim></verbatim>" <> r <> lst') + then pure (literal "<verbatim></verbatim>" <> r <> lst') else pure (r <> lst') -- | Normalize and convert list of Pandoc inline elements to Muse. @@ -551,23 +556,23 @@ inlineListToMuse' lst = do , envAfterSpace = afterSpace || not topLevel }) $ inlineListToMuse lst -emphasis :: PandocMonad m => String -> String -> [Inline] -> Muse m (Doc Text) +emphasis :: PandocMonad m => Text -> Text -> [Inline] -> Muse m (Doc Text) emphasis b e lst = do contents <- local (\env -> env { envInsideAsterisks = inAsterisks }) $ inlineListToMuse lst modify $ \st -> st { stUseTags = useTags } - return $ text b <> contents <> text e - where inAsterisks = last b == '*' || head e == '*' - useTags = last e /= '>' + return $ literal b <> contents <> literal e + where inAsterisks = T.last b == '*' || T.head e == '*' + useTags = T.last e /= '>' -- | Convert Pandoc inline element to Muse. inlineToMuse :: PandocMonad m => Inline -> Muse m (Doc Text) inlineToMuse (Str str) = do - escapedStr <- conditionalEscapeString $ replaceNewlines str - let useTags = isAlphaNum $ last escapedStr -- escapedStr is never empty because empty strings are escaped + escapedStr <- conditionalEscapeText $ replaceNewlines str + let useTags = isAlphaNum $ T.last escapedStr -- escapedStr is never empty because empty strings are escaped modify $ \st -> st { stUseTags = useTags } - return $ text escapedStr + return $ literal escapedStr inlineToMuse (Emph [Strong lst]) = do useTags <- gets stUseTags let lst' = normalizeInlineList lst @@ -625,15 +630,16 @@ inlineToMuse Cite {} = inlineToMuse (Code _ str) = do useTags <- gets stUseTags modify $ \st -> st { stUseTags = False } - return $ if useTags || null str || '=' `elem` str || isSpace (head str) || isSpace (last str) - then "<code>" <> text (substitute "</code>" "<</code><code>/code>" str) <> "</code>" - else "=" <> text str <> "=" + return $ if useTags || T.null str || T.any (== '=') str + || isSpace (T.head str) || isSpace (T.last str) + then "<code>" <> literal (T.replace "</code>" "<</code><code>/code>" str) <> "</code>" + else "=" <> literal str <> "=" inlineToMuse Math{} = throwError $ PandocShouldNeverHappenError "Math should be expanded before normalization" inlineToMuse (RawInline (Format f) str) = do modify $ \st -> st { stUseTags = False } - return $ "<literal style=\"" <> text f <> "\">" <> text str <> "</literal>" + return $ "<literal style=\"" <> literal f <> "\">" <> literal str <> "</literal>" inlineToMuse LineBreak = do oneline <- asks envOneLine modify $ \st -> st { stUseTags = False } @@ -650,27 +656,27 @@ inlineToMuse (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> do modify $ \st -> st { stUseTags = False } - return $ "[[" <> text (escapeLink x) <> "]]" + return $ "[[" <> literal (escapeLink x) <> "]]" _ -> do contents <- local (\env -> env { envInsideLinkDescription = True }) $ inlineListToMuse txt modify $ \st -> st { stUseTags = False } - return $ "[[" <> text (escapeLink src) <> "][" <> contents <> "]]" - where escapeLink lnk = if isImageUrl lnk then "URL:" ++ urlEscapeBrackets lnk else urlEscapeBrackets lnk + return $ "[[" <> literal (escapeLink src) <> "][" <> contents <> "]]" + where escapeLink lnk = if isImageUrl lnk then "URL:" <> urlEscapeBrackets lnk else urlEscapeBrackets lnk -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] - isImageUrl = (`elem` imageExtensions) . takeExtension -inlineToMuse (Image attr alt (source,'f':'i':'g':':':title)) = + isImageUrl = (`elem` imageExtensions) . takeExtension . T.unpack +inlineToMuse (Image attr alt (source,T.stripPrefix "fig:" -> Just title)) = inlineToMuse (Image attr alt (source,title)) inlineToMuse (Image attr@(_, classes, _) inlines (source, title)) = do opts <- asks envOptions alt <- local (\env -> env { envInsideLinkDescription = True }) $ inlineListToMuse inlines - title' <- if null title + title' <- if T.null title then if null inlines then return "" else return $ "[" <> alt <> "]" - else do s <- local (\env -> env { envInsideLinkDescription = True }) $ conditionalEscapeString title - return $ "[" <> text s <> "]" + else do s <- local (\env -> env { envInsideLinkDescription = True }) $ conditionalEscapeText title + return $ "[" <> literal s <> "]" let width = case dimension Width attr of - Just (Percent x) | isEnabled Ext_amuse opts -> " " ++ show (round x :: Integer) + Just (Percent x) | isEnabled Ext_amuse opts -> " " <> tshow (round x :: Integer) _ -> "" let leftalign = if "align-left" `elem` classes then " l" @@ -679,7 +685,7 @@ inlineToMuse (Image attr@(_, classes, _) inlines (source, title)) = do then " r" else "" modify $ \st -> st { stUseTags = False } - return $ "[[" <> text (urlEscapeBrackets source ++ width ++ leftalign ++ rightalign) <> "]" <> title' <> "]" + return $ "[[" <> literal (urlEscapeBrackets source <> width <> leftalign <> rightalign) <> "]" <> title' <> "]" inlineToMuse (Note contents) = do -- add to notes in state notes <- gets stNotes @@ -687,19 +693,19 @@ inlineToMuse (Note contents) = do , stUseTags = False } n <- gets stNoteNum - let ref = show $ n + length notes - return $ "[" <> text ref <> "]" + let ref = tshow $ n + length notes + return $ "[" <> literal ref <> "]" inlineToMuse (Span (anchor,names,kvs) inlines) = do contents <- inlineListToMuse inlines let (contents', hasDir) = case lookup "dir" kvs of Just "rtl" -> ("<<<" <> contents <> ">>>", True) Just "ltr" -> (">>>" <> contents <> "<<<", True) _ -> (contents, False) - let anchorDoc = if null anchor + let anchorDoc = if T.null anchor then mempty - else text ('#':anchor) <> space + else literal ("#" <> anchor) <> space modify $ \st -> st { stUseTags = False } - return $ anchorDoc <> (if null inlines && not (null anchor) + return $ anchorDoc <> (if null inlines && not (T.null anchor) then mempty else (if null names then (if hasDir then contents' else "<class>" <> contents' <> "</class>") - else "<class name=\"" <> text (head names) <> "\">" <> contents' <> "</class>")) + else "<class name=\"" <> literal (head names) <> "\">" <> contents' <> "</class>")) diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 3d8bfbca7..a5ea4b641 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.ODT Copyright : Copyright (C) 2008-2019 John MacFarlane @@ -18,9 +19,9 @@ import Control.Monad.Except (catchError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Generics (everywhere', mkT) -import Data.List (isPrefixOf, intercalate) -import Data.Maybe (fromMaybe) +import Data.List (isPrefixOf) import qualified Data.Map as Map +import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Time import System.FilePath (takeDirectory, takeExtension, (<.>)) @@ -33,7 +34,7 @@ import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.DocLayout -import Text.Pandoc.Shared (stringify, pandocVersion) +import Text.Pandoc.Shared (stringify, pandocVersion, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, fixDisplayMath) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) @@ -89,7 +90,7 @@ pandocToODT opts doc@(Pandoc meta _) = do Nothing -> empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) - ,("manifest:full-path", fp) + ,("manifest:full-path", T.pack fp) ,("manifest:version", "1.2") ] let files = [ ent | ent <- filesInArchive archive, @@ -114,7 +115,7 @@ pandocToODT opts doc@(Pandoc meta _) = do let userDefinedMetaFields = [k | k <- Map.keys (unMeta meta) , k `notElem` ["title", "lang", "author" , "description", "subject", "keywords"]] - let escapedText = text . escapeStringForXML + let escapedText = text . T.unpack . escapeStringForXML let keywords = case lookupMeta "keywords" meta of Just (MetaList xs) -> map stringify xs _ -> [] @@ -136,17 +137,17 @@ pandocToODT opts doc@(Pandoc meta _) = do ,("xmlns:ooo","http://openoffice.org/2004/office") ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") ,("office:version","1.2")] ( inTags True "office:meta" [] $ - ( metaTag "meta:generator" ("Pandoc/" ++ pandocVersion) + ( metaTag "meta:generator" ("Pandoc/" <> pandocVersion) $$ metaTag "dc:title" (stringify title) $$ metaTag "dc:description" - (intercalate "\n" (map stringify $ + (T.intercalate "\n" (map stringify $ lookupMetaBlocks "description" meta)) $$ metaTag "dc:subject" (lookupMetaString "subject" meta) $$ - metaTag "meta:keyword" (intercalate ", " keywords) + metaTag "meta:keyword" (T.intercalate ", " keywords) $$ case lang of Just l -> metaTag "dc:language" (renderLang l) @@ -156,8 +157,8 @@ pandocToODT opts doc@(Pandoc meta _) = do $$ metaTag "dc:creator" a $$ metaTag "meta:creation-date" d $$ metaTag "dc:date" d - ) (formatTime defaultTimeLocale "%FT%XZ" utctime) - (intercalate "; " (map stringify authors)) + ) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) + (T.intercalate "; " (map stringify authors)) $$ vcat userDefinedMeta ) @@ -190,9 +191,9 @@ updateStyleWithLang (Just lang) arch = do addLang :: Lang -> Element -> Element addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) - = Attr n (langLanguage lang) + = Attr n (T.unpack $ langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n (langRegion lang) + = Attr n (T.unpack $ langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements @@ -206,12 +207,12 @@ transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError return (100, 100) let dims = case (getDim Width, getDim Height) of - (Just w, Just h) -> [("width", show w), ("height", show h)] - (Just w@(Percent _), Nothing) -> [("rel-width", show w),("rel-height", "scale"),("width", show ptX ++ "pt"),("height", show ptY ++ "pt")] - (Nothing, Just h@(Percent _)) -> [("rel-width", "scale"),("rel-height", show h),("width", show ptX ++ "pt"),("height", show ptY ++ "pt")] - (Just w@(Inch i), Nothing) -> [("width", show w), ("height", show (i / ratio) ++ "in")] - (Nothing, Just h@(Inch i)) -> [("width", show (i * ratio) ++ "in"), ("height", show h)] - _ -> [("width", show ptX ++ "pt"), ("height", show ptY ++ "pt")] + (Just w, Just h) -> [("width", tshow w), ("height", tshow h)] + (Just w@(Percent _), Nothing) -> [("rel-width", tshow w),("rel-height", "scale"),("width", tshow ptX <> "pt"),("height", tshow ptY <> "pt")] + (Nothing, Just h@(Percent _)) -> [("rel-width", "scale"),("rel-height", tshow h),("width", tshow ptX <> "pt"),("height", tshow ptY <> "pt")] + (Just w@(Inch i), Nothing) -> [("width", tshow w), ("height", tshow (i / ratio) <> "in")] + (Nothing, Just h@(Inch i)) -> [("width", tshow (i * ratio) <> "in"), ("height", tshow h)] + _ -> [("width", tshow ptX <> "pt"), ("height", tshow ptY <> "pt")] where ratio = ptX / ptY getDim dir = case dimension dir attr of @@ -220,16 +221,16 @@ transformPicMath opts (Image attr@(id', cls, _) lab (src,t)) = catchError Nothing -> Nothing let newattr = (id', cls, dims) entries <- gets stEntries - let extension = fromMaybe (takeExtension $ takeWhile (/='?') src) + let extension = maybe (takeExtension $ takeWhile (/='?') $ T.unpack src) T.unpack (mbMimeType >>= extensionFromMimeType) let newsrc = "Pictures/" ++ show (length entries) <.> extension let toLazy = B.fromChunks . (:[]) epochtime <- floor `fmap` lift P.getPOSIXTime let entry = toEntry newsrc epochtime $ toLazy img modify $ \st -> st{ stEntries = entry : entries } - return $ Image newattr lab (newsrc, t)) + return $ Image newattr lab (T.pack newsrc, t)) (\e -> do - report $ CouldNotFetchResource src (show e) + report $ CouldNotFetchResource src $ T.pack (show e) return $ Emph lab) transformPicMath _ (Math t math) = do @@ -257,7 +258,7 @@ transformPicMath _ (Math t math) = do ,("text:anchor-type","paragraph")] else [("draw:style-name","fr1") ,("text:anchor-type","as-char")]) $ - selfClosingTag "draw:object" [("xlink:href", dirname) + selfClosingTag "draw:object" [("xlink:href", T.pack dirname) , ("xlink:type", "simple") , ("xlink:show", "embed") , ("xlink:actuate", "onLoad")] diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index 97ff86156..3f1d9701c 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.OOXML Copyright : Copyright (C) 2012-2019 John MacFarlane @@ -11,6 +12,7 @@ Functions common to OOXML writers (Docx and Powerpoint) -} module Text.Pandoc.Writers.OOXML ( mknode + , mktnode , nodename , toLazy , renderXml @@ -31,6 +33,7 @@ import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 import Data.Maybe (mapMaybe) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad) import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light as XML @@ -39,6 +42,9 @@ mknode :: Node t => String -> [(String,String)] -> t -> Element mknode s attrs = add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) +mktnode :: String -> [(String,String)] -> T.Text -> Element +mktnode s attrs = mknode s attrs . T.unpack + nodename :: String -> QName nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } where (name, prefix) = case break (==':') s of @@ -57,10 +63,10 @@ parseXml refArchive distArchive relpath = case findEntryByPath relpath refArchive `mplus` findEntryByPath relpath distArchive of Nothing -> throwError $ PandocSomeError $ - relpath ++ " missing in reference file" + T.pack relpath <> " missing in reference file" Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of Nothing -> throwError $ PandocSomeError $ - relpath ++ " corrupt in reference file" + T.pack relpath <> " corrupt in reference file" Just d -> return d -- Copied from Util diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index cf6f9a037..3f5c0d341 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Writers.OPML Copyright : Copyright (C) 2013-2019 John MacFarlane @@ -56,12 +57,12 @@ writeHtmlInlines ils = T.strip <$> writeHtml5String def (Pandoc nullMeta [Plain ils]) -- date format: RFC 822: Thu, 14 Jul 2005 23:41:05 GMT -showDateTimeRFC822 :: UTCTime -> String -showDateTimeRFC822 = formatTime defaultTimeLocale "%a, %d %b %Y %X %Z" +showDateTimeRFC822 :: UTCTime -> Text +showDateTimeRFC822 = T.pack . formatTime defaultTimeLocale "%a, %d %b %Y %X %Z" -convertDate :: [Inline] -> String +convertDate :: [Inline] -> Text convertDate ils = maybe "" showDateTimeRFC822 $ - parseTimeM True defaultTimeLocale "%F" =<< normalizeDate (stringify ils) + parseTimeM True defaultTimeLocale "%F" . T.unpack =<< normalizeDate (stringify ils) -- | Convert a Block to OPML. blockToOPML :: PandocMonad m => WriterOptions -> Block -> m (Doc Text) @@ -73,8 +74,8 @@ blockToOPML opts (Div (_,"section":_,_) (Header _ _ title : xs)) = do md <- if null blocks then return mempty else writeMarkdown def $ Pandoc nullMeta blocks - let attrs = ("text", T.unpack htmlIls) : - [("_note", T.unpack $ T.stripEnd md) | not (null blocks)] + let attrs = ("text", htmlIls) : + [("_note", T.stripEnd md) | not (null blocks)] rest' <- vcat <$> mapM (blockToOPML opts) rest return $ inTags True "outline" attrs rest' blockToOPML _ _ = return empty diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 9c6867797..58d4698a8 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -2,6 +2,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.OpenDocument Copyright : Copyright (C) 2008-2019 Andrea Rossato and John MacFarlane @@ -24,6 +25,7 @@ import Data.Maybe (fromMaybe) import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.BCP47 (Lang (..), parseBCP47) import Text.Pandoc.Class (PandocMonad, report, translateTerm, setTranslations, toLang) @@ -31,7 +33,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout -import Text.Pandoc.Shared (linesToPara) +import Text.Pandoc.Shared (linesToPara, tshow) import Text.Pandoc.Templates (renderTemplate) import qualified Text.Pandoc.Translations as Term (Term(Figure, Table)) import Text.Pandoc.Writers.Math @@ -56,7 +58,7 @@ data WriterState = , stParaStyles :: [Doc Text] , stListStyles :: [(Int, [Doc Text])] , stTextStyles :: Map.Map (Set.Set TextStyle) - (String, Doc Text) + (Text, Doc Text) , stTextStyleAttr :: Set.Set TextStyle , stIndentPara :: Int , stInDefinition :: Bool @@ -97,7 +99,7 @@ addParaStyle :: PandocMonad m => Doc Text -> OD m () addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s } addTextStyle :: PandocMonad m - => Set.Set TextStyle -> (String, Doc Text) -> OD m () + => Set.Set TextStyle -> (Text, Doc Text) -> OD m () addTextStyle attrs i = modify $ \s -> s { stTextStyles = Map.insert attrs i (stTextStyles s) } @@ -130,10 +132,10 @@ inParagraphTags d = do else return [("text:style-name", "Text_20_body")] return $ inTags False "text:p" a d -inParagraphTagsWithStyle :: String -> Doc Text -> Doc Text +inParagraphTagsWithStyle :: Text -> Doc Text -> Doc Text inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] -inSpanTags :: String -> Doc Text -> Doc Text +inSpanTags :: Text -> Doc Text -> Doc Text inSpanTags s = inTags False "text:span" [("text:style-name",s)] withTextStyle :: PandocMonad m => TextStyle -> OD m a -> OD m a @@ -155,7 +157,7 @@ inTextStyle d = do Just (styleName, _) -> return $ inTags False "text:span" [("text:style-name",styleName)] d Nothing -> do - let styleName = "T" ++ show (Map.size styles + 1) + let styleName = "T" <> tshow (Map.size styles + 1) addTextStyle at (styleName, inTags False "style:style" [("style:name", styleName) @@ -184,11 +186,11 @@ formulaStyle mt = inTags False "style:style" ,("style:horizontal-rel", "paragraph-content") ,("style:wrap", "none")] -inHeaderTags :: PandocMonad m => Int -> String -> Doc Text -> OD m (Doc Text) +inHeaderTags :: PandocMonad m => Int -> Text -> Doc Text -> OD m (Doc Text) inHeaderTags i ident d = - return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) - , ("text:outline-level", show i)] - $ if null ident + return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" <> tshow i) + , ("text:outline-level", tshow i)] + $ if T.null ident then d else selfClosingTag "text:bookmark-start" [ ("text:name", ident) ] <> d <> @@ -198,18 +200,19 @@ inQuotes :: QuoteType -> Doc Text -> Doc Text inQuotes SingleQuote s = char '\8216' <> s <> char '\8217' inQuotes DoubleQuote s = char '\8220' <> s <> char '\8221' -handleSpaces :: String -> Doc Text -handleSpaces s - | ( ' ':_) <- s = genTag s - | ('\t':x) <- s = selfClosingTag "text:tab" [] <> rm x - | otherwise = rm s - where - genTag = span (==' ') >>> tag . length *** rm >>> uncurry (<>) - tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", show n)] - rm ( ' ':xs) = char ' ' <> genTag xs - rm ('\t':xs) = selfClosingTag "text:tab" [] <> genTag xs - rm ( x:xs) = char x <> rm xs - rm [] = empty +handleSpaces :: Text -> Doc Text +handleSpaces s = case T.uncons s of + Just (' ', _) -> genTag s + Just ('\t',x) -> selfClosingTag "text:tab" [] <> rm x + _ -> rm s + where + genTag = T.span (==' ') >>> tag . T.length *** rm >>> uncurry (<>) + tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", tshow n)] + rm t = case T.uncons t of + Just ( ' ',xs) -> char ' ' <> genTag xs + Just ('\t',xs) -> selfClosingTag "text:tab" [] <> genTag xs + Just ( x,xs) -> char x <> rm xs + Nothing -> empty -- | Convert Pandoc document to string in OpenDocument format. writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -234,7 +237,7 @@ writeOpenDocument opts (Pandoc meta blocks) = do map snd (sortBy (flip (comparing fst)) ( Map.elems (stTextStyles s))) listStyle (n,l) = inTags True "text:list-style" - [("style:name", "L" ++ show n)] (vcat l) + [("style:name", "L" <> tshow n)] (vcat l) let listStyles = map listStyle (stListStyles s) let automaticStyles = vcat $ reverse $ styles ++ listStyles let context = defField "body" body @@ -247,17 +250,17 @@ writeOpenDocument opts (Pandoc meta blocks) = do Just tpl -> renderTemplate tpl context withParagraphStyle :: PandocMonad m - => WriterOptions -> String -> [Block] -> OD m (Doc Text) + => WriterOptions -> Text -> [Block] -> OD m (Doc Text) withParagraphStyle o s (b:bs) | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l | otherwise = go =<< blockToOpenDocument o b where go i = (<>) i <$> withParagraphStyle o s bs withParagraphStyle _ _ [] = return empty -inPreformattedTags :: PandocMonad m => String -> OD m (Doc Text) +inPreformattedTags :: PandocMonad m => Text -> OD m (Doc Text) inPreformattedTags s = do n <- paraStyle [("style:parent-style-name","Preformatted_20_Text")] - return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s + return . inParagraphTagsWithStyle ("P" <> tshow n) . handleSpaces $ s orderedListToOpenDocument :: PandocMonad m => WriterOptions -> Int -> [[Block]] -> OD m (Doc Text) @@ -269,7 +272,7 @@ orderedItemToOpenDocument :: PandocMonad m => WriterOptions -> Int -> [Block] -> OD m (Doc Text) orderedItemToOpenDocument o n bs = vcat <$> mapM go bs where go (OrderedList a l) = newLevel a l - go (Para l) = inParagraphTagsWithStyle ("P" ++ show n) <$> + go (Para l) = inParagraphTagsWithStyle ("P" <> tshow n) <$> inlinesToOpenDocument o l go b = blockToOpenDocument o b newLevel a l = do @@ -300,11 +303,11 @@ bulletListToOpenDocument o b = do ln <- (+) 1 . length <$> gets stListStyles (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln modify $ \s -> s { stListStyles = ns : stListStyles s } - is <- listItemsToOpenDocument ("P" ++ show pn) o b - return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is + is <- listItemsToOpenDocument ("P" <> tshow pn) o b + return $ inTags True "text:list" [("text:style-name", "L" <> tshow ln)] is listItemsToOpenDocument :: PandocMonad m - => String -> WriterOptions -> [[Block]] -> OD m (Doc Text) + => Text -> WriterOptions -> [[Block]] -> OD m (Doc Text) listItemsToOpenDocument s o is = vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is @@ -326,7 +329,7 @@ inBlockQuote o i (b:bs) ni <- paraStyle [("style:parent-style-name","Quotations")] go =<< inBlockQuote o ni (map plainToPara l) - | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l + | Para l <- b = go =<< inParagraphTagsWithStyle ("P" <> tshow i) <$> inlinesToOpenDocument o l | otherwise = go =<< blockToOpenDocument o b where go block = ($$) block <$> inBlockQuote o i bs inBlockQuote _ _ [] = resetIndent >> return empty @@ -341,7 +344,7 @@ blockToOpenDocument o bs | Plain b <- bs = if null b then return empty else inParagraphTags =<< inlinesToOpenDocument o b - | Para [Image attr c (s,'f':'i':'g':':':t)] <- bs + | Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] <- bs = figure attr c s t | Para b <- bs = if null b && not (isEnabled Ext_empty_paragraphs o) @@ -362,7 +365,7 @@ blockToOpenDocument o bs | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]) | RawBlock f s <- bs = if f == Format "opendocument" - then return $ text s + then return $ text $ T.unpack s else do report $ BlockNotRendered bs return empty @@ -373,21 +376,21 @@ blockToOpenDocument o bs r <- vcat <$> mapM (deflistItemToOpenDocument o) b setInDefinitionList False return r - preformatted s = (flush . vcat) <$> mapM (inPreformattedTags . escapeStringForXML) (lines s) + preformatted s = (flush . vcat) <$> mapM (inPreformattedTags . escapeStringForXML) (T.lines s) mkBlockQuote b = do increaseIndent i <- paraStyle [("style:parent-style-name","Quotations")] inBlockQuote o i (map plainToPara b) orderedList a b = do (ln,pn) <- newOrderedListStyle (isTightList b) a - inTags True "text:list" [ ("text:style-name", "L" ++ show ln)] + inTags True "text:list" [ ("text:style-name", "L" <> tshow ln)] <$> orderedListToOpenDocument o pn b table c a w h r = do tn <- length <$> gets stTableStyles pn <- length <$> gets stParaStyles let genIds = map chr [65..] - name = "Table" ++ show (tn + 1) + name = "Table" <> tshow (tn + 1) columnIds = zip genIds w - mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name ++ "." ++ [fst n])] + mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name <> "." <> T.singleton (fst n))] columns = map mkColumn columnIds paraHStyles = paraTableStyles "Heading" pn a paraStyles = paraTableStyles "Contents" (pn + length (newPara paraHStyles)) a @@ -434,36 +437,36 @@ numberedFigureCaption caption = do capterm <- translateTerm Term.Figure return $ numberedCaption "FigureCaption" capterm "Illustration" id' caption -numberedCaption :: String -> String -> String -> Int -> Doc Text -> Doc Text +numberedCaption :: Text -> Text -> Text -> Int -> Doc Text -> Doc Text numberedCaption style term name num caption = - let t = text term + let t = text $ T.unpack term r = num - 1 - s = inTags False "text:sequence" [ ("text:ref-name", "ref" ++ name ++ show r), + s = inTags False "text:sequence" [ ("text:ref-name", "ref" <> name <> tshow r), ("text:name", name), - ("text:formula", "ooow:" ++ name ++ "+1"), + ("text:formula", "ooow:" <> name <> "+1"), ("style:num-format", "1") ] $ text $ show num c = text ": " in inParagraphTagsWithStyle style $ hcat [ t, text " ", s, c, caption ] -unNumberedCaption :: Monad m => String -> Doc Text -> OD m (Doc Text) +unNumberedCaption :: Monad m => Text -> Doc Text -> OD m (Doc Text) unNumberedCaption style caption = return $ inParagraphTagsWithStyle style caption colHeadsToOpenDocument :: PandocMonad m - => WriterOptions -> [String] -> [[Block]] + => WriterOptions -> [Text] -> [[Block]] -> OD m (Doc Text) colHeadsToOpenDocument o ns hs = inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o "TableHeaderRowCell") (zip ns hs) tableRowToOpenDocument :: PandocMonad m - => WriterOptions -> [String] -> [[Block]] + => WriterOptions -> [Text] -> [[Block]] -> OD m (Doc Text) tableRowToOpenDocument o ns cs = inTagsIndented "table:table-row" . vcat <$> mapM (tableItemToOpenDocument o "TableRowCell") (zip ns cs) tableItemToOpenDocument :: PandocMonad m - => WriterOptions -> String -> (String,[Block]) + => WriterOptions -> Text -> (Text,[Block]) -> OD m (Doc Text) tableItemToOpenDocument o s (n,i) = let a = [ ("table:style-name" , s ) @@ -520,7 +523,7 @@ inlineToOpenDocument o ils inlinesToOpenDocument o Cite _ l -> inlinesToOpenDocument o l RawInline f s -> if f == Format "opendocument" - then return $ text s + then return $ text $ T.unpack s else do report $ InlineNotRendered ils return empty @@ -544,7 +547,7 @@ inlineToOpenDocument o ils getDims (("rel-height", w):xs) = ("style:rel-height", w) : getDims xs getDims (_:xs) = getDims xs return $ inTags False "draw:frame" - (("draw:name", "img" ++ show id') : getDims kvs) $ + (("draw:name", "img" <> tshow id') : getDims kvs) $ selfClosingTag "draw:image" [ ("xlink:href" , s ) , ("xlink:type" , "simple") , ("xlink:show" , "embed" ) @@ -552,7 +555,7 @@ inlineToOpenDocument o ils mkNote l = do n <- length <$> gets stNotes let footNote t = inTags False "text:note" - [ ("text:id" , "ftn" ++ show n) + [ ("text:id" , "ftn" <> tshow n) , ("text:note-class", "footnote" )] $ inTagsSimple "text:note-citation" (text . show $ n + 1) <> inTagsSimple "text:note-body" t @@ -563,10 +566,10 @@ inlineToOpenDocument o ils bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc Text])) bulletListStyle l = do let doStyles i = inTags True "text:list-level-style-bullet" - [ ("text:level" , show (i + 1) ) - , ("text:style-name" , "Bullet_20_Symbols") - , ("style:num-suffix", "." ) - , ("text:bullet-char", [bulletList !! i] ) + [ ("text:level" , tshow (i + 1)) + , ("text:style-name" , "Bullet_20_Symbols" ) + , ("style:num-suffix", "." ) + , ("text:bullet-char", T.singleton (bulletList !! i)) ] (listLevelStyle (1 + i)) bulletList = map chr $ cycle [8226,9702,9642] listElStyle = map doStyles [0..9] @@ -587,16 +590,16 @@ orderedListLevelStyle (s,n, d) (l,ls) = LowerRoman -> "i" _ -> "1" listStyle = inTags True "text:list-level-style-number" - ([ ("text:level" , show $ 1 + length ls ) + ([ ("text:level" , tshow $ 1 + length ls ) , ("text:style-name" , "Numbering_20_Symbols") , ("style:num-format", format ) - , ("text:start-value", show s ) + , ("text:start-value", tshow s ) ] ++ suffix) (listLevelStyle (1 + length ls)) in (l, ls ++ [listStyle]) listLevelStyle :: Int -> Doc Text listLevelStyle i = - let indent = show (0.25 + (0.25 * fromIntegral i :: Double)) in + let indent = tshow (0.25 + (0.25 * fromIntegral i :: Double)) in inTags True "style:list-level-properties" [ ("text:list-level-position-and-space-mode", "label-alignment") @@ -604,27 +607,27 @@ listLevelStyle i = ] $ selfClosingTag "style:list-level-label-alignment" [ ("text:label-followed-by", "listtab") - , ("text:list-tab-stop-position", indent ++ "in") + , ("text:list-tab-stop-position", indent <> "in") , ("fo:text-indent", "-0.25in") - , ("fo:margin-left", indent ++ "in") + , ("fo:margin-left", indent <> "in") ] tableStyle :: Int -> [(Char,Double)] -> Doc Text tableStyle num wcs = - let tableId = "Table" ++ show (num + 1) + let tableId = "Table" <> tshow (num + 1) table = inTags True "style:style" [("style:name", tableId) ,("style:family", "table")] $ selfClosingTag "style:table-properties" [("table:align" , "center")] colStyle (c,0) = selfClosingTag "style:style" - [ ("style:name" , tableId ++ "." ++ [c]) + [ ("style:name" , tableId <> "." <> T.singleton c) , ("style:family", "table-column" )] colStyle (c,w) = inTags True "style:style" - [ ("style:name" , tableId ++ "." ++ [c]) + [ ("style:name" , tableId <> "." <> T.singleton c) , ("style:family", "table-column" )] $ selfClosingTag "style:table-column-properties" - [("style:rel-column-width", printf "%d*" (floor $ w * 65535 :: Integer))] + [("style:rel-column-width", T.pack $ printf "%d*" (floor $ w * 65535 :: Integer))] headerRowCellStyle = inTags True "style:style" [ ("style:name" , "TableHeaderRowCell") , ("style:family", "table-cell" )] $ @@ -641,15 +644,15 @@ tableStyle num wcs = columnStyles = map colStyle wcs in cellStyles $$ table $$ vcat columnStyles -paraStyle :: PandocMonad m => [(String,String)] -> OD m Int +paraStyle :: PandocMonad m => [(Text,Text)] -> OD m Int paraStyle attrs = do pn <- (+) 1 . length <$> gets stParaStyles i <- (*) (0.5 :: Double) . fromIntegral <$> gets stIndentPara b <- gets stInDefinition t <- gets stTight - let styleAttr = [ ("style:name" , "P" ++ show pn) + let styleAttr = [ ("style:name" , "P" <> tshow pn) , ("style:family" , "paragraph" )] - indentVal = flip (++) "in" . show $ if b then max 0.5 i else i + indentVal = flip (<>) "in" . tshow $ if b then max 0.5 i else i tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] @@ -659,30 +662,30 @@ paraStyle attrs = do , ("fo:text-indent" , "0in" ) , ("style:auto-text-indent" , "false" )] else [] - attributes = indent ++ tight + attributes = indent <> tight paraProps = if null attributes then mempty else selfClosingTag "style:paragraph-properties" attributes - addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps + addParaStyle $ inTags True "style:style" (styleAttr <> attrs) paraProps return pn paraListStyle :: PandocMonad m => Int -> OD m Int paraListStyle l = paraStyle [("style:parent-style-name","Text_20_body") - ,("style:list-style-name", "L" ++ show l )] + ,("style:list-style-name", "L" <> tshow l)] -paraTableStyles :: String -> Int -> [Alignment] -> [(String, Doc Text)] +paraTableStyles :: Text -> Int -> [Alignment] -> [(Text, Doc Text)] paraTableStyles _ _ [] = [] paraTableStyles t s (a:xs) | AlignRight <- a = ( pName s, res s "end" ) : paraTableStyles t (s + 1) xs | AlignCenter <- a = ( pName s, res s "center") : paraTableStyles t (s + 1) xs - | otherwise = ("Table_20_" ++ t, empty ) : paraTableStyles t s xs - where pName sn = "P" ++ show (sn + 1) + | otherwise = ("Table_20_" <> t, empty ) : paraTableStyles t s xs + where pName sn = "P" <> tshow (sn + 1) res sn x = inTags True "style:style" [ ("style:name" , pName sn ) , ("style:family" , "paragraph" ) - , ("style:parent-style-name", "Table_20_" ++ t)] $ + , ("style:parent-style-name", "Table_20_" <> t)] $ selfClosingTag "style:paragraph-properties" [ ("fo:text-align", x) , ("style:justify-single-word", "false")] @@ -697,9 +700,9 @@ data TextStyle = Italic | Language Lang deriving ( Eq,Ord ) -textStyleAttr :: Map.Map String String +textStyleAttr :: Map.Map Text Text -> TextStyle - -> Map.Map String String + -> Map.Map Text Text textStyleAttr m s | Italic <- s = Map.insert "fo:font-style" "italic" . Map.insert "style:font-style-asian" "italic" . diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 3c4f1b237..e21d3f8c2 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Org - Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com> + Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com> 2010-2019 John MacFarlane <jgm@berkeley.edu> 2016-2019 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> License : GNU GPL, version 2 or above @@ -18,9 +18,10 @@ Org-Mode: <http://orgmode.org> module Text.Pandoc.Writers.Org (writeOrg) where import Prelude import Control.Monad.State.Strict -import Data.Char (isAlphaNum, toLower) -import Data.List (intersect, intersperse, isPrefixOf, partition, transpose) +import Data.Char (isAlphaNum) +import Data.List (intersect, intersperse, partition, transpose) import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging @@ -82,7 +83,7 @@ noteToOrg num note = do return $ hang (length marker) (text marker) contents -- | Escape special characters for Org. -escapeString :: String -> String +escapeString :: Text -> Text escapeString = escapeStringUsing $ [ ('\x2014',"---") , ('\x2013',"--") @@ -101,10 +102,10 @@ blockToOrg :: PandocMonad m blockToOrg Null = return empty blockToOrg (Div (_,classes@(cls:_),kvs) bs) | "drawer" `elem` classes = do contents <- blockListToOrg bs - let drawerNameTag = ":" <> text cls <> ":" + let drawerNameTag = ":" <> literal cls <> ":" let keys = vcat $ map (\(k,v) -> - ":" <> text k <> ":" - <> space <> text v) kvs + ":" <> literal k <> ":" + <> space <> literal v) kvs let drawerEndTag = text ":END:" return $ drawerNameTag $$ cr $$ keys $$ blankline $$ contents $$ @@ -115,28 +116,29 @@ blockToOrg (Div (ident, classes, kv) bs) = do -- if one class looks like the name of a greater block then output as such: -- The ID, if present, is added via the #+NAME keyword; other classes and -- key-value pairs are kept as #+ATTR_HTML attributes. - let isGreaterBlockClass = (`elem` ["center", "quote"]) . map toLower + let isGreaterBlockClass = (`elem` ["center", "quote"]) . T.toLower (blockTypeCand, classes') = partition isGreaterBlockClass classes return $ case blockTypeCand of (blockType:classes'') -> blankline $$ attrHtml (ident, classes'' <> classes', kv) $$ - "#+BEGIN_" <> text blockType $$ contents $$ - "#+END_" <> text blockType $$ blankline + "#+BEGIN_" <> literal blockType $$ contents $$ + "#+END_" <> literal blockType $$ blankline _ -> -- fallback with id: add id as an anchor if present, discard classes and -- key-value pairs, unwrap the content. - let contents' = if not (null ident) - then "<<" <> text ident <> ">>" $$ contents + let contents' = if not (T.null ident) + then "<<" <> literal ident <> ">>" $$ contents else contents in blankline $$ contents' $$ blankline blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure -blockToOrg (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return empty - else ("#+CAPTION: " <>) `fmap` inlineListToOrg txt - img <- inlineToOrg (Image attr txt (src,tit)) - return $ capt $$ img $$ blankline +blockToOrg (Para [Image attr txt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt = do + capt <- if null txt + then return empty + else ("#+CAPTION: " <>) `fmap` inlineListToOrg txt + img <- inlineToOrg (Image attr txt (src,tit)) + return $ capt $$ img $$ blankline blockToOrg (Para inlines) = do contents <- inlineListToOrg inlines return $ contents <> blankline @@ -153,9 +155,9 @@ blockToOrg (LineBlock lns) = do nest 2 contents $$ "#+END_VERSE" <> blankline blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ - nest 2 (text str) $$ "#+END_HTML" $$ blankline + nest 2 (literal str) $$ "#+END_HTML" $$ blankline blockToOrg b@(RawBlock f str) - | isRawFormat f = return $ text str + | isRawFormat f = return $ literal str | otherwise = do report $ BlockNotRendered b return empty @@ -168,17 +170,17 @@ blockToOrg (Header level attr inlines) = do else cr <> nest (level + 1) (propertiesDrawer attr) return $ headerStr <> " " <> contents <> drawerStr <> blankline blockToOrg (CodeBlock (_,classes,kvs) str) = do - let startnum = maybe "" (\x -> ' ' : trimr x) $ lookup "startFrom" kvs + let startnum = maybe "" (\x -> " " <> trimr x) $ lookup "startFrom" kvs let numberlines = if "numberLines" `elem` classes then if "continuedSourceBlock" `elem` classes - then " +n" ++ startnum - else " -n" ++ startnum + then " +n" <> startnum + else " -n" <> startnum else "" let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers let (beg, end) = case at of - [] -> ("#+BEGIN_EXAMPLE" ++ numberlines, "#+END_EXAMPLE") - (x:_) -> ("#+BEGIN_SRC " ++ x ++ numberlines, "#+END_SRC") - return $ text beg $$ nest 2 (text str) $$ text end $$ blankline + [] -> ("#+BEGIN_EXAMPLE" <> numberlines, "#+END_EXAMPLE") + (x:_) -> ("#+BEGIN_SRC " <> x <> numberlines, "#+END_SRC") + return $ literal beg $$ nest 2 (literal str) $$ text end $$ blankline blockToOrg (BlockQuote blocks) = do contents <- blockListToOrg blocks return $ blankline $$ "#+BEGIN_QUOTE" $$ @@ -225,9 +227,9 @@ blockToOrg (OrderedList (start, _, delim) items) = do x -> x let markers = take (length items) $ orderedListMarkers (start, Decimal, delim') - let maxMarkerLength = maximum $ map length markers - let markers' = map (\m -> let s = maxMarkerLength - length m - in m ++ replicate s ' ') markers + let maxMarkerLength = maximum $ map T.length markers + let markers' = map (\m -> let s = maxMarkerLength - T.length m + in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToOrg markers' items -- ensure that sublists have preceding blank line return $ blankline $$ @@ -249,12 +251,12 @@ bulletListItemToOrg items = do -- | Convert ordered list item (a list of blocks) to Org. orderedListItemToOrg :: PandocMonad m - => String -- ^ marker for list item + => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> Org m (Doc Text) orderedListItemToOrg marker items = do contents <- blockListToOrg items - return $ hang (length marker + 1) (text marker <> space) contents $$ + return $ hang (T.length marker + 1) (literal marker <> space) contents $$ if endsWithPlain items then cr else blankline @@ -276,25 +278,25 @@ propertiesDrawer (ident, classes, kv) = let drawerStart = text ":PROPERTIES:" drawerEnd = text ":END:" - kv' = if classes == mempty then kv else ("CLASS", unwords classes):kv + kv' = if classes == mempty then kv else ("CLASS", T.unwords classes):kv kv'' = if ident == mempty then kv' else ("CUSTOM_ID", ident):kv' properties = vcat $ map kvToOrgProperty kv'' in drawerStart <> cr <> properties <> cr <> drawerEnd where - kvToOrgProperty :: (String, String) -> Doc Text + kvToOrgProperty :: (Text, Text) -> Doc Text kvToOrgProperty (key, value) = - text ":" <> text key <> text ": " <> text value <> cr + text ":" <> literal key <> text ": " <> literal value <> cr attrHtml :: Attr -> Doc Text attrHtml ("" , [] , []) = mempty attrHtml (ident, classes, kvs) = let - name = if null ident then mempty else "#+NAME: " <> text ident <> cr + name = if T.null ident then mempty else "#+NAME: " <> literal ident <> cr keyword = "#+ATTR_HTML" - classKv = ("class", unwords classes) + classKv = ("class", T.unwords classes) kvStrings = map (\(k,v) -> ":" <> k <> " " <> v) (classKv:kvs) - in name <> keyword <> ": " <> text (unwords kvStrings) <> cr + in name <> keyword <> ": " <> literal (T.unwords kvStrings) <> cr -- | Convert list of Pandoc block elements to Org. blockListToOrg :: PandocMonad m @@ -322,7 +324,7 @@ inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst) -- | Convert Pandoc inline element to Org. inlineToOrg :: PandocMonad m => Inline -> Org m (Doc Text) inlineToOrg (Span (uid, [], []) []) = - return $ "<<" <> text uid <> ">>" + return $ "<<" <> literal uid <> ">>" inlineToOrg (Span _ lst) = inlineListToOrg lst inlineToOrg (Emph lst) = do @@ -348,15 +350,15 @@ inlineToOrg (Quoted DoubleQuote lst) = do contents <- inlineListToOrg lst return $ "\"" <> contents <> "\"" inlineToOrg (Cite _ lst) = inlineListToOrg lst -inlineToOrg (Code _ str) = return $ "=" <> text str <> "=" -inlineToOrg (Str str) = return . text $ escapeString str +inlineToOrg (Code _ str) = return $ "=" <> literal str <> "=" +inlineToOrg (Str str) = return . literal $ escapeString str inlineToOrg (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath - then "$" <> text str <> "$" - else "$$" <> text str <> "$$" + then "$" <> literal str <> "$" + else "$$" <> literal str <> "$$" inlineToOrg il@(RawInline f str) - | isRawFormat f = return $ text str + | isRawFormat f = return $ literal str | otherwise = do report $ InlineNotRendered il return empty @@ -371,39 +373,38 @@ inlineToOrg SoftBreak = do inlineToOrg (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink - return $ "[[" <> text (orgPath x) <> "]]" + return $ "[[" <> literal (orgPath x) <> "]]" _ -> do contents <- inlineListToOrg txt - return $ "[[" <> text (orgPath src) <> "][" <> contents <> "]]" + return $ "[[" <> literal (orgPath src) <> "][" <> contents <> "]]" inlineToOrg (Image _ _ (source, _)) = - return $ "[[" <> text (orgPath source) <> "]]" + return $ "[[" <> literal (orgPath source) <> "]]" inlineToOrg (Note contents) = do -- add to notes in state notes <- gets stNotes modify $ \st -> st { stNotes = contents:notes } - let ref = show $ length notes + 1 - return $ "[fn:" <> text ref <> "]" + let ref = tshow $ length notes + 1 + return $ "[fn:" <> literal ref <> "]" -orgPath :: String -> String -orgPath src = - case src of - [] -> mempty -- wiki link - ('#':_) -> src -- internal link - _ | isUrl src -> src - _ | isFilePath src -> src - _ -> "file:" <> src - where - isFilePath :: String -> Bool - isFilePath cs = any (`isPrefixOf` cs) ["/", "./", "../", "file:"] +orgPath :: Text -> Text +orgPath src = case T.uncons src of + Nothing -> "" -- wiki link + Just ('#', _) -> src -- internal link + _ | isUrl src -> src + _ | isFilePath src -> src + _ -> "file:" <> src + where + isFilePath :: Text -> Bool + isFilePath cs = any (`T.isPrefixOf` cs) ["/", "./", "../", "file:"] - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) + isUrl :: Text -> Bool + isUrl cs = + let (scheme, path) = T.break (== ':') cs + in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme + && not (T.null path) -- | Translate from pandoc's programming language identifiers to those used by -- org-mode. -pandocLangToOrg :: String -> String +pandocLangToOrg :: Text -> Text pandocLangToOrg cs = case cs of "c" -> "C" @@ -414,7 +415,7 @@ pandocLangToOrg cs = _ -> cs -- | List of language identifiers recognized by org-mode. -orgLangIdentifiers :: [String] +orgLangIdentifiers :: [Text] orgLangIdentifiers = [ "asymptote", "awk", "C", "C++", "clojure", "css", "d", "ditaa", "dot" , "calc", "emacs-lisp", "fortran", "gnuplot", "haskell", "java", "js" diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 58f230a9d..344a5564a 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -136,7 +136,7 @@ data MediaInfo = MediaInfo { mInfoFilePath :: FilePath , mInfoLocalId :: Int , mInfoGlobalId :: Int , mInfoMimeType :: Maybe MimeType - , mInfoExt :: Maybe String + , mInfoExt :: Maybe T.Text , mInfoCaption :: Bool } deriving (Show, Eq) @@ -159,16 +159,20 @@ runP env st p = evalStateT (runReaderT p env) st -------------------------------------------------------------------- -monospaceFont :: Monad m => P m String +findAttrText :: QName -> Element -> Maybe T.Text +findAttrText n = fmap T.pack . findAttr n + +monospaceFont :: Monad m => P m T.Text monospaceFont = do vars <- writerVariables <$> asks envOpts case lookupContext "monofont" vars of - Just s -> return (T.unpack s) + Just s -> return s Nothing -> return "Courier" +-- Kept as string for XML.Light fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)] fontSizeAttributes RunProps { rPropForceSize = Just sz } = - return [("sz", (show $ sz * 100))] + return [("sz", show $ sz * 100)] fontSizeAttributes _ = return [] copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive @@ -177,7 +181,8 @@ copyFileToArchive arch fp = do distArchive <- asks envDistArchive case findEntryByPath fp refArchive `mplus` findEntryByPath fp distArchive of Nothing -> throwError $ PandocSomeError - $ fp ++ " missing in reference file" + $ T.pack + $ fp <> " missing in reference file" Just e -> return $ addEntryToArchive e arch alwaysInheritedPatterns :: [Pattern] @@ -196,7 +201,7 @@ alwaysInheritedPatterns = -- We only look for these under special conditions contingentInheritedPatterns :: Presentation -> [Pattern] -contingentInheritedPatterns pres = [] ++ +contingentInheritedPatterns pres = [] <> if presHasSpeakerNotes pres then map compile [ "ppt/notesMasters/notesMaster*.xml" , "ppt/notesMasters/_rels/notesMaster*.xml.rels" @@ -207,7 +212,7 @@ contingentInheritedPatterns pres = [] ++ inheritedPatterns :: Presentation -> [Pattern] inheritedPatterns pres = - alwaysInheritedPatterns ++ contingentInheritedPatterns pres + alwaysInheritedPatterns <> contingentInheritedPatterns pres patternToFilePaths :: PandocMonad m => Pattern -> P m [FilePath] patternToFilePaths pat = do @@ -248,8 +253,8 @@ presentationToArchiveP p@(Presentation docProps slides) = do unless (null missingFiles) (throwError $ PandocSomeError $ - "The following required files are missing:\n" ++ - (unlines $ map (" " ++) missingFiles) + "The following required files are missing:\n" <> + (T.unlines $ map (T.pack . (" " <>)) missingFiles) ) newArch' <- foldM copyFileToArchive emptyArchive filePaths @@ -276,11 +281,11 @@ presentationToArchiveP p@(Presentation docProps slides) = do contentTypesEntry <- presentationToContentTypes p >>= contentTypesToEntry -- fold everything into our inherited archive and return it. return $ foldr addEntryToArchive newArch' $ - slideEntries ++ - slideRelEntries ++ - spkNotesEntries ++ - spkNotesRelEntries ++ - mediaEntries ++ + slideEntries <> + slideRelEntries <> + spkNotesEntries <> + spkNotesRelEntries <> + mediaEntries <> [contentTypesEntry, docPropsEntry, docCustomPropsEntry, relsEntry, presEntry, presRelsEntry, viewPropsEntry] @@ -352,11 +357,11 @@ getLayout layout = do distArchive <- asks envDistArchive parseXml refArchive distArchive layoutpath -shapeHasId :: NameSpaces -> String -> Element -> Bool +shapeHasId :: NameSpaces -> T.Text -> Element -> Bool shapeHasId ns ident element | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr - , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr = + , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr = nm == ident | otherwise = False @@ -397,7 +402,7 @@ getShapeDimensions ns element | otherwise = Nothing -getMasterShapeDimensionsById :: String +getMasterShapeDimensionsById :: T.Text -> Element -> Maybe ((Integer, Integer), (Integer, Integer)) getMasterShapeDimensionsById ident master = do @@ -422,7 +427,7 @@ getContentShapeSize ns layout master Nothing -> do let mbSz = findChild (elemName ns "p" "nvSpPr") sp >>= findChild (elemName ns "p" "cNvPr") >>= - findAttr (QName "id" Nothing Nothing) >>= + findAttrText (QName "id" Nothing Nothing) >>= flip getMasterShapeDimensionsById master case mbSz of Just sz' -> return sz' @@ -436,7 +441,7 @@ getContentShapeSize _ _ _ = throwError $ buildSpTree :: NameSpaces -> Element -> [Element] -> Element buildSpTree ns spTreeElem newShapes = emptySpTreeElem { elContent = newContent } - where newContent = elContent emptySpTreeElem ++ map Elem newShapes + where newContent = elContent emptySpTreeElem <> map Elem newShapes emptySpTreeElem = spTreeElem { elContent = filter fn (elContent spTreeElem) } fn :: Content -> Bool fn (Elem e) = isElem ns "p" "nvGrpSpPr" e || @@ -506,8 +511,8 @@ registerMedia fp caption = do [] -> 0 ids -> maximum ids - (imgBytes, mbMt) <- P.fetchItem fp - let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ '.':x)) + (imgBytes, mbMt) <- P.fetchItem $ T.pack fp + let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ "." <> x)) <|> case imageType imgBytes of Just Png -> Just ".png" @@ -546,11 +551,11 @@ registerMedia fp caption = do makeMediaEntry :: PandocMonad m => MediaInfo -> P m Entry makeMediaEntry mInfo = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$> asks envUTCTime - (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo) + (imgBytes, _) <- P.fetchItem (T.pack $ mInfoFilePath mInfo) let ext = case mInfoExt mInfo of Just e -> e Nothing -> "" - let fp = "ppt/media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext + let fp = "ppt/media/image" <> (show $ mInfoGlobalId mInfo) <> T.unpack ext return $ toEntry fp epochtime $ BL.fromStrict imgBytes makeMediaEntries :: PandocMonad m => P m [Entry] @@ -642,7 +647,7 @@ createCaption contentShapeDimensions paraElements = do elements <- mapM paragraphToElement [para] let ((x, y), (cx, cy)) = contentShapeDimensions let txBody = mknode "p:txBody" [] $ - [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> elements return $ mknode "p:sp" [] [ mknode "p:nvSpPr" [] [ mknode "p:cNvPr" [("id","1"), ("name","TextBox 3")] () @@ -675,7 +680,7 @@ makePicElements layout picProps mInfo alt = do (pageWidth, pageHeight) <- asks envPresentationSize -- hasHeader <- asks envSlideHasHeader let hasCaption = mInfoCaption mInfo - (imgBytes, _) <- P.fetchItem (mInfoFilePath mInfo) + (imgBytes, _) <- P.fetchItem (T.pack $ mInfoFilePath mInfo) let (pxX, pxY) = case imageSize opts imgBytes of Right sz -> sizeInPixels $ sz Left _ -> sizeInPixels $ def @@ -707,14 +712,14 @@ makePicElements layout picProps mInfo alt = do cNvPr <- case picPropLink picProps of Just link -> do idNum <- registerLink link return $ mknode "p:cNvPr" cNvPrAttr $ - mknode "a:hlinkClick" [("r:id", "rId" ++ show idNum)] () + mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] () Nothing -> return $ mknode "p:cNvPr" cNvPrAttr () let nvPicPr = mknode "p:nvPicPr" [] [ cNvPr , cNvPicPr , mknode "p:nvPr" [] ()] let blipFill = mknode "p:blipFill" [] - [ mknode "a:blip" [("r:embed", "rId" ++ (show $ mInfoLocalId mInfo))] () + [ mknode "a:blip" [("r:embed", "rId" <> (show $ mInfoLocalId mInfo))] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] let xfrm = mknode "a:xfrm" [] @@ -746,23 +751,23 @@ paraElemToElements :: PandocMonad m => ParaElem -> P m [Element] paraElemToElements Break = return [mknode "a:br" [] ()] paraElemToElements (Run rpr s) = do sizeAttrs <- fontSizeAttributes rpr - let attrs = sizeAttrs ++ - (if rPropBold rpr then [("b", "1")] else []) ++ - (if rPropItalics rpr then [("i", "1")] else []) ++ - (if rPropUnderline rpr then [("u", "sng")] else []) ++ + let attrs = sizeAttrs <> + (if rPropBold rpr then [("b", "1")] else []) <> + (if rPropItalics rpr then [("i", "1")] else []) <> + (if rPropUnderline rpr then [("u", "sng")] else []) <> (case rStrikethrough rpr of Just NoStrike -> [("strike", "noStrike")] Just SingleStrike -> [("strike", "sngStrike")] Just DoubleStrike -> [("strike", "dblStrike")] - Nothing -> []) ++ + Nothing -> []) <> (case rBaseline rpr of Just n -> [("baseline", show n)] - Nothing -> []) ++ + Nothing -> []) <> (case rCap rpr of Just NoCapitals -> [("cap", "none")] Just SmallCapitals -> [("cap", "small")] Just AllCapitals -> [("cap", "all")] - Nothing -> []) ++ + Nothing -> []) <> [] linkProps <- case rLink rpr of Just link -> do @@ -773,14 +778,14 @@ paraElemToElements (Run rpr s) = do return $ case link of InternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" ++ show idNum) + [ ("r:id", "rId" <> show idNum) , ("action", "ppaction://hlinksldjump") ] in [mknode "a:hlinkClick" linkAttrs ()] -- external ExternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" ++ show idNum) + [ ("r:id", "rId" <> show idNum) ] in [mknode "a:hlinkClick" linkAttrs ()] Nothing -> return [] @@ -794,11 +799,11 @@ paraElemToElements (Run rpr s) = do Nothing -> [] codeFont <- monospaceFont let codeContents = if rPropCode rpr - then [mknode "a:latin" [("typeface", codeFont)] ()] + then [mknode "a:latin" [("typeface", T.unpack codeFont)] ()] else [] - let propContents = linkProps ++ colorContents ++ codeContents + let propContents = linkProps <> colorContents <> codeContents return [mknode "a:r" [] [ mknode "a:rPr" attrs $ propContents - , mknode "a:t" [] s + , mknode "a:t" [] $ T.unpack s ]] paraElemToElements (MathElem mathType texStr) = do res <- convertMath writeOMML mathType (unTeXString texStr) @@ -839,29 +844,29 @@ surroundWithMathAlternate element = paragraphToElement :: PandocMonad m => Paragraph -> P m Element paragraphToElement par = do let - attrs = [("lvl", show $ pPropLevel $ paraProps par)] ++ + attrs = [("lvl", show $ pPropLevel $ paraProps par)] <> (case pPropMarginLeft (paraProps par) of Just px -> [("marL", show $ pixelsToEmu px)] Nothing -> [] - ) ++ + ) <> (case pPropIndent (paraProps par) of Just px -> [("indent", show $ pixelsToEmu px)] Nothing -> [] - ) ++ + ) <> (case pPropAlign (paraProps par) of Just AlgnLeft -> [("algn", "l")] Just AlgnRight -> [("algn", "r")] Just AlgnCenter -> [("algn", "ctr")] Nothing -> [] ) - props = [] ++ + props = [] <> (case pPropSpaceBefore $ paraProps par of Just px -> [mknode "a:spcBef" [] [ mknode "a:spcPts" [("val", show $ 100 * px)] () ] ] Nothing -> [] - ) ++ + ) <> (case pPropBullet $ paraProps par of Just Bullet -> [] Just (AutoNumbering attrs') -> @@ -869,7 +874,7 @@ paragraphToElement par = do Nothing -> [mknode "a:buNone" [] ()] ) paras <- concat <$> mapM paraElemToElements (paraElems par) - return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] ++ paras + return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] <> paras shapeToElement :: PandocMonad m => Element -> Shape -> P m Element shapeToElement layout (TextBox paras) @@ -879,7 +884,7 @@ shapeToElement layout (TextBox paras) sp <- getContentShape ns spTree elements <- mapM paragraphToElement paras let txBody = mknode "p:txBody" [] $ - [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> elements emptySpPr = mknode "p:spPr" [] () return $ surroundWithMathAlternate $ @@ -933,19 +938,19 @@ graphicFrameToElements layout tbls caption = do [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] () , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] () ] - ] ++ elements + ] <> elements if (not $ null caption) then do capElt <- createCaption ((x, y), (cx, cytmp)) caption return [graphicFrameElts, capElt] else return [graphicFrameElts] -getDefaultTableStyle :: PandocMonad m => P m (Maybe String) +getDefaultTableStyle :: PandocMonad m => P m (Maybe T.Text) getDefaultTableStyle = do refArchive <- asks envRefArchive distArchive <- asks envDistArchive tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml" - return $ findAttr (QName "def" Nothing Nothing) tblStyleLst + return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do @@ -970,7 +975,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do [mknode "a:txBody" [] $ ([ mknode "a:bodyPr" [] () , mknode "a:lstStyle" [] ()] - ++ elements')] + <> elements')] headers' <- mapM cellToOpenXML hdrCells rows' <- mapM (mapM cellToOpenXML) rows let borderProps = mknode "a:tcPr" [] () @@ -978,7 +983,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do let mkcell border contents = mknode "a:tc" [] $ (if null contents then emptyCell - else contents) ++ [ borderProps | border ] + else contents) <> [ borderProps | border ] let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells let mkgridcol w = mknode "a:gridCol" @@ -991,7 +996,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do , ("bandRow", if tblPrBandRow tblPr then "1" else "0") ] (case mbDefTblStyle of Nothing -> [] - Just sty -> [mknode "a:tableStyleId" [] sty]) + Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty]) return $ mknode "a:graphic" [] $ [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")] $ @@ -1001,7 +1006,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do then [] else map mkgridcol colWidths) ] - ++ [ mkrow True headers' | hasHeader ] ++ map (mkrow False) rows' + <> [ mkrow True headers' | hasHeader ] <> map (mkrow False) rows' ] ] @@ -1009,7 +1014,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do -- We get the shape by placeholder type. If there is NO type, it -- defaults to a content placeholder. -data PHType = PHType String | ObjType +data PHType = PHType T.Text | ObjType deriving (Show, Eq) findPHType :: NameSpaces -> Element -> PHType -> Bool @@ -1024,7 +1029,7 @@ findPHType ns spElem phType -- if it's a named PHType, we want to check that the attribute -- value matches. Just phElem | (PHType tp) <- phType -> - case findAttr (QName "type" Nothing Nothing) phElem of + case findAttrText (QName "type" Nothing Nothing) phElem of Just tp' -> tp == tp' Nothing -> False -- if it's an ObjType, we want to check that there is NO @@ -1063,7 +1068,7 @@ nonBodyTextToElement layout phTypes paraElements let hdrPara = Paragraph def paraElements element <- paragraphToElement hdrPara let txBody = mknode "p:txBody" [] $ - [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> [element] return $ replaceNamedChildren ns "p" "txBody" [txBody] sp -- XXX: TODO @@ -1081,7 +1086,7 @@ contentToElement layout hdrShape shapes contentElements <- local (\env -> env {envContentType = NormalContent}) (shapesToElements layout shapes) - return $ buildSpTree ns spTree (hdrShapeElements ++ contentElements) + return $ buildSpTree ns spTree (hdrShapeElements <> contentElements) contentToElement _ _ _ = return $ mknode "p:sp" [] () twoColumnToElement :: PandocMonad m => Element -> [ParaElem] -> [Shape] -> [Shape] -> P m Element @@ -1101,7 +1106,7 @@ twoColumnToElement layout hdrShape shapesL shapesR (shapesToElements layout shapesR) -- let contentElementsL' = map (setIdx ns "1") contentElementsL -- contentElementsR' = map (setIdx ns "2") contentElementsR - return $ buildSpTree ns spTree (hdrShapeElements ++ contentElementsL ++ contentElementsR) + return $ buildSpTree ns spTree (hdrShapeElements <> contentElementsL <> contentElementsR) twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] () @@ -1133,7 +1138,7 @@ metadataToElement layout titleElems subtitleElems authorsElems dateElems dateShapeElements <- if null dateElems then return [] else sequence [nonBodyTextToElement layout [PHType "dt"] dateElems] - return $ buildSpTree ns spTree (titleShapeElements ++ subtitleShapeElements ++ dateShapeElements) + return $ buildSpTree ns spTree (titleShapeElements <> subtitleShapeElements <> dateShapeElements) metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] () slideToElement :: PandocMonad m => Slide -> P m Element @@ -1186,7 +1191,7 @@ getNotesMaster = do distArchive <- asks envDistArchive parseXml refArchive distArchive "ppt/notesMasters/notesMaster1.xml" -getSlideNumberFieldId :: PandocMonad m => Element -> P m String +getSlideNumberFieldId :: PandocMonad m => Element -> P m T.Text getSlideNumberFieldId notesMaster | ns <- elemToNameSpaces notesMaster , Just cSld <- findChild (elemName ns "p" "cSld") notesMaster @@ -1195,7 +1200,7 @@ getSlideNumberFieldId notesMaster , Just txBody <- findChild (elemName ns "p" "txBody") sp , Just p <- findChild (elemName ns "a" "p") txBody , Just fld <- findChild (elemName ns "a" "fld") p - , Just fldId <- findAttr (QName "id" Nothing Nothing) fld = + , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld = return fldId | otherwise = throwError $ PandocSomeError $ @@ -1236,7 +1241,7 @@ speakerNotesBody :: PandocMonad m => [Paragraph] -> P m Element speakerNotesBody paras = do elements <- mapM paragraphToElement $ spaceParas $ map removeParaLinks paras let txBody = mknode "p:txBody" [] $ - [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] ++ elements + [mknode "a:bodyPr" [] (), mknode "a:lstStyle" [] ()] <> elements return $ mknode "p:sp" [] $ [ mknode "p:nvSpPr" [] $ @@ -1252,7 +1257,7 @@ speakerNotesBody paras = do , txBody ] -speakerNotesSlideNumber :: Int -> String -> Element +speakerNotesSlideNumber :: Int -> T.Text -> Element speakerNotesSlideNumber pgNum fieldId = mknode "p:sp" [] $ [ mknode "p:nvSpPr" [] $ @@ -1273,7 +1278,7 @@ speakerNotesSlideNumber pgNum fieldId = [ mknode "a:bodyPr" [] () , mknode "a:lstStyle" [] () , mknode "a:p" [] $ - [ mknode "a:fld" [ ("id", fieldId) + [ mknode "a:fld" [ ("id", T.unpack fieldId) , ("type", "slidenum") ] [ mknode "a:rPr" [("lang", "en-US")] () @@ -1329,24 +1334,24 @@ getSlideIdNum sldId = do Just n -> return n Nothing -> throwError $ PandocShouldNeverHappenError $ - "Slide Id " ++ (show sldId) ++ " not found." + "Slide Id " <> T.pack (show sldId) <> " not found." slideNum :: PandocMonad m => Slide -> P m Int slideNum slide = getSlideIdNum $ slideId slide idNumToFilePath :: Int -> FilePath -idNumToFilePath idNum = "slide" ++ (show $ idNum) ++ ".xml" +idNumToFilePath idNum = "slide" <> (show $ idNum) <> ".xml" slideToFilePath :: PandocMonad m => Slide -> P m FilePath slideToFilePath slide = do idNum <- slideNum slide - return $ "slide" ++ (show $ idNum) ++ ".xml" + return $ "slide" <> (show $ idNum) <> ".xml" -slideToRelId :: PandocMonad m => Slide -> P m String +slideToRelId :: PandocMonad m => Slide -> P m T.Text slideToRelId slide = do n <- slideNum slide offset <- asks envSlideIdOffset - return $ "rId" ++ (show $ n + offset) + return $ "rId" <> T.pack (show $ n + offset) data Relationship = Relationship { relId :: Int @@ -1362,7 +1367,7 @@ elementToRel element num <- case reads numStr :: [(Int, String)] of (n, _) : _ -> Just n [] -> Nothing - type' <- findAttr (QName "Type" Nothing Nothing) element + type' <- findAttrText (QName "Type" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element return $ Relationship num type' target | otherwise = Nothing @@ -1372,7 +1377,7 @@ slideToPresRel slide = do idNum <- slideNum slide n <- asks envSlideIdOffset let rId = idNum + n - fp = "slides/" ++ idNumToFilePath idNum + fp = "slides/" <> idNumToFilePath idNum return $ Relationship { relId = rId , relType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide" , relTarget = fp @@ -1397,7 +1402,7 @@ presentationToRels pres@(Presentation _ slides) = do , relTarget = "notesMasters/notesMaster1.xml" }] else [] - insertedRels = mySlideRels ++ notesMasterRels + insertedRels = mySlideRels <> notesMasterRels rels <- getRels -- we remove the slide rels and the notesmaster (if it's -- there). We'll put these back in ourselves, if necessary. @@ -1427,7 +1432,7 @@ presentationToRels pres@(Presentation _ slides) = do relsWeKeep' = map (\r -> r{relId = modifyRelNum $ relId r}) relsWeKeep - return $ insertedRels ++ relsWeKeep' + return $ insertedRels <> relsWeKeep' -- We make this ourselves, in case there's a thumbnail in the one from -- the template. @@ -1455,8 +1460,8 @@ topLevelRelsEntry :: PandocMonad m => P m Entry topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels relToElement :: Relationship -> Element -relToElement rel = mknode "Relationship" [ ("Id", "rId" ++ (show $ relId rel)) - , ("Type", relType rel) +relToElement rel = mknode "Relationship" [ ("Id", "rId" <> (show $ relId rel)) + , ("Type", T.unpack $ relType rel) , ("Target", relTarget rel) ] () relsToElement :: [Relationship] -> Element @@ -1479,7 +1484,7 @@ slideToEntry slide = do idNum <- slideNum slide local (\env -> env{envCurSlideId = idNum}) $ do element <- slideToElement slide - elemToEntry ("ppt/slides/" ++ idNumToFilePath idNum) element + elemToEntry ("ppt/slides/" <> idNumToFilePath idNum) element slideToSpeakerNotesEntry :: PandocMonad m => Slide -> P m (Maybe Entry) slideToSpeakerNotesEntry slide = do @@ -1492,7 +1497,7 @@ slideToSpeakerNotesEntry slide = do Just element | Just notesIdNum <- mbNotesIdNum -> Just <$> elemToEntry - ("ppt/notesSlides/notesSlide" ++ show notesIdNum ++ ".xml") + ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml") element _ -> return Nothing @@ -1505,7 +1510,7 @@ slideToSpeakerNotesRelElement slide@(Slide _ _ _) = do [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] [ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "../slides/slide" ++ show idNum ++ ".xml") + , ("Target", "../slides/slide" <> show idNum <> ".xml") ] () , mknode "Relationship" [ ("Id", "rId1") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") @@ -1524,7 +1529,7 @@ slideToSpeakerNotesRelEntry slide = do Just element | Just notesIdNum <- mbNotesIdNum -> Just <$> elemToEntry - ("ppt/notesSlides/_rels/notesSlide" ++ show notesIdNum ++ ".xml.rels") + ("ppt/notesSlides/_rels/notesSlide" <> show notesIdNum <> ".xml.rels") element _ -> return Nothing @@ -1532,21 +1537,21 @@ slideToSlideRelEntry :: PandocMonad m => Slide -> P m Entry slideToSlideRelEntry slide = do idNum <- slideNum slide element <- slideToSlideRelElement slide - elemToEntry ("ppt/slides/_rels/" ++ idNumToFilePath idNum ++ ".rels") element + elemToEntry ("ppt/slides/_rels/" <> idNumToFilePath idNum <> ".rels") element linkRelElement :: PandocMonad m => Int -> LinkTarget -> P m Element linkRelElement rIdNum (InternalTarget targetId) = do targetIdNum <- getSlideIdNum targetId return $ - mknode "Relationship" [ ("Id", "rId" ++ show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> show rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "slide" ++ show targetIdNum ++ ".xml") + , ("Target", "slide" <> show targetIdNum <> ".xml") ] () linkRelElement rIdNum (ExternalTarget (url, _)) = do return $ - mknode "Relationship" [ ("Id", "rId" ++ show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> show rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink") - , ("Target", url) + , ("Target", T.unpack url) , ("TargetMode", "External") ] () @@ -1559,9 +1564,9 @@ mediaRelElement mInfo = Just e -> e Nothing -> "" in - mknode "Relationship" [ ("Id", "rId" ++ (show $ mInfoLocalId mInfo)) + mknode "Relationship" [ ("Id", "rId" <> (show $ mInfoLocalId mInfo)) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image") - , ("Target", "../media/image" ++ (show $ mInfoGlobalId mInfo) ++ ext) + , ("Target", "../media/image" <> (show $ mInfoGlobalId mInfo) <> T.unpack ext) ] () speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element) @@ -1571,7 +1576,7 @@ speakerNotesSlideRelElement slide = do return $ case M.lookup idNum mp of Nothing -> Nothing Just n -> - let target = "../notesSlides/notesSlide" ++ show n ++ ".xml" + let target = "../notesSlides/notesSlide" <> show n <> ".xml" in Just $ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide") @@ -1605,14 +1610,14 @@ slideToSlideRelElement slide = do ([mknode "Relationship" [ ("Id", "rId1") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout") , ("Target", target)] () - ] ++ speakerNotesRels ++ linkRels ++ mediaRels) + ] <> speakerNotesRels <> linkRels <> mediaRels) slideToSldIdElement :: PandocMonad m => Slide -> P m Element slideToSldIdElement slide = do n <- slideNum slide let id' = show $ n + 255 rId <- slideToRelId slide - return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] () + return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] () presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element presentationToSldIdLst (Presentation _ slides) = do @@ -1637,7 +1642,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do notesMasterElem = mknode "p:notesMasterIdLst" [] [ mknode "p:NotesMasterId" - [("r:id", "rId" ++ show notesMasterRId)] + [("r:id", "rId" <> show notesMasterRId)] () ] @@ -1683,7 +1688,7 @@ docPropsElement :: PandocMonad m => DocProps -> P m Element docPropsElement docProps = do utctime <- asks envUTCTime let keywords = case dcKeywords docProps of - Just xs -> intercalate ", " xs + Just xs -> T.intercalate ", " xs Nothing -> "" return $ mknode "cp:coreProperties" @@ -1692,16 +1697,16 @@ docPropsElement docProps = do ,("xmlns:dcterms","http://purl.org/dc/terms/") ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] - $ (mknode "dc:title" [] $ fromMaybe "" $ dcTitle docProps) - : (mknode "dc:creator" [] $ fromMaybe "" $ dcCreator docProps) - : (mknode "cp:keywords" [] keywords) + $ (mknode "dc:title" [] $ maybe "" T.unpack $ dcTitle docProps) + : (mknode "dc:creator" [] $ maybe "" T.unpack $ dcCreator docProps) + : (mknode "cp:keywords" [] $ T.unpack keywords) : (if isNothing (dcSubject docProps) then [] else - [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps]) - ++ (if isNothing (dcDescription docProps) then [] else - [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps]) - ++ (if isNothing (cpCategory docProps) then [] else - [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps]) - ++ (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x + [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps]) + <> (if isNothing (dcDescription docProps) then [] else + [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps]) + <> (if isNothing (cpCategory docProps) then [] else + [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps]) + <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) @@ -1715,7 +1720,7 @@ docCustomPropsElement docProps = do let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") ,("pid", show pid) - ,("name", k)] $ mknode "vt:lpwstr" [] v + ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v) return $ mknode "Properties" [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties") ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes") @@ -1745,15 +1750,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml" defaultContentTypeToElem :: DefaultContentType -> Element defaultContentTypeToElem dct = mknode "Default" - [("Extension", defContentTypesExt dct), - ("ContentType", defContentTypesType dct)] + [("Extension", T.unpack $ defContentTypesExt dct), + ("ContentType", T.unpack $ defContentTypesType dct)] () overrideContentTypeToElem :: OverrideContentType -> Element overrideContentTypeToElem oct = mknode "Override" [("PartName", overrideContentTypesPart oct), - ("ContentType", overrideContentTypesType oct)] + ("ContentType", T.unpack $ overrideContentTypesType oct)] () contentTypesToElement :: ContentTypes -> Element @@ -1761,11 +1766,11 @@ contentTypesToElement ct = let ns = "http://schemas.openxmlformats.org/package/2006/content-types" in mknode "Types" [("xmlns", ns)] $ - (map defaultContentTypeToElem $ contentTypesDefaults ct) ++ + (map defaultContentTypeToElem $ contentTypesDefaults ct) <> (map overrideContentTypeToElem $ contentTypesOverrides ct) data DefaultContentType = DefaultContentType - { defContentTypesExt :: String + { defContentTypesExt :: T.Text , defContentTypesType:: MimeType } deriving (Show, Eq) @@ -1785,12 +1790,12 @@ contentTypesToEntry :: PandocMonad m => ContentTypes -> P m Entry contentTypesToEntry ct = elemToEntry "[Content_Types].xml" $ contentTypesToElement ct pathToOverride :: FilePath -> Maybe OverrideContentType -pathToOverride fp = OverrideContentType ("/" ++ fp) <$> (getContentType fp) +pathToOverride fp = OverrideContentType ("/" <> fp) <$> (getContentType fp) mediaFileContentType :: FilePath -> Maybe DefaultContentType mediaFileContentType fp = case takeExtension fp of '.' : ext -> Just $ - DefaultContentType { defContentTypesExt = ext + DefaultContentType { defContentTypesExt = T.pack ext , defContentTypesType = case getMimeType fp of Just mt -> mt @@ -1800,7 +1805,8 @@ mediaFileContentType fp = case takeExtension fp of mediaContentType :: MediaInfo -> Maybe DefaultContentType mediaContentType mInfo - | Just ('.' : ext) <- mInfoExt mInfo = + | Just t <- mInfoExt mInfo + , Just ('.', ext) <- T.uncons t = Just $ DefaultContentType { defContentTypesExt = ext , defContentTypesType = case mInfoMimeType mInfo of @@ -1813,7 +1819,7 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath] getSpeakerNotesFilePaths = do mp <- asks envSpeakerNotesIdMap let notesIdNums = M.elems mp - return $ map (\n -> "ppt/notesSlides/notesSlide" ++ show n ++ ".xml") notesIdNums + return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes presentationToContentTypes p@(Presentation _ slides) = do @@ -1824,7 +1830,7 @@ presentationToContentTypes p@(Presentation _ slides) = do , DefaultContentType "rels" "application/vnd.openxmlformats-package.relationships+xml" ] mediaDefaults = nub $ - (mapMaybe mediaContentType $ mediaInfos) ++ + (mapMaybe mediaContentType $ mediaInfos) <> (mapMaybe mediaFileContentType $ mediaFps) inheritedOverrides = mapMaybe pathToOverride filePaths @@ -1835,55 +1841,56 @@ presentationToContentTypes p@(Presentation _ slides) = do ] relativePaths <- mapM slideToFilePath slides let slideOverrides = mapMaybe - (\fp -> pathToOverride $ "ppt/slides/" ++ fp) + (\fp -> pathToOverride $ "ppt/slides/" <> fp) relativePaths speakerNotesOverrides <- (mapMaybe pathToOverride) <$> getSpeakerNotesFilePaths return $ ContentTypes - (defaults ++ mediaDefaults) - (inheritedOverrides ++ createdOverrides ++ slideOverrides ++ speakerNotesOverrides) + (defaults <> mediaDefaults) + (inheritedOverrides <> createdOverrides <> slideOverrides <> speakerNotesOverrides) -presML :: String +presML :: T.Text presML = "application/vnd.openxmlformats-officedocument.presentationml" -noPresML :: String +noPresML :: T.Text noPresML = "application/vnd.openxmlformats-officedocument" getContentType :: FilePath -> Maybe MimeType getContentType fp - | fp == "ppt/presentation.xml" = Just $ presML ++ ".presentation.main+xml" - | fp == "ppt/presProps.xml" = Just $ presML ++ ".presProps+xml" - | fp == "ppt/viewProps.xml" = Just $ presML ++ ".viewProps+xml" - | fp == "ppt/tableStyles.xml" = Just $ presML ++ ".tableStyles+xml" + | fp == "ppt/presentation.xml" = Just $ presML <> ".presentation.main+xml" + | fp == "ppt/presProps.xml" = Just $ presML <> ".presProps+xml" + | fp == "ppt/viewProps.xml" = Just $ presML <> ".viewProps+xml" + | fp == "ppt/tableStyles.xml" = Just $ presML <> ".tableStyles+xml" | fp == "docProps/core.xml" = Just $ "application/vnd.openxmlformats-package.core-properties+xml" | fp == "docProps/custom.xml" = Just $ "application/vnd.openxmlformats-officedocument.custom-properties+xml" - | fp == "docProps/app.xml" = Just $ noPresML ++ ".extended-properties+xml" + | fp == "docProps/app.xml" = Just $ noPresML <> ".extended-properties+xml" | "ppt" : "slideMasters" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ presML ++ ".slideMaster+xml" + Just $ presML <> ".slideMaster+xml" | "ppt" : "slides" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ presML ++ ".slide+xml" + Just $ presML <> ".slide+xml" | "ppt" : "notesMasters" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ presML ++ ".notesMaster+xml" + Just $ presML <> ".notesMaster+xml" | "ppt" : "notesSlides" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ presML ++ ".notesSlide+xml" + Just $ presML <> ".notesSlide+xml" | "ppt" : "theme" : f : [] <- splitDirectories fp , (_, ".xml") <- splitExtension f = - Just $ noPresML ++ ".theme+xml" + Just $ noPresML <> ".theme+xml" | "ppt" : "slideLayouts" : _ : [] <- splitDirectories fp= - Just $ presML ++ ".slideLayout+xml" + Just $ presML <> ".slideLayout+xml" | otherwise = Nothing +-- Kept as String for XML.Light autoNumAttrs :: ListAttributes -> [(String, String)] autoNumAttrs (startNum, numStyle, numDelim) = - numAttr ++ typeAttr + numAttr <> typeAttr where numAttr = if startNum == 1 then [] else [("startAt", show startNum)] - typeAttr = [("type", typeString ++ delimString)] + typeAttr = [("type", typeString <> delimString)] typeString = case numStyle of Decimal -> "arabic" UpperAlpha -> "alphaUc" diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index 8667c79f4..75ce0dd4e 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -1,5 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | Module : Text.Pandoc.Writers.Powerpoint.Presentation @@ -54,6 +56,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Walk import Data.Time (UTCTime) import qualified Text.Pandoc.Shared as Shared -- so we don't overlap "Element" +import Text.Pandoc.Shared (tshow) import Text.Pandoc.Writers.Shared (lookupMetaInlines, lookupMetaBlocks , lookupMetaString, toTableOfContents) import qualified Data.Map as M @@ -93,7 +96,7 @@ instance Default WriterEnv where data WriterState = WriterState { stNoteIds :: M.Map Int [Block] -- associate anchors with slide id - , stAnchorMap :: M.Map String SlideId + , stAnchorMap :: M.Map T.Text SlideId , stSlideIdSet :: S.Set SlideId , stLog :: [LogMessage] , stSpeakerNotes :: SpeakerNotes @@ -123,17 +126,17 @@ reservedSlideIds = S.fromList [ metadataSlideId , endNotesSlideId ] -uniqueSlideId' :: Integer -> S.Set SlideId -> String -> SlideId +uniqueSlideId' :: Integer -> S.Set SlideId -> T.Text -> SlideId uniqueSlideId' n idSet s = - let s' = if n == 0 then s else s ++ "-" ++ show n + let s' = if n == 0 then s else s <> "-" <> tshow n in if SlideId s' `S.member` idSet then uniqueSlideId' (n+1) idSet s else SlideId s' -uniqueSlideId :: S.Set SlideId -> String -> SlideId +uniqueSlideId :: S.Set SlideId -> T.Text -> SlideId uniqueSlideId = uniqueSlideId' 0 -runUniqueSlideId :: String -> Pres SlideId +runUniqueSlideId :: T.Text -> Pres SlideId runUniqueSlideId s = do idSet <- gets stSlideIdSet let sldId = uniqueSlideId idSet s @@ -159,14 +162,14 @@ type Pixels = Integer data Presentation = Presentation DocProps [Slide] deriving (Show) -data DocProps = DocProps { dcTitle :: Maybe String - , dcSubject :: Maybe String - , dcCreator :: Maybe String - , dcKeywords :: Maybe [String] - , dcDescription :: Maybe String - , cpCategory :: Maybe String +data DocProps = DocProps { dcTitle :: Maybe T.Text + , dcSubject :: Maybe T.Text + , dcCreator :: Maybe T.Text + , dcKeywords :: Maybe [T.Text] + , dcDescription :: Maybe T.Text + , cpCategory :: Maybe T.Text , dcCreated :: Maybe UTCTime - , customProperties :: Maybe [(String, String)] + , customProperties :: Maybe [(T.Text, T.Text)] } deriving (Show, Eq) @@ -175,7 +178,7 @@ data Slide = Slide { slideId :: SlideId , slideSpeakerNotes :: SpeakerNotes } deriving (Show, Eq) -newtype SlideId = SlideId String +newtype SlideId = SlideId T.Text deriving (Show, Eq, Ord) -- In theory you could have anything on a notes slide but it seems @@ -197,7 +200,7 @@ data Layout = MetadataSlide [ParaElem] [ParaElem] [[ParaElem]] [ParaElem] data Shape = Pic PicProps FilePath [ParaElem] | GraphicFrame [Graphic] [ParaElem] | TextBox [Paragraph] - | RawOOXMLShape String + | RawOOXMLShape T.Text deriving (Show, Eq) type Cell = [Paragraph] @@ -240,17 +243,17 @@ instance Default ParaProps where , pPropIndent = Just 0 } -newtype TeXString = TeXString {unTeXString :: String} +newtype TeXString = TeXString {unTeXString :: T.Text} deriving (Eq, Show) data ParaElem = Break - | Run RunProps String + | Run RunProps T.Text -- It would be more elegant to have native TeXMath -- Expressions here, but this allows us to use -- `convertmath` from T.P.Writers.Math. Will perhaps -- revisit in the future. | MathElem MathType TeXString - | RawOOXMLParaElem String + | RawOOXMLParaElem T.Text deriving (Show, Eq) data Strikethrough = NoStrike | SingleStrike | DoubleStrike @@ -259,9 +262,9 @@ data Strikethrough = NoStrike | SingleStrike | DoubleStrike data Capitals = NoCapitals | SmallCapitals | AllCapitals deriving (Show, Eq) -type URL = String +type URL = T.Text -data LinkTarget = ExternalTarget (URL, String) +data LinkTarget = ExternalTarget (URL, T.Text) | InternalTarget SlideId deriving (Show, Eq) @@ -360,7 +363,7 @@ inlineToParElems (Note blks) = do curNoteId = maxNoteId + 1 modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes } local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $ - inlineToParElems $ Superscript [Str $ show curNoteId] + inlineToParElems $ Superscript [Str $ tshow curNoteId] inlineToParElems (Span (_, ["underline"], _) ils) = local (\r -> r{envRunProps = (envRunProps r){rPropUnderline=True}}) $ inlinesToParElems ils @@ -389,11 +392,11 @@ isListType (BulletList _) = True isListType (DefinitionList _) = True isListType _ = False -registerAnchorId :: String -> Pres () +registerAnchorId :: T.Text -> Pres () registerAnchorId anchor = do anchorMap <- gets stAnchorMap sldId <- asks envCurSlideId - unless (null anchor) $ + unless (T.null anchor) $ modify $ \st -> st {stAnchorMap = M.insert anchor sldId anchorMap} -- Currently hardcoded, until I figure out how to make it dynamic. @@ -531,11 +534,11 @@ withAttr _ sp = sp blockToShape :: Block -> Pres Shape blockToShape (Plain ils) = blockToShape (Para ils) blockToShape (Para (il:_)) | Image attr ils (url, _) <- il = - (withAttr attr . Pic def url) <$> inlinesToParElems ils + (withAttr attr . Pic def (T.unpack url)) <$> inlinesToParElems ils blockToShape (Para (il:_)) | Link _ (il':_) target <- il , Image attr ils (url, _) <- il' = - (withAttr attr . Pic def{picPropLink = Just $ ExternalTarget target} url) <$> - inlinesToParElems ils + (withAttr attr . Pic def{picPropLink = Just $ ExternalTarget target} (T.unpack url)) + <$> inlinesToParElems ils blockToShape (Table caption algn _ hdrCells rows) = do caption' <- inlinesToParElems caption hdrCells' <- rowToParagraphs algn hdrCells @@ -711,7 +714,7 @@ blocksToSlide blks = do makeNoteEntry :: Int -> [Block] -> [Block] makeNoteEntry n blks = - let enum = Str (show n ++ ".") + let enum = Str (tshow n <> ".") in case blks of (Para ils : blks') -> (Para $ enum : Space : ils) : blks' @@ -786,7 +789,7 @@ combineParaElems' (Just pElem') (pElem : pElems) | Run rPr' s' <- pElem' , Run rPr s <- pElem , rPr == rPr' = - combineParaElems' (Just $ Run rPr' $ s' ++ s) pElems + combineParaElems' (Just $ Run rPr' $ s' <> s) pElems | otherwise = pElem' : combineParaElems' (Just pElem) pElems @@ -831,7 +834,8 @@ applyToSlide f slide = do replaceAnchor :: ParaElem -> Pres ParaElem replaceAnchor (Run rProps s) - | Just (ExternalTarget ('#':anchor, _)) <- rLink rProps = do + | Just (ExternalTarget (T.uncons -> Just ('#', anchor), _)) <- rLink rProps + = do anchorMap <- gets stAnchorMap -- If the anchor is not in the anchormap, we just remove the -- link. @@ -843,9 +847,9 @@ replaceAnchor pe = return pe emptyParaElem :: ParaElem -> Bool emptyParaElem (Run _ s) = - null $ Shared.trim s + T.null $ Shared.trim s emptyParaElem (MathElem _ ts) = - null $ Shared.trim $ unTeXString ts + T.null $ Shared.trim $ unTeXString ts emptyParaElem _ = False emptyParagraph :: Paragraph -> Bool @@ -900,7 +904,7 @@ blocksToPresentationSlides blks = do -- slide later blksLst <- splitBlocks blks' bodySlideIds <- mapM - (\n -> runUniqueSlideId $ "BodySlide" ++ show n) + (\n -> runUniqueSlideId $ "BodySlide" <> tshow n) (take (length blksLst) [1..] :: [Integer]) bodyslides <- mapM (\(bs, ident) -> @@ -935,11 +939,11 @@ metaToDocProps meta = authors = case map Shared.stringify $ docAuthors meta of [] -> Nothing - ss -> Just $ intercalate "; " ss + ss -> Just $ T.intercalate "; " ss description = case map Shared.stringify $ lookupMetaBlocks "description" meta of [] -> Nothing - ss -> Just $ intercalate "_x000d_\n" ss + ss -> Just $ T.intercalate "_x000d_\n" ss customProperties' = case [(k, lookupMetaString k meta) | k <- M.keys (unMeta meta) , k `notElem` (["title", "author", "keywords", "description" @@ -987,7 +991,7 @@ formatToken sty (tokType, txt) = Just tokSty -> applyTokStyToRunProps tokSty rProps Nothing -> rProps in - Run rProps' $ T.unpack txt + Run rProps' txt formatSourceLine :: Style -> FormatOptions -> SourceLine -> [ParaElem] formatSourceLine sty _ srcLn = map (formatToken sty) srcLn diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index efe86e73b..5f035ee1f 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.RST Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -16,8 +17,8 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html> module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Prelude import Control.Monad.State.Strict -import Data.Char (isSpace, toLower) -import Data.List (isPrefixOf, stripPrefix, transpose, intersperse) +import Data.Char (isSpace) +import Data.List (transpose, intersperse) import Data.Maybe (fromMaybe) import qualified Data.Text as T import Data.Text (Text) @@ -38,7 +39,7 @@ type Refs = [([Inline], Target)] data WriterState = WriterState { stNotes :: [[Block]] , stLinks :: Refs - , stImages :: [([Inline], (Attr, String, String, Maybe String))] + , stImages :: [([Inline], (Attr, Text, Text, Maybe Text))] , stHasMath :: Bool , stHasRawTeX :: Bool , stOptions :: WriterOptions @@ -81,7 +82,7 @@ pandocToRST (Pandoc meta blocks) = do let main = vsep [body, notes, refs, pics] let context = defField "body" main $ defField "toc" (writerTableOfContents opts) - $ defField "toc-depth" (T.pack $ show $ writerTOCDepth opts) + $ defField "toc-depth" (tshow $ writerTOCDepth opts) $ defField "number-sections" (writerNumberSections opts) $ defField "math" hasMath $ defField "titleblock" (render Nothing title :: Text) @@ -105,13 +106,13 @@ refsToRST :: PandocMonad m => Refs -> RST m (Doc Text) refsToRST refs = mapM keyToRST refs >>= return . vcat -- | Return RST representation of a reference key. -keyToRST :: PandocMonad m => ([Inline], (String, String)) -> RST m (Doc Text) +keyToRST :: PandocMonad m => ([Inline], (Text, Text)) -> RST m (Doc Text) keyToRST (label, (src, _)) = do label' <- inlineListToRST label let label'' = if (==':') `T.any` (render Nothing label' :: Text) then char '`' <> label' <> char '`' else label' - return $ nowrap $ ".. _" <> label'' <> ": " <> text src + return $ nowrap $ ".. _" <> label'' <> ": " <> literal src -- | Return RST representation of notes. notesToRST :: PandocMonad m => [[Block]] -> RST m (Doc Text) @@ -128,13 +129,13 @@ noteToRST num note = do -- | Return RST representation of picture reference table. pictRefsToRST :: PandocMonad m - => [([Inline], (Attr, String, String, Maybe String))] + => [([Inline], (Attr, Text, Text, Maybe Text))] -> RST m (Doc Text) pictRefsToRST refs = mapM pictToRST refs >>= return . vcat -- | Return RST representation of a picture substitution reference. pictToRST :: PandocMonad m - => ([Inline], (Attr, String, String, Maybe String)) + => ([Inline], (Attr, Text, Text, Maybe Text)) -> RST m (Doc Text) pictToRST (label, (attr, src, _, mbtarget)) = do label' <- inlineListToRST label @@ -145,32 +146,32 @@ pictToRST (label, (attr, src, _, mbtarget)) = do ["align-right"] -> ":align: right" ["align-left"] -> ":align: left" ["align-center"] -> ":align: center" - _ -> ":class: " <> text (unwords cls) + _ -> ":class: " <> literal (T.unwords cls) return $ nowrap - $ ".. |" <> label' <> "| image:: " <> text src $$ hang 3 empty (classes $$ dims) + $ ".. |" <> label' <> "| image:: " <> literal src $$ hang 3 empty (classes $$ dims) $$ case mbtarget of Nothing -> empty - Just t -> " :target: " <> text t + Just t -> " :target: " <> literal t -- | Escape special characters for RST. -escapeString :: WriterOptions -> String -> String -escapeString = escapeString' True +escapeText :: WriterOptions -> Text -> Text +escapeText o = T.pack . escapeString' True o . T.unpack -- This ought to be parser where escapeString' _ _ [] = [] escapeString' firstChar opts (c:cs) = case c of - _ | c `elem` ['\\','`','*','_','|'] && - (firstChar || null cs) -> '\\':c:escapeString' False opts cs + _ | c `elemText` "\\`*_|" && + (firstChar || null cs) -> '\\':c:escapeString' False opts cs '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString' False opts cs - '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs - '-' | isEnabled Ext_smart opts -> - case cs of - '-':_ -> '\\':'-':escapeString' False opts cs - _ -> '-':escapeString' False opts cs - '.' | isEnabled Ext_smart opts -> - case cs of - '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest - _ -> '.':escapeString' False opts cs + '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs + '-' | isEnabled Ext_smart opts -> + case cs of + '-':_ -> '\\':'-':escapeString' False opts cs + _ -> '-':escapeString' False opts cs + '.' | isEnabled Ext_smart opts -> + case cs of + '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest + _ -> '.':escapeString' False opts cs _ -> c : escapeString' False opts cs titleToRST :: PandocMonad m => [Inline] -> [Inline] -> RST m (Doc Text) @@ -186,7 +187,7 @@ bordered contents c = then border $$ contents $$ border else empty where len = offset contents - border = text (replicate len c) + border = literal (T.replicate len $ T.singleton c) -- | Convert Pandoc block element to RST. blockToRST :: PandocMonad m @@ -203,30 +204,30 @@ blockToRST (Div (ident,classes,_kvs) bs) = do let admonition = case classes of (cl:_) | cl `elem` admonitions - -> ".. " <> text cl <> "::" + -> ".. " <> literal cl <> "::" cls -> ".. container::" <> space <> - text (unwords (filter (/= "container") cls)) + literal (T.unwords (filter (/= "container") cls)) return $ blankline $$ admonition $$ - (if null ident + (if T.null ident then blankline - else " :name: " <> text ident $$ blankline) $$ + else " :name: " <> literal ident $$ blankline) $$ nest 3 contents $$ blankline blockToRST (Plain inlines) = inlineListToRST inlines -- title beginning with fig: indicates that the image is a figure -blockToRST (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToRST (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do capt <- inlineListToRST txt dims <- imageDimsToRST attr - let fig = "figure:: " <> text src - alt = ":alt: " <> if null tit then capt else text tit + let fig = "figure:: " <> literal src + alt = ":alt: " <> if T.null tit then capt else literal tit (_,cls,_) = attr classes = case cls of [] -> empty ["align-right"] -> ":align: right" ["align-left"] -> ":align: left" ["align-center"] -> ":align: center" - _ -> ":figclass: " <> text (unwords cls) + _ -> ":figclass: " <> literal (T.unwords cls) return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline blockToRST (Para inlines) | LineBreak `elem` inlines = @@ -237,11 +238,11 @@ blockToRST (Para inlines) blockToRST (LineBlock lns) = linesToLineBlock lns blockToRST (RawBlock f@(Format f') str) - | f == "rst" = return $ text str + | f == "rst" = return $ literal str | f == "tex" = blockToRST (RawBlock (Format "latex") str) | otherwise = return $ blankline <> ".. raw:: " <> - text (map toLower f') $+$ - nest 3 (text str) $$ blankline + literal (T.toLower f') $+$ + nest 3 (literal str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToRST (Header level (name,classes,_) inlines) = do @@ -254,33 +255,33 @@ blockToRST (Header level (name,classes,_) inlines) = do if isTopLevel then do let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate (offset contents) headerChar - let anchor | null name || name == autoId = empty - | otherwise = ".. _" <> text name <> ":" $$ blankline + let border = literal $ T.replicate (offset contents) $ T.singleton headerChar + let anchor | T.null name || name == autoId = empty + | otherwise = ".. _" <> literal name <> ":" $$ blankline return $ nowrap $ anchor $$ contents $$ border $$ blankline else do let rub = "rubric:: " <> contents - let name' | null name = empty - | otherwise = ":name: " <> text name - let cls | null classes = empty - | otherwise = ":class: " <> text (unwords classes) + let name' | T.null name = empty + | otherwise = ":name: " <> literal name + let cls | null classes = empty + | otherwise = ":class: " <> literal (T.unwords classes) return $ nowrap $ hang 3 ".. " (rub $$ name' $$ cls) $$ blankline blockToRST (CodeBlock (_,classes,kvs) str) = do opts <- gets stOptions - let startnum = maybe "" (\x -> " " <> text x) $ lookup "startFrom" kvs + let startnum = maybe "" (\x -> " " <> literal x) $ lookup "startFrom" kvs let numberlines = if "numberLines" `elem` classes then " :number-lines:" <> startnum else empty if "haskell" `elem` classes && "literate" `elem` classes && isEnabled Ext_literate_haskell opts - then return $ prefixed "> " (text str) $$ blankline + then return $ prefixed "> " (literal str) $$ blankline else return $ (case [c | c <- classes, c `notElem` ["sourceCode","literate","numberLines", "number-lines","example"]] of [] -> "::" - (lang:_) -> (".. code:: " <> text lang) $$ numberlines) - $+$ nest 3 (text str) $$ blankline + (lang:_) -> (".. code:: " <> literal lang) $$ numberlines) + $+$ nest 3 (literal str) $$ blankline blockToRST (BlockQuote blocks) = do contents <- blockListToRST blocks return $ nest 3 contents <> blankline @@ -314,9 +315,9 @@ blockToRST (OrderedList (start, style', delim) items) = do then replicate (length items) "#." else take (length items) $ orderedListMarkers (start, style', delim) - let maxMarkerLength = maximum $ map length markers - let markers' = map (\m -> let s = maxMarkerLength - length m - in m ++ replicate s ' ') markers + let maxMarkerLength = maximum $ map T.length markers + let markers' = map (\m -> let s = maxMarkerLength - T.length m + in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToRST markers' items -- ensure that sublists have preceding blank line return $ blankline $$ @@ -338,13 +339,13 @@ bulletListItemToRST items = do -- | Convert ordered list item (a list of blocks) to RST. orderedListItemToRST :: PandocMonad m - => String -- ^ marker for list item + => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> RST m (Doc Text) orderedListItemToRST marker items = do contents <- blockListToRST items - let marker' = marker ++ " " - return $ hang (length marker') (text marker') contents $$ + let marker' = marker <> " " + return $ hang (T.length marker') (literal marker') contents $$ if endsWithPlain items then cr else blankline @@ -364,7 +365,7 @@ linesToLineBlock :: PandocMonad m => [[Inline]] -> RST m (Doc Text) linesToLineBlock inlineLines = do lns <- mapM inlineListToRST inlineLines return $ - vcat (map (hang 2 (text "| ")) lns) <> blankline + vcat (map (hang 2 (literal "| ")) lns) <> blankline -- | Convert list of Pandoc block elements to RST. blockListToRST' :: PandocMonad m @@ -376,13 +377,13 @@ blockListToRST' topLevel blocks = do let fixBlocks (b1:b2@(BlockQuote _):bs) | toClose b1 = b1 : commentSep : b2 : fixBlocks bs where - toClose Plain{} = False - toClose Header{} = False - toClose LineBlock{} = False - toClose HorizontalRule = False - toClose (Para [Image _ _ (_,'f':'i':'g':':':_)]) = True - toClose Para{} = False - toClose _ = True + toClose Plain{} = False + toClose Header{} = False + toClose LineBlock{} = False + toClose HorizontalRule = False + toClose (Para [Image _ _ (_,t)]) = "fig:" `T.isPrefixOf` t + toClose Para{} = False + toClose _ = True commentSep = RawBlock "rst" "..\n\n" fixBlocks (b:bs) = b : fixBlocks bs fixBlocks [] = [] @@ -438,26 +439,30 @@ transformInlines = insertBS . transformNested :: [Inline] -> [Inline] transformNested = map (mapNested stripLeadingTrailingSpace) surroundComplex :: Inline -> Inline -> Bool - surroundComplex (Str s@(_:_)) (Str s'@(_:_)) = - case (last s, head s') of - ('\'','\'') -> True - ('"','"') -> True - ('<','>') -> True - ('[',']') -> True - ('{','}') -> True - _ -> False + surroundComplex (Str s) (Str s') + | Just (_, c) <- T.unsnoc s + , Just (c', _) <- T.uncons s' + = case (c, c') of + ('\'','\'') -> True + ('"','"') -> True + ('<','>') -> True + ('[',']') -> True + ('{','}') -> True + _ -> False surroundComplex _ _ = False okAfterComplex :: Inline -> Bool okAfterComplex Space = True okAfterComplex SoftBreak = True okAfterComplex LineBreak = True - okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String) + okAfterComplex (Str (T.uncons -> Just (c,_))) + = isSpace c || c `elemText` "-.,:;!?\\/'\")]}>–—" okAfterComplex _ = False okBeforeComplex :: Inline -> Bool okBeforeComplex Space = True okBeforeComplex SoftBreak = True okBeforeComplex LineBreak = True - okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String) + okBeforeComplex (Str (T.uncons -> Just (c,_))) + = isSpace c || c `elemText` "-:/'\"<([{–—" okBeforeComplex _ = False isComplex :: Inline -> Bool isComplex (Emph _) = True @@ -563,7 +568,7 @@ inlineToRST (Span (_,_,kvs) ils) = do contents <- writeInlines ils return $ case lookup "role" kvs of - Just role -> ":" <> text role <> ":`" <> contents <> "`" + Just role -> ":" <> literal role <> ":`" <> contents <> "`" Nothing -> contents inlineToRST (Emph lst) = do contents <- writeInlines lst @@ -596,7 +601,7 @@ inlineToRST (Quoted DoubleQuote lst) = do inlineToRST (Cite _ lst) = writeInlines lst inlineToRST (Code (_,["interpreted-text"],[("role",role)]) str) = do - return $ ":" <> text role <> ":`" <> text str <> "`" + return $ ":" <> literal role <> ":`" <> literal str <> "`" inlineToRST (Code _ str) = do opts <- gets stOptions -- we trim the string because the delimiters must adjoin a @@ -604,28 +609,28 @@ inlineToRST (Code _ str) = do -- we use :literal: when the code contains backticks, since -- :literal: allows backslash-escapes; see #3974 return $ - if '`' `elem` str - then ":literal:`" <> text (escapeString opts (trim str)) <> "`" - else "``" <> text (trim str) <> "``" + if '`' `elemText` str + then ":literal:`" <> literal (escapeText opts (trim str)) <> "`" + else "``" <> literal (trim str) <> "``" inlineToRST (Str str) = do opts <- gets stOptions - return $ text $ + return $ literal $ (if isEnabled Ext_smart opts then unsmartify opts - else id) $ escapeString opts str + else id) $ escapeText opts str inlineToRST (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath - then ":math:`" <> text str <> "`" - else if '\n' `elem` str + then ":math:`" <> literal str <> "`" + else if '\n' `elemText` str then blankline $$ ".. math::" $$ - blankline $$ nest 3 (text str) $$ blankline - else blankline $$ (".. math:: " <> text str) $$ blankline + blankline $$ nest 3 (literal str) $$ blankline + else blankline $$ (".. math:: " <> literal str) $$ blankline inlineToRST il@(RawInline f x) - | f == "rst" = return $ text x + | f == "rst" = return $ literal x | f == "latex" || f == "tex" = do modify $ \st -> st{ stHasRawTeX = True } - return $ ":raw-latex:`" <> text x <> "`" + return $ ":raw-latex:`" <> literal x <> "`" | otherwise = empty <$ report (InlineNotRendered il) inlineToRST LineBreak = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space @@ -638,11 +643,11 @@ inlineToRST SoftBreak = do -- autolink inlineToRST (Link _ [Str str] (src, _)) | isURI src && - if "mailto:" `isPrefixOf` src - then src == escapeURI ("mailto:" ++ str) + if "mailto:" `T.isPrefixOf` src + then src == escapeURI ("mailto:" <> str) else src == escapeURI str = do - let srcSuffix = fromMaybe src (stripPrefix "mailto:" src) - return $ text srcSuffix + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) + return $ literal srcSuffix inlineToRST (Link _ [Image attr alt (imgsrc,imgtit)] (src, _tit)) = do label <- registerImage attr alt (imgsrc,imgtit) (Just src) return $ "|" <> label <> "|" @@ -656,11 +661,11 @@ inlineToRST (Link _ txt (src, tit)) = do if src == src' && tit == tit' then return $ "`" <> linktext <> "`_" else - return $ "`" <> linktext <> " <" <> text src <> ">`__" + return $ "`" <> linktext <> " <" <> literal src <> ">`__" Nothing -> do modify $ \st -> st { stLinks = (txt,(src,tit)):refs } return $ "`" <> linktext <> "`_" - else return $ "`" <> linktext <> " <" <> text src <> ">`__" + else return $ "`" <> linktext <> " <" <> literal src <> ">`__" inlineToRST (Image attr alternate (source, tit)) = do label <- registerImage attr alternate (source,tit) Nothing return $ "|" <> label <> "|" @@ -671,7 +676,7 @@ inlineToRST (Note contents) = do let ref = show $ length notes + 1 return $ " [" <> text ref <> "]_" -registerImage :: PandocMonad m => Attr -> [Inline] -> Target -> Maybe String -> RST m (Doc Text) +registerImage :: PandocMonad m => Attr -> [Inline] -> Target -> Maybe Text -> RST m (Doc Text) registerImage attr alt (src,tit) mbtarget = do pics <- gets stImages txt <- case lookup alt pics of @@ -679,7 +684,7 @@ registerImage attr alt (src,tit) mbtarget = do -> return alt _ -> do let alt' = if null alt || alt == [Str ""] - then [Str $ "image" ++ show (length pics)] + then [Str $ "image" <> tshow (length pics)] else alt modify $ \st -> st { stImages = (alt', (attr,src,tit, mbtarget)):stImages st } @@ -689,9 +694,9 @@ registerImage attr alt (src,tit) mbtarget = do imageDimsToRST :: PandocMonad m => Attr -> RST m (Doc Text) imageDimsToRST attr = do let (ident, _, _) = attr - name = if null ident + name = if T.null ident then empty - else ":name: " <> text ident + else ":name: " <> literal ident showDim dir = let cols d = ":" <> text (show dir) <> ": " <> text (show d) in case dimension dir attr of Just (Percent a) -> @@ -711,7 +716,7 @@ simpleTable :: PandocMonad m simpleTable opts blocksToDoc headers rows = do -- can't have empty cells in first column: let fixEmpties (d:ds) = if isEmpty d - then text "\\ " : ds + then literal "\\ " : ds else d : ds fixEmpties [] = [] headerDocs <- if all null headers @@ -722,7 +727,7 @@ simpleTable opts blocksToDoc headers rows = do numChars xs = maximum . map offset $ xs let colWidths = map numChars $ transpose (headerDocs : rowDocs) let toRow = mconcat . intersperse (lblock 1 " ") . zipWith lblock colWidths - let hline = nowrap $ hsep (map (\n -> text (replicate n '=')) colWidths) + let hline = nowrap $ hsep (map (\n -> literal (T.replicate n "=")) colWidths) let hdr = if all null headers then mempty else hline $$ toRow headerDocs diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 366b4cdcd..08f0df0f8 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.RTF Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -18,7 +19,6 @@ import Control.Monad.Except (catchError, throwError) import Control.Monad import qualified Data.ByteString as B import Data.Char (chr, isDigit, ord) -import Data.List (intercalate, isSuffixOf) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T @@ -46,28 +46,28 @@ rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError case result of (imgdata, Just mime) | mime == "image/jpeg" || mime == "image/png" -> do - let bytes = map (printf "%02x") $ B.unpack imgdata + let bytes = map (T.pack . printf "%02x") $ B.unpack imgdata filetype <- case mime of "image/jpeg" -> return "\\jpegblip" "image/png" -> return "\\pngblip" _ -> throwError $ PandocShouldNeverHappenError $ - "Unknown file type " ++ mime + "Unknown file type " <> mime sizeSpec <- case imageSize opts imgdata of Left msg -> do report $ CouldNotDetermineImageSize src msg return "" - Right sz -> return $ "\\picw" ++ show xpx ++ - "\\pich" ++ show ypx ++ - "\\picwgoal" ++ show (floor (xpt * 20) :: Integer) - ++ "\\pichgoal" ++ show (floor (ypt * 20) :: Integer) + Right sz -> return $ "\\picw" <> tshow xpx <> + "\\pich" <> tshow ypx <> + "\\picwgoal" <> tshow (floor (xpt * 20) :: Integer) + <> "\\pichgoal" <> tshow (floor (ypt * 20) :: Integer) -- twip = 1/1440in = 1/20pt where (xpx, ypx) = sizeInPixels sz (xpt, ypt) = desiredSizeInPoints opts attr sz - let raw = "{\\pict" ++ filetype ++ sizeSpec ++ "\\bin " ++ - concat bytes ++ "}" + let raw = "{\\pict" <> filetype <> sizeSpec <> "\\bin " <> + T.concat bytes <> "}" if B.null imgdata then do report $ CouldNotFetchResource src "image contained no data" @@ -80,7 +80,7 @@ rtfEmbedImage opts x@(Image attr _ (src,_)) = catchError report $ CouldNotDetermineMimeType src return x) (\e -> do - report $ CouldNotFetchResource src (show e) + report $ CouldNotFetchResource src $ tshow e return x) rtfEmbedImage _ x = return x @@ -98,12 +98,12 @@ writeRTF options doc = do . M.adjust toPlain "date" $ metamap metadata <- metaToContext options - (fmap (literal . T.pack . concat) . + (fmap (literal . T.concat) . mapM (blockToRTF 0 AlignDefault)) - (fmap (literal . T.pack) . inlinesToRTF) + (fmap literal . inlinesToRTF) meta' - body <- T.pack <$> blocksToRTF 0 AlignDefault blocks - toc <- T.pack <$> blocksToRTF 0 AlignDefault + body <- blocksToRTF 0 AlignDefault blocks + toc <- blocksToRTF 0 AlignDefault [toTableOfContents options $ filter isHeaderBlock blocks] let context = defField "body" body $ defField "spacer" spacer @@ -122,25 +122,24 @@ writeRTF options doc = do _ -> body <> T.singleton '\n' -- | Convert unicode characters (> 127) into rich text format representation. -handleUnicode :: String -> String -handleUnicode [] = [] -handleUnicode (c:cs) = +handleUnicode :: Text -> Text +handleUnicode = T.concatMap $ \c -> if ord c > 127 then if surrogate c then let x = ord c - 0x10000 (q, r) = x `divMod` 0x400 upper = q + 0xd800 lower = r + 0xDC00 - in enc (chr upper) ++ enc (chr lower) ++ handleUnicode cs - else enc c ++ handleUnicode cs - else c:handleUnicode cs + in enc (chr upper) <> enc (chr lower) + else enc c + else T.singleton c where surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff) || (0xe000 <= ord x && ord x <= 0xffff) ) - enc x = '\\':'u':show (ord x) ++ "?" + enc x = "\\u" <> tshow (ord x) <> "?" -- | Escape special characters. -escapeSpecial :: String -> String +escapeSpecial :: Text -> Text escapeSpecial = escapeStringUsing $ [ ('\t',"\\tab ") , ('\8216',"\\u8216'") @@ -149,47 +148,47 @@ escapeSpecial = escapeStringUsing $ , ('\8221',"\\u8221\"") , ('\8211',"\\u8211-") , ('\8212',"\\u8212-") - ] ++ backslashEscapes "{\\}" + ] <> backslashEscapes "{\\}" -- | Escape strings as needed for rich text format. -stringToRTF :: String -> String +stringToRTF :: Text -> Text stringToRTF = handleUnicode . escapeSpecial -- | Escape things as needed for code block in RTF. -codeStringToRTF :: String -> String -codeStringToRTF str = intercalate "\\line\n" $ lines (stringToRTF str) +codeStringToRTF :: Text -> Text +codeStringToRTF str = T.intercalate "\\line\n" $ T.lines (stringToRTF str) -- | Make a paragraph with first-line indent, block indent, and space after. rtfParSpaced :: Int -- ^ space after (in twips) -> Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String + -> Text -- ^ string with content + -> Text rtfParSpaced spaceAfter indent firstLineIndent alignment content = let alignString = case alignment of AlignLeft -> "\\ql " AlignRight -> "\\qr " AlignCenter -> "\\qc " AlignDefault -> "\\ql " - in "{\\pard " ++ alignString ++ - "\\f0 \\sa" ++ show spaceAfter ++ " \\li" ++ show indent ++ - " \\fi" ++ show firstLineIndent ++ " " ++ content ++ "\\par}\n" + in "{\\pard " <> alignString <> + "\\f0 \\sa" <> tshow spaceAfter <> " \\li" <> T.pack (show indent) <> + " \\fi" <> tshow firstLineIndent <> " " <> content <> "\\par}\n" -- | Default paragraph. rtfPar :: Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String + -> Text -- ^ string with content + -> Text rtfPar = rtfParSpaced 180 -- | Compact paragraph (e.g. for compact list items). rtfCompact :: Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String + -> Text -- ^ string with content + -> Text rtfCompact = rtfParSpaced 0 -- number of twips to indent @@ -200,13 +199,13 @@ listIncrement :: Int listIncrement = 360 -- | Returns appropriate bullet list marker for indent level. -bulletMarker :: Int -> String +bulletMarker :: Int -> Text bulletMarker indent = case indent `mod` 720 of 0 -> "\\bullet " _ -> "\\endash " -- | Returns appropriate (list of) ordered list markers for indent level. -orderedMarkers :: Int -> ListAttributes -> [String] +orderedMarkers :: Int -> ListAttributes -> [Text] orderedMarkers indent (start, style, delim) = if style == DefaultStyle && delim == DefaultDelim then case indent `mod` 720 of @@ -218,15 +217,15 @@ blocksToRTF :: PandocMonad m => Int -> Alignment -> [Block] - -> m String -blocksToRTF indent align = fmap concat . mapM (blockToRTF indent align) + -> m Text +blocksToRTF indent align = fmap T.concat . mapM (blockToRTF indent align) -- | Convert Pandoc block element to RTF. blockToRTF :: PandocMonad m => Int -- ^ indent level -> Alignment -- ^ alignment -> Block -- ^ block to convert - -> m String + -> m Text blockToRTF _ _ Null = return "" blockToRTF indent alignment (Div _ bs) = blocksToRTF indent alignment bs @@ -239,139 +238,143 @@ blockToRTF indent alignment (LineBlock lns) = blockToRTF indent alignment (BlockQuote lst) = blocksToRTF (indent + indentIncrement) alignment lst blockToRTF indent _ (CodeBlock _ str) = - return $ rtfPar indent 0 AlignLeft ("\\f1 " ++ codeStringToRTF str) + return $ rtfPar indent 0 AlignLeft ("\\f1 " <> codeStringToRTF str) blockToRTF _ _ b@(RawBlock f str) | f == Format "rtf" = return str | otherwise = do report $ BlockNotRendered b return "" -blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . concat) <$> +blockToRTF indent alignment (BulletList lst) = (spaceAtEnd . T.concat) <$> mapM (listItemToRTF alignment indent (bulletMarker indent)) lst blockToRTF indent alignment (OrderedList attribs lst) = - (spaceAtEnd . concat) <$> + (spaceAtEnd . T.concat) <$> zipWithM (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . concat) <$> +blockToRTF indent alignment (DefinitionList lst) = (spaceAtEnd . T.concat) <$> mapM (definitionListItemToRTF alignment indent) lst blockToRTF indent _ HorizontalRule = return $ rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" blockToRTF indent alignment (Header level _ lst) = do contents <- inlinesToRTF lst return $ rtfPar indent 0 alignment $ - "\\b \\fs" ++ show (40 - (level * 4)) ++ " " ++ contents + "\\b \\fs" <> tshow (40 - (level * 4)) <> " " <> contents blockToRTF indent alignment (Table caption aligns sizes headers rows) = do caption' <- inlinesToRTF caption header' <- if all null headers then return "" else tableRowToRTF True indent aligns sizes headers - rows' <- concat <$> mapM (tableRowToRTF False indent aligns sizes) rows - return $ header' ++ rows' ++ rtfPar indent 0 alignment caption' + rows' <- T.concat <$> mapM (tableRowToRTF False indent aligns sizes) rows + return $ header' <> rows' <> rtfPar indent 0 alignment caption' tableRowToRTF :: PandocMonad m - => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m String + => Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> m Text tableRowToRTF header indent aligns sizes' cols = do let totalTwips = 6 * 1440 -- 6 inches let sizes = if all (== 0) sizes' then replicate (length cols) (1.0 / fromIntegral (length cols)) else sizes' - columns <- concat <$> + columns <- T.concat <$> zipWithM (tableItemToRTF indent) aligns cols let rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) (0 :: Integer) sizes let cellDefs = map (\edge -> (if header then "\\clbrdrb\\brdrs" - else "") ++ "\\cellx" ++ show edge) + else "") <> "\\cellx" <> tshow edge) rightEdges - let start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ + let start = "{\n\\trowd \\trgaph120\n" <> T.concat cellDefs <> "\n" <> "\\trkeep\\intbl\n{\n" let end = "}\n\\intbl\\row}\n" - return $ start ++ columns ++ end + return $ start <> columns <> end -tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m String +tableItemToRTF :: PandocMonad m => Int -> Alignment -> [Block] -> m Text tableItemToRTF indent alignment item = do contents <- blocksToRTF indent alignment item - return $ "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" + return $ "{" <> T.replace "\\pard" "\\pard\\intbl" contents <> "\\cell}\n" -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. -spaceAtEnd :: String -> String -spaceAtEnd str = - if "\\par}\n" `isSuffixOf` str - then take (length str - 6) str ++ "\\sa180\\par}\n" - else str +spaceAtEnd :: Text -> Text +spaceAtEnd str = maybe str (<> "\\sa180\\par}\n") $ T.stripSuffix "\\par}\n" str -- | Convert list item (list of blocks) to RTF. listItemToRTF :: PandocMonad m => Alignment -- ^ alignment -> Int -- ^ indent level - -> String -- ^ list start marker + -> Text -- ^ list start marker -> [Block] -- ^ list item (list of blocks) - -> m String + -> m Text listItemToRTF alignment indent marker [] = return $ rtfCompact (indent + listIncrement) (negate listIncrement) alignment - (marker ++ "\\tx" ++ show listIncrement ++ "\\tab ") + (marker <> "\\tx" <> tshow listIncrement <> "\\tab ") listItemToRTF alignment indent marker (listFirst:listRest) = do let f = blockToRTF (indent + listIncrement) alignment first <- f listFirst rest <- mapM f listRest - let listMarker = "\\fi" ++ show (negate listIncrement) ++ " " ++ marker ++ - "\\tx" ++ show listIncrement ++ "\\tab" - let insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = - listMarker ++ dropWhile isDigit xs - insertListMarker ('\\':'f':'i':d:xs) | isDigit d = - listMarker ++ dropWhile isDigit xs - insertListMarker (x:xs) = - x : insertListMarker xs - insertListMarker [] = [] + let listMarker = "\\fi" <> tshow (negate listIncrement) <> " " <> marker <> + "\\tx" <> tshow listIncrement <> "\\tab" + -- Find the first occurrence of \\fi or \\fi-, then replace it and the following + -- digits with the list marker. + let insertListMarker t = case popDigit $ optionDash $ T.drop 3 suff of + Just suff' -> pref <> listMarker <> T.dropWhile isDigit suff' + Nothing -> t + where + (pref, suff) = T.breakOn "\\fi" t + optionDash x = case T.uncons x of + Just ('-', xs) -> xs + _ -> x + popDigit x + | Just (d, xs) <- T.uncons x + , isDigit d = Just xs + | otherwise = Nothing -- insert the list marker into the (processed) first block - return $ insertListMarker first ++ concat rest + return $ insertListMarker first <> T.concat rest -- | Convert definition list item (label, list of blocks) to RTF. definitionListItemToRTF :: PandocMonad m => Alignment -- ^ alignment -> Int -- ^ indent level -> ([Inline],[[Block]]) -- ^ list item (list of blocks) - -> m String + -> m Text definitionListItemToRTF alignment indent (label, defs) = do labelText <- blockToRTF indent alignment (Plain label) itemsText <- blocksToRTF (indent + listIncrement) alignment (concat defs) - return $ labelText ++ itemsText + return $ labelText <> itemsText -- | Convert list of inline items to RTF. inlinesToRTF :: PandocMonad m => [Inline] -- ^ list of inlines to convert - -> m String -inlinesToRTF lst = concat <$> mapM inlineToRTF lst + -> m Text +inlinesToRTF lst = T.concat <$> mapM inlineToRTF lst -- | Convert inline item to RTF. inlineToRTF :: PandocMonad m => Inline -- ^ inline to convert - -> m String + -> m Text inlineToRTF (Span _ lst) = inlinesToRTF lst inlineToRTF (Emph lst) = do contents <- inlinesToRTF lst - return $ "{\\i " ++ contents ++ "}" + return $ "{\\i " <> contents <> "}" inlineToRTF (Strong lst) = do contents <- inlinesToRTF lst - return $ "{\\b " ++ contents ++ "}" + return $ "{\\b " <> contents <> "}" inlineToRTF (Strikeout lst) = do contents <- inlinesToRTF lst - return $ "{\\strike " ++ contents ++ "}" + return $ "{\\strike " <> contents <> "}" inlineToRTF (Superscript lst) = do contents <- inlinesToRTF lst - return $ "{\\super " ++ contents ++ "}" + return $ "{\\super " <> contents <> "}" inlineToRTF (Subscript lst) = do contents <- inlinesToRTF lst - return $ "{\\sub " ++ contents ++ "}" + return $ "{\\sub " <> contents <> "}" inlineToRTF (SmallCaps lst) = do contents <- inlinesToRTF lst - return $ "{\\scaps " ++ contents ++ "}" + return $ "{\\scaps " <> contents <> "}" inlineToRTF (Quoted SingleQuote lst) = do contents <- inlinesToRTF lst - return $ "\\u8216'" ++ contents ++ "\\u8217'" + return $ "\\u8216'" <> contents <> "\\u8217'" inlineToRTF (Quoted DoubleQuote lst) = do contents <- inlinesToRTF lst - return $ "\\u8220\"" ++ contents ++ "\\u8221\"" -inlineToRTF (Code _ str) = return $ "{\\f1 " ++ codeStringToRTF str ++ "}" + return $ "\\u8220\"" <> contents <> "\\u8221\"" +inlineToRTF (Code _ str) = return $ "{\\f1 " <> codeStringToRTF str <> "}" inlineToRTF (Str str) = return $ stringToRTF str inlineToRTF (Math t str) = texMathToInlines t str >>= inlinesToRTF inlineToRTF (Cite _ lst) = inlinesToRTF lst @@ -385,11 +388,11 @@ inlineToRTF SoftBreak = return " " inlineToRTF Space = return " " inlineToRTF (Link _ text (src, _)) = do contents <- inlinesToRTF text - return $ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ codeStringToRTF src ++ - "\"}}{\\fldrslt{\\ul\n" ++ contents ++ "\n}}}\n" + return $ "{\\field{\\*\\fldinst{HYPERLINK \"" <> codeStringToRTF src <> + "\"}}{\\fldrslt{\\ul\n" <> contents <> "\n}}}\n" inlineToRTF (Image _ _ (source, _)) = - return $ "{\\cf1 [image: " ++ source ++ "]\\cf0}" + return $ "{\\cf1 [image: " <> source <> "]\\cf0}" inlineToRTF (Note contents) = do - body <- concat <$> mapM (blockToRTF 0 AlignDefault) contents - return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - body ++ "}" + body <- T.concat <$> mapM (blockToRTF 0 AlignDefault) contents + return $ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " <> + body <> "}" diff --git a/src/Text/Pandoc/Writers/Roff.hs b/src/Text/Pandoc/Writers/Roff.hs index 4dadb1073..2718b3f13 100644 --- a/src/Text/Pandoc/Writers/Roff.hs +++ b/src/Text/Pandoc/Writers/Roff.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Roff Copyright : Copyright (C) 2007-2019 John MacFarlane @@ -24,6 +25,8 @@ import Prelude import Data.Char (ord, isAscii) import Control.Monad.State.Strict import qualified Data.Map as Map +import Data.Text (Text) +import qualified Data.Text as Text import Data.String import Data.Maybe (fromMaybe, isJust, catMaybes) import Text.Pandoc.Class (PandocMonad) @@ -66,36 +69,38 @@ data EscapeMode = AllowUTF8 -- ^ use preferred man escapes | AsciiOnly -- ^ escape everything deriving Show -combiningAccentsMap :: Map.Map Char String +combiningAccentsMap :: Map.Map Char Text combiningAccentsMap = Map.fromList combiningAccents -essentialEscapes :: Map.Map Char String +essentialEscapes :: Map.Map Char Text essentialEscapes = Map.fromList standardEscapes -- | Escape special characters for roff. -escapeString :: EscapeMode -> String -> String -escapeString _ [] = [] -escapeString escapeMode ('\n':'.':xs) = - '\n':'\\':'&':'.':escapeString escapeMode xs -escapeString escapeMode (x:xs) = - case Map.lookup x essentialEscapes of - Just s -> s ++ escapeString escapeMode xs - Nothing - | isAscii x -> x : escapeString escapeMode xs - | otherwise -> - case escapeMode of - AllowUTF8 -> x : escapeString escapeMode xs - AsciiOnly -> - let accents = catMaybes $ takeWhile isJust - (map (\c -> Map.lookup c combiningAccentsMap) xs) - rest = drop (length accents) xs - s = case Map.lookup x characterCodeMap of - Just t -> "\\[" <> unwords (t:accents) <> "]" - Nothing -> "\\[" <> unwords - (printf "u%04X" (ord x) : accents) <> "]" - in s ++ escapeString escapeMode rest +escapeString :: EscapeMode -> Text -> Text +escapeString e = Text.concat . escapeString' e . Text.unpack + where + escapeString' _ [] = [] + escapeString' escapeMode ('\n':'.':xs) = + "\n\\&." : escapeString' escapeMode xs + escapeString' escapeMode (x:xs) = + case Map.lookup x essentialEscapes of + Just s -> s : escapeString' escapeMode xs + Nothing + | isAscii x -> Text.singleton x : escapeString' escapeMode xs + | otherwise -> + case escapeMode of + AllowUTF8 -> Text.singleton x : escapeString' escapeMode xs + AsciiOnly -> + let accents = catMaybes $ takeWhile isJust + (map (\c -> Map.lookup c combiningAccentsMap) xs) + rest = drop (length accents) xs + s = case Map.lookup x characterCodeMap of + Just t -> "\\[" <> Text.unwords (t:accents) <> "]" + Nothing -> "\\[" <> Text.unwords + (Text.pack (printf "u%04X" (ord x)) : accents) <> "]" + in s : escapeString' escapeMode rest -characterCodeMap :: Map.Map Char String +characterCodeMap :: Map.Map Char Text characterCodeMap = Map.fromList characterCodes fontChange :: (HasChars a, IsString a, PandocMonad m) => MS m (Doc a) diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 359a1bb3c..9aa19c2d9 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -83,11 +83,8 @@ metaToContext' :: (Monad m, TemplateTarget a) -> ([Inline] -> m (Doc a)) -> Meta -> m (Context a) -metaToContext' blockWriter inlineWriter (Meta metamap) = do - renderedMap <- mapM (metaValueToVal blockWriter inlineWriter) metamap - return $ Context - $ M.foldrWithKey (\k v x -> M.insert (T.pack k) v x) mempty - $ renderedMap +metaToContext' blockWriter inlineWriter (Meta metamap) = + Context <$> mapM (metaValueToVal blockWriter inlineWriter) metamap -- | Add variables to a template Context, replacing any existing values. addVariablesToContext :: TemplateTarget a @@ -109,8 +106,7 @@ metaValueToVal :: (Monad m, TemplateTarget a) -> MetaValue -> m (Val a) metaValueToVal blockWriter inlineWriter (MetaMap metamap) = - MapVal . Context . M.mapKeys T.pack <$> - mapM (metaValueToVal blockWriter inlineWriter) metamap + MapVal . Context <$> mapM (metaValueToVal blockWriter inlineWriter) metamap metaValueToVal blockWriter inlineWriter (MetaList xs) = ListVal <$> mapM (metaValueToVal blockWriter inlineWriter) xs metaValueToVal _ _ (MetaBool True) = return $ SimpleVal "true" @@ -122,15 +118,15 @@ metaValueToVal _ inlineWriter (MetaInlines is) = SimpleVal <$> inlineWriter is -- | Retrieve a field value from a template context. -getField :: FromContext a b => String -> Context a -> Maybe b -getField field (Context m) = M.lookup (T.pack field) m >>= fromVal +getField :: FromContext a b => T.Text -> Context a -> Maybe b +getField field (Context m) = M.lookup field m >>= fromVal -- | Set a field of a template context. If the field already has a value, -- convert it into a list with the new value appended to the old value(s). -- This is a utility function to be used in preparing template contexts. -setField :: ToContext a b => String -> b -> Context a -> Context a +setField :: ToContext a b => T.Text -> b -> Context a -> Context a setField field val (Context m) = - Context $ M.insertWith combine (T.pack field) (toVal val) m + Context $ M.insertWith combine field (toVal val) m where combine newval (ListVal xs) = ListVal (xs ++ [newval]) combine newval x = ListVal [x, newval] @@ -138,31 +134,31 @@ setField field val (Context m) = -- | Reset a field of a template context. If the field already has a -- value, the new value replaces it. -- This is a utility function to be used in preparing template contexts. -resetField :: ToContext a b => String -> b -> Context a -> Context a +resetField :: ToContext a b => T.Text -> b -> Context a -> Context a resetField field val (Context m) = - Context (M.insert (T.pack field) (toVal val) m) + Context (M.insert field (toVal val) m) -- | Set a field of a template context if it currently has no value. -- If it has a value, do nothing. -- This is a utility function to be used in preparing template contexts. -defField :: ToContext a b => String -> b -> Context a -> Context a +defField :: ToContext a b => T.Text -> b -> Context a -> Context a defField field val (Context m) = - Context (M.insertWith f (T.pack field) (toVal val) m) + Context (M.insertWith f field (toVal val) m) where f _newval oldval = oldval -- Produce an HTML tag with the given pandoc attributes. -tagWithAttrs :: HasChars a => String -> Attr -> Doc a +tagWithAttrs :: HasChars a => T.Text -> Attr -> Doc a tagWithAttrs tag (ident,classes,kvs) = hsep - ["<" <> text tag - ,if null ident + ["<" <> text (T.unpack tag) + ,if T.null ident then empty - else "id=" <> doubleQuotes (text ident) + else "id=" <> doubleQuotes (text $ T.unpack ident) ,if null classes then empty - else "class=" <> doubleQuotes (text (unwords classes)) - ,hsep (map (\(k,v) -> text k <> "=" <> - doubleQuotes (text (escapeStringForXML v))) kvs) + else "class=" <> doubleQuotes (text $ T.unpack (T.unwords classes)) + ,hsep (map (\(k,v) -> text (T.unpack k) <> "=" <> + doubleQuotes (text $ T.unpack (escapeStringForXML v))) kvs) ] <> ">" isDisplayMath :: Inline -> Bool @@ -198,20 +194,20 @@ fixDisplayMath (Para lst) not (isDisplayMath x || isDisplayMath y)) lst fixDisplayMath x = x -unsmartify :: WriterOptions -> String -> String -unsmartify opts ('\8217':xs) = '\'' : unsmartify opts xs -unsmartify opts ('\8230':xs) = "..." ++ unsmartify opts xs -unsmartify opts ('\8211':xs) - | isEnabled Ext_old_dashes opts = '-' : unsmartify opts xs - | otherwise = "--" ++ unsmartify opts xs -unsmartify opts ('\8212':xs) - | isEnabled Ext_old_dashes opts = "--" ++ unsmartify opts xs - | otherwise = "---" ++ unsmartify opts xs -unsmartify opts ('\8220':xs) = '"' : unsmartify opts xs -unsmartify opts ('\8221':xs) = '"' : unsmartify opts xs -unsmartify opts ('\8216':xs) = '\'' : unsmartify opts xs -unsmartify opts (x:xs) = x : unsmartify opts xs -unsmartify _ [] = [] +unsmartify :: WriterOptions -> T.Text -> T.Text +unsmartify opts = T.concatMap $ \c -> case c of + '\8217' -> "'" + '\8230' -> "..." + '\8211' + | isEnabled Ext_old_dashes opts -> "-" + | otherwise -> "--" + '\8212' + | isEnabled Ext_old_dashes opts -> "--" + | otherwise -> "---" + '\8220' -> "\"" + '\8221' -> "\"" + '\8216' -> "'" + _ -> T.singleton c gridTable :: (Monad m, HasChars a) => WriterOptions @@ -315,22 +311,20 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do body $$ border '-' (repeat AlignDefault) widthsInChars - - -- | Retrieve the metadata value for a given @key@ -- and convert to Bool. -lookupMetaBool :: String -> Meta -> Bool +lookupMetaBool :: T.Text -> Meta -> Bool lookupMetaBool key meta = case lookupMeta key meta of - Just (MetaBlocks _) -> True - Just (MetaInlines _) -> True - Just (MetaString (_:_)) -> True - Just (MetaBool True) -> True - _ -> False + Just (MetaBlocks _) -> True + Just (MetaInlines _) -> True + Just (MetaString x) -> not (T.null x) + Just (MetaBool True) -> True + _ -> False -- | Retrieve the metadata value for a given @key@ -- and extract blocks. -lookupMetaBlocks :: String -> Meta -> [Block] +lookupMetaBlocks :: T.Text -> Meta -> [Block] lookupMetaBlocks key meta = case lookupMeta key meta of Just (MetaBlocks bs) -> bs @@ -340,7 +334,7 @@ lookupMetaBlocks key meta = -- | Retrieve the metadata value for a given @key@ -- and extract inlines. -lookupMetaInlines :: String -> Meta -> [Inline] +lookupMetaInlines :: T.Text -> Meta -> [Inline] lookupMetaInlines key meta = case lookupMeta key meta of Just (MetaString s) -> [Str s] @@ -351,16 +345,15 @@ lookupMetaInlines key meta = -- | Retrieve the metadata value for a given @key@ -- and convert to String. -lookupMetaString :: String -> Meta -> String +lookupMetaString :: T.Text -> Meta -> T.Text lookupMetaString key meta = case lookupMeta key meta of Just (MetaString s) -> s Just (MetaInlines ils) -> stringify ils Just (MetaBlocks bs) -> stringify bs - Just (MetaBool b) -> show b + Just (MetaBool b) -> T.pack (show b) _ -> "" - toSuperscript :: Char -> Maybe Char toSuperscript '1' = Just '\x00B9' toSuperscript '2' = Just '\x00B2' @@ -406,14 +399,14 @@ sectionToListItem opts (Div (ident,_,_) , lev < writerTOCDepth opts] where num = fromMaybe "" $ lookup "number" kvs - addNumber = if null num + addNumber = if T.null num then id else (Span ("",["toc-section-number"],[]) [Str num] :) . (Space :) headerText' = addNumber $ walk (deLink . deNote) ils - headerLink = if null ident + headerLink = if T.null ident then headerText' - else [Link nullAttr headerText' ('#':ident, "")] + else [Link nullAttr headerText' ("#" <> ident, "")] listContents = filter (not . null) $ map (sectionToListItem opts) subsecs sectionToListItem _ _ = [] diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index b9b5aaa85..78f7b2cad 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -14,9 +14,8 @@ Conversion of 'Pandoc' documents to Docbook XML. -} module Text.Pandoc.Writers.TEI (writeTEI) where import Prelude -import Data.Char (toLower) -import Data.List (isPrefixOf, stripPrefix) import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (languages, languagesByExtension) @@ -89,13 +88,13 @@ listItemToTEI :: PandocMonad m => WriterOptions -> [Block] -> m (Doc Text) listItemToTEI opts item = inTagsIndented "item" <$> blocksToTEI opts (map plainToPara item) -imageToTEI :: PandocMonad m => WriterOptions -> Attr -> String -> m (Doc Text) +imageToTEI :: PandocMonad m => WriterOptions -> Attr -> Text -> m (Doc Text) imageToTEI opts attr src = return $ selfClosingTag "graphic" $ ("url", src) : idFromAttr opts attr ++ dims where dims = go Width "width" ++ go Height "height" go dir dstr = case dimension dir attr of - Just a -> [(dstr, show a)] + Just a -> [(dstr, tshow a)] Nothing -> [] -- | Convert a Pandoc block element to TEI. @@ -111,7 +110,7 @@ blockToTEI opts (Div attr@(_,"section":_,_) (Header lvl _ ils : xs)) = divType = case lvl of n | n == -1 -> "part" | n == 0 -> "chapter" - | n >= 1 && n <= 5 -> "level" ++ show n + | n >= 1 && n <= 5 -> "level" <> tshow n | otherwise -> "section" titleContents <- inlinesToTEI opts ils contents <- blocksToTEI opts xs' @@ -150,15 +149,15 @@ blockToTEI opts (LineBlock lns) = blockToTEI opts (BlockQuote blocks) = inTagsIndented "quote" <$> blocksToTEI opts blocks blockToTEI _ (CodeBlock (_,classes,_) str) = - return $ text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <> - flush (text (escapeStringForXML str) <> cr <> text "</ab>") + return $ literal ("<ab type='codeblock " <> lang <> "'>") <> cr <> + flush (literal (escapeStringForXML str) <> cr <> text "</ab>") where lang = if null langs then "" else escapeStringForXML (head langs) - isLang l = map toLower l `elem` map (map toLower) languages + isLang l = T.toLower l `elem` map T.toLower languages langsFrom s = if isLang s then [s] - else languagesByExtension . map toLower $ s + else languagesByExtension . T.toLower $ s langs = concatMap langsFrom classes blockToTEI opts (BulletList lst) = do let attribs = [("type", "unordered")] @@ -178,13 +177,13 @@ blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = do else do fi <- blocksToTEI opts $ map plainToPara first re <- listItemsToTEI opts rest - return $ inTags True "item" [("n",show start)] fi $$ re + return $ inTags True "item" [("n",tshow start)] fi $$ re return $ inTags True "list" attribs items blockToTEI opts (DefinitionList lst) = do let attribs = [("type", "definition")] inTags True "list" attribs <$> deflistItemsToTEI opts lst blockToTEI _ b@(RawBlock f str) - | f == "tei" = return $ text str + | f == "tei" = return $ literal str -- raw TEI block (should such a thing exist). | otherwise = do report $ BlockNotRendered b @@ -230,7 +229,7 @@ inlinesToTEI opts lst = hcat <$> mapM (inlineToTEI opts) lst -- | Convert an inline element to TEI. inlineToTEI :: PandocMonad m => WriterOptions -> Inline -> m (Doc Text) -inlineToTEI _ (Str str) = return $ text $ escapeStringForXML str +inlineToTEI _ (Str str) = return $ literal $ escapeStringForXML str inlineToTEI opts (Emph lst) = inTags False "hi" [("rendition","simple:italic")] <$> inlinesToTEI opts lst inlineToTEI opts (Strong lst) = @@ -254,16 +253,16 @@ inlineToTEI opts (Cite _ lst) = inlineToTEI opts (Span _ ils) = inlinesToTEI opts ils inlineToTEI _ (Code _ str) = return $ - inTags False "seg" [("type","code")] $ text (escapeStringForXML str) + inTags False "seg" [("type","code")] $ literal (escapeStringForXML str) -- Distinguish display from inline math by wrapping the former in a "figure." inlineToTEI _ (Math t str) = return $ case t of InlineMath -> inTags False "formula" [("notation","TeX")] $ - text str + literal str DisplayMath -> inTags True "figure" [("type","math")] $ - inTags False "formula" [("notation","TeX")] $ text str + inTags False "formula" [("notation","TeX")] $ literal str -inlineToTEI _ il@(RawInline f x) | f == "tei" = return $ text x +inlineToTEI _ il@(RawInline f x) | f == "tei" = return $ literal x | otherwise = empty <$ report (InlineNotRendered il) inlineToTEI _ LineBreak = return $ selfClosingTag "lb" [] @@ -273,8 +272,8 @@ inlineToTEI _ Space = inlineToTEI _ SoftBreak = return space inlineToTEI opts (Link attr txt (src, _)) - | Just email <- stripPrefix "mailto:" src = do - let emailLink = text $ + | Just email <- T.stripPrefix "mailto:" src = do + let emailLink = literal $ escapeStringForXML email case txt of [Str s] | escapeURI s == email -> @@ -283,17 +282,17 @@ inlineToTEI opts (Link attr txt (src, _)) linktext <- inlinesToTEI opts txt return $ linktext <+> char '(' <> emailLink <> char ')' | otherwise = - (if "#" `isPrefixOf` src - then inTags False "ref" $ ("target", drop 1 src) + (if "#" `T.isPrefixOf` src + then inTags False "ref" $ ("target", T.drop 1 src) : idFromAttr opts attr else inTags False "ref" $ ("target", src) : idFromAttr opts attr ) <$> inlinesToTEI opts txt inlineToTEI opts (Image attr description (src, tit)) = do - let titleDoc = if null tit + let titleDoc = if T.null tit then empty else inTags False "figDesc" [] - (text $ escapeStringForXML tit) + (literal $ escapeStringForXML tit) imageDesc <- if null description then return empty else inTags False "head" [] @@ -303,8 +302,8 @@ inlineToTEI opts (Image attr description (src, tit)) = do inlineToTEI opts (Note contents) = inTagsIndented "note" <$> blocksToTEI opts contents -idFromAttr :: WriterOptions -> Attr -> [(String, String)] +idFromAttr :: WriterOptions -> Attr -> [(Text, Text)] idFromAttr opts (id',_,_) = - if null id' + if T.null id' then [] - else [("xml:id", writerIdentifierPrefix opts ++ id')] + else [("xml:id", writerIdentifierPrefix opts <> id')] diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 5c5eb7fd3..387858fd3 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -39,7 +39,7 @@ import Text.Printf (printf) data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout , stEscapeComma :: Bool -- in a context where we need @comma - , stIdentifiers :: Set.Set String -- header ids used already + , stIdentifiers :: Set.Set Text -- header ids used already , stOptions :: WriterOptions -- writer options } @@ -85,7 +85,7 @@ pandocToTexinfo options (Pandoc meta blocks) = do Just tpl -> renderTemplate tpl context -- | Escape things as needed for Texinfo. -stringToTexinfo :: String -> String +stringToTexinfo :: Text -> Text stringToTexinfo = escapeStringUsing texinfoEscapes where texinfoEscapes = [ ('{', "@{") , ('}', "@}") @@ -106,8 +106,8 @@ escapeCommas parser = do return res -- | Puts contents into Texinfo command. -inCmd :: String -> Doc Text -> Doc Text -inCmd cmd contents = char '@' <> text cmd <> braces contents +inCmd :: Text -> Doc Text -> Doc Text +inCmd cmd contents = char '@' <> literal cmd <> braces contents -- | Convert Pandoc block element to Texinfo. blockToTexinfo :: PandocMonad m @@ -122,13 +122,14 @@ blockToTexinfo (Plain lst) = inlineListToTexinfo lst -- title beginning with fig: indicates that the image is a figure -blockToTexinfo (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return empty - else (\c -> text "@caption" <> braces c) `fmap` - inlineListToTexinfo txt - img <- inlineToTexinfo (Image attr txt (src,tit)) - return $ text "@float" $$ img $$ capt $$ text "@end float" +blockToTexinfo (Para [Image attr txt (src,tgt)]) + | Just tit <- T.stripPrefix "fig:" tgt = do + capt <- if null txt + then return empty + else (\c -> text "@caption" <> braces c) `fmap` + inlineListToTexinfo txt + img <- inlineToTexinfo (Image attr txt (src,tit)) + return $ text "@float" $$ img $$ capt $$ text "@end float" blockToTexinfo (Para lst) = inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo @@ -145,13 +146,13 @@ blockToTexinfo (BlockQuote lst) = do blockToTexinfo (CodeBlock _ str) = return $ blankline $$ text "@verbatim" $$ - flush (text str) $$ + flush (literal str) $$ text "@end verbatim" <> blankline blockToTexinfo b@(RawBlock f str) - | f == "texinfo" = return $ text str + | f == "texinfo" = return $ literal str | f == "latex" || f == "tex" = - return $ text "@tex" $$ text str $$ text "@end tex" + return $ text "@tex" $$ literal str $$ text "@end tex" | otherwise = do report $ BlockNotRendered b return empty @@ -211,18 +212,18 @@ blockToTexinfo (Header level (ident,_,_) lst) txt <- inlineListToTexinfo lst idsUsed <- gets stIdentifiers opts <- gets stOptions - let id' = if null ident + let id' = if T.null ident then uniqueIdent (writerExtensions opts) lst idsUsed else ident modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed } sec <- seccmd level return $ if (level > 0) && (level <= 4) then blankline <> text "@node " <> node $$ - text sec <> txt $$ - text "@anchor" <> braces (text $ '#':id') + literal sec <> txt $$ + text "@anchor" <> braces (literal $ "#" <> id') else txt where - seccmd :: PandocMonad m => Int -> TI m String + seccmd :: PandocMonad m => Int -> TI m Text seccmd 1 = return "@chapter " seccmd 2 = return "@section " seccmd 3 = return "@subsection " @@ -266,13 +267,13 @@ tableRowToTexinfo :: PandocMonad m tableRowToTexinfo = tableAnyRowToTexinfo "@item " tableAnyRowToTexinfo :: PandocMonad m - => String + => Text -> [Alignment] -> [[Block]] -> TI m (Doc Text) tableAnyRowToTexinfo itemtype aligns cols = zipWithM alignedBlock aligns cols >>= - return . (text itemtype $$) . foldl (\row item -> row $$ + return . (literal itemtype $$) . foldl (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty alignedBlock :: PandocMonad m @@ -375,8 +376,8 @@ inlineListToTexinfo lst = hcat <$> mapM inlineToTexinfo lst inlineListForNode :: PandocMonad m => [Inline] -- ^ Inlines to convert -> TI m (Doc Text) -inlineListForNode = return . text . stringToTexinfo . - filter (not . disallowedInNode) . stringify +inlineListForNode = return . literal . stringToTexinfo . + T.filter (not . disallowedInNode) . stringify -- periods, commas, colons, and parentheses are disallowed in node names disallowedInNode :: Char -> Bool @@ -413,7 +414,7 @@ inlineToTexinfo (SmallCaps lst) = inCmd "sc" <$> inlineListToTexinfo lst inlineToTexinfo (Code _ str) = - return $ text $ "@code{" ++ stringToTexinfo str ++ "}" + return $ literal $ "@code{" <> stringToTexinfo str <> "}" inlineToTexinfo (Quoted SingleQuote lst) = do contents <- inlineListToTexinfo lst @@ -425,12 +426,12 @@ inlineToTexinfo (Quoted DoubleQuote lst) = do inlineToTexinfo (Cite _ lst) = inlineListToTexinfo lst -inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) -inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str +inlineToTexinfo (Str str) = return $ literal (stringToTexinfo str) +inlineToTexinfo (Math _ str) = return $ inCmd "math" $ literal str inlineToTexinfo il@(RawInline f str) | f == "latex" || f == "tex" = - return $ text "@tex" $$ text str $$ text "@end tex" - | f == "texinfo" = return $ text str + return $ text "@tex" $$ literal str $$ text "@end tex" + | f == "texinfo" = return $ literal str | otherwise = do report $ InlineNotRendered il return empty @@ -443,35 +444,36 @@ inlineToTexinfo SoftBreak = do WrapPreserve -> return cr inlineToTexinfo Space = return space -inlineToTexinfo (Link _ txt (src@('#':_), _)) = do - contents <- escapeCommas $ inlineListToTexinfo txt - return $ text "@ref" <> - braces (text (stringToTexinfo src) <> text "," <> contents) -inlineToTexinfo (Link _ txt (src, _)) = - case txt of - [Str x] | escapeURI x == src -> -- autolink - return $ text $ "@url{" ++ x ++ "}" - _ -> do contents <- escapeCommas $ inlineListToTexinfo txt - let src1 = stringToTexinfo src - return $ text ("@uref{" ++ src1 ++ ",") <> contents <> - char '}' +inlineToTexinfo (Link _ txt (src, _)) + | Just ('#', _) <- T.uncons src = do + contents <- escapeCommas $ inlineListToTexinfo txt + return $ text "@ref" <> + braces (literal (stringToTexinfo src) <> text "," <> contents) + | otherwise = case txt of + [Str x] | escapeURI x == src -> -- autolink + return $ literal $ "@url{" <> x <> "}" + _ -> do + contents <- escapeCommas $ inlineListToTexinfo txt + let src1 = stringToTexinfo src + return $ literal ("@uref{" <> src1 <> ",") <> contents <> + char '}' inlineToTexinfo (Image attr alternate (source, _)) = do content <- escapeCommas $ inlineListToTexinfo alternate opts <- gets stOptions let showDim dim = case dimension dim attr of - (Just (Pixel a)) -> showInInch opts (Pixel a) ++ "in" + (Just (Pixel a)) -> showInInch opts (Pixel a) <> "in" (Just (Percent _)) -> "" - (Just d) -> show d + (Just d) -> tshow d Nothing -> "" - return $ text ("@image{" ++ base ++ ',':(showDim Width) ++ ',':(showDim Height) ++ ",") - <> content <> text "," <> text (ext ++ "}") + return $ literal ("@image{" <> base <> "," <> showDim Width <> "," <> showDim Height <> ",") + <> content <> text "," <> literal (ext <> "}") where - ext = drop 1 $ takeExtension source' - base = dropExtension source' + ext = T.drop 1 $ T.pack $ takeExtension source' + base = T.pack $ dropExtension source' source' = if isURI source - then source - else unEscapeString source + then T.unpack source + else unEscapeString $ T.unpack source inlineToTexinfo (Note contents) = do contents' <- blockListToTexinfo contents diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 1a7c386e0..c0c5727d7 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Textile Copyright : Copyright (C) 2010-2019 John MacFarlane @@ -16,8 +18,8 @@ module Text.Pandoc.Writers.Textile ( writeTextile ) where import Prelude import Control.Monad.State.Strict import Data.Char (isSpace) -import Data.List (intercalate) -import Data.Text (Text, pack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -30,10 +32,10 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.XML (escapeStringForXML) data WriterState = WriterState { - stNotes :: [String] -- Footnotes - , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" - , stStartNum :: Maybe Int -- Start number if first list item - , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list + stNotes :: [Text] -- Footnotes + , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" + , stStartNum :: Maybe Int -- Start number if first list item + , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list } type TW = StateT WriterState @@ -52,11 +54,11 @@ pandocToTextile :: PandocMonad m => WriterOptions -> Pandoc -> TW m Text pandocToTextile opts (Pandoc meta blocks) = do metadata <- metaToContext opts - (fmap (literal . pack) . blockListToTextile opts) - (fmap (literal . pack) . inlineListToTextile opts) meta + (fmap literal . blockListToTextile opts) + (fmap literal . inlineListToTextile opts) meta body <- blockListToTextile opts blocks - notes <- gets $ unlines . reverse . stNotes - let main = pack $ body ++ if null notes then "" else "\n\n" ++ notes + notes <- gets $ T.unlines . reverse . stNotes + let main = body <> if T.null notes then "" else "\n\n" <> notes let context = defField "body" main metadata return $ case writerTemplate opts of @@ -72,7 +74,7 @@ withUseTags action = do return result -- | Escape one character as needed for Textile. -escapeCharForTextile :: Char -> String +escapeCharForTextile :: Char -> Text escapeCharForTextile x = case x of '&' -> "&" '<' -> "<" @@ -88,17 +90,17 @@ escapeCharForTextile x = case x of '\x2013' -> " - " '\x2019' -> "'" '\x2026' -> "..." - c -> [c] + c -> T.singleton c -- | Escape string as needed for Textile. -escapeStringForTextile :: String -> String -escapeStringForTextile = concatMap escapeCharForTextile +escapeTextForTextile :: Text -> Text +escapeTextForTextile = T.concatMap escapeCharForTextile -- | Convert Pandoc block element to Textile. blockToTextile :: PandocMonad m => WriterOptions -- ^ Options -> Block -- ^ Block element - -> TW m String + -> TW m Text blockToTextile _ Null = return "" @@ -106,24 +108,24 @@ blockToTextile opts (Div attr bs) = do let startTag = render Nothing $ tagWithAttrs "div" attr let endTag = "</div>" contents <- blockListToTextile opts bs - return $ startTag ++ "\n\n" ++ contents ++ "\n\n" ++ endTag ++ "\n" + return $ startTag <> "\n\n" <> contents <> "\n\n" <> endTag <> "\n" blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines -- title beginning with fig: indicates that the image is a figure -blockToTextile opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToTextile opts (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do capt <- blockToTextile opts (Para txt) im <- inlineToTextile opts (Image attr txt (src,tit)) - return $ im ++ "\n" ++ capt + return $ im <> "\n" <> capt blockToTextile opts (Para inlines) = do useTags <- gets stUseTags listLevel <- gets stListLevel contents <- inlineListToTextile opts inlines return $ if useTags - then "<p>" ++ contents ++ "</p>" - else contents ++ if null listLevel then "\n" else "" + then "<p>" <> contents <> "</p>" + else contents <> if null listLevel then "\n" else "" blockToTextile opts (LineBlock lns) = blockToTextile opts $ linesToPara lns @@ -138,41 +140,41 @@ blockToTextile _ HorizontalRule = return "<hr />\n" blockToTextile opts (Header level (ident,classes,keyvals) inlines) = do contents <- inlineListToTextile opts inlines - let identAttr = if null ident then "" else '#':ident - let attribs = if null identAttr && null classes + let identAttr = if T.null ident then "" else "#" <> ident + let attribs = if T.null identAttr && null classes then "" - else "(" ++ unwords classes ++ identAttr ++ ")" - let lang = maybe "" (\x -> "[" ++ x ++ "]") $ lookup "lang" keyvals - let styles = maybe "" (\x -> "{" ++ x ++ "}") $ lookup "style" keyvals - let prefix = 'h' : show level ++ attribs ++ styles ++ lang ++ ". " - return $ prefix ++ contents ++ "\n" - -blockToTextile _ (CodeBlock (_,classes,_) str) | any (all isSpace) (lines str) = - return $ "<pre" ++ classes' ++ ">\n" ++ escapeStringForXML str ++ + else "(" <> T.unwords classes <> identAttr <> ")" + let lang = maybe "" (\x -> "[" <> x <> "]") $ lookup "lang" keyvals + let styles = maybe "" (\x -> "{" <> x <> "}") $ lookup "style" keyvals + let prefix = "h" <> tshow level <> attribs <> styles <> lang <> ". " + return $ prefix <> contents <> "\n" + +blockToTextile _ (CodeBlock (_,classes,_) str) | any (T.all isSpace) (T.lines str) = + return $ "<pre" <> classes' <> ">\n" <> escapeStringForXML str <> "\n</pre>\n" where classes' = if null classes then "" - else " class=\"" ++ unwords classes ++ "\"" + else " class=\"" <> T.unwords classes <> "\"" blockToTextile _ (CodeBlock (_,classes,_) str) = - return $ "bc" ++ classes' ++ ". " ++ str ++ "\n\n" + return $ "bc" <> classes' <> ". " <> str <> "\n\n" where classes' = if null classes then "" - else "(" ++ unwords classes ++ ")" + else "(" <> T.unwords classes <> ")" blockToTextile opts (BlockQuote bs@[Para _]) = do contents <- blockListToTextile opts bs - return $ "bq. " ++ contents ++ "\n\n" + return $ "bq. " <> contents <> "\n\n" blockToTextile opts (BlockQuote blocks) = do contents <- blockListToTextile opts blocks - return $ "<blockquote>\n\n" ++ contents ++ "\n</blockquote>\n" + return $ "<blockquote>\n\n" <> contents <> "\n</blockquote>\n" blockToTextile opts (Table [] aligns widths headers rows') | all (==0) widths = do - hs <- mapM (liftM (("_. " ++) . stripTrailingNewlines) . blockListToTextile opts) headers - let cellsToRow cells = "|" ++ intercalate "|" cells ++ "|" - let header = if all null headers then "" else cellsToRow hs ++ "\n" + hs <- mapM (liftM (("_. " <>) . stripTrailingNewlines) . blockListToTextile opts) headers + let cellsToRow cells = "|" <> T.intercalate "|" cells <> "|" + let header = if all null headers then "" else cellsToRow hs <> "\n" let blocksToCell (align, bs) = do contents <- stripTrailingNewlines <$> blockListToTextile opts bs let alignMarker = case align of @@ -180,32 +182,32 @@ blockToTextile opts (Table [] aligns widths headers rows') | AlignRight -> ">. " AlignCenter -> "=. " AlignDefault -> "" - return $ alignMarker ++ contents + return $ alignMarker <> contents let rowToCells = mapM blocksToCell . zip aligns bs <- mapM rowToCells rows' - let body = unlines $ map cellsToRow bs - return $ header ++ body + let body = T.unlines $ map cellsToRow bs + return $ header <> body blockToTextile opts (Table capt aligns widths headers rows') = do - let alignStrings = map alignmentToString aligns + let alignStrings = map alignmentToText aligns captionDoc <- if null capt then return "" else do c <- inlineListToTextile opts capt - return $ "<caption>" ++ c ++ "</caption>\n" - let percent w = show (truncate (100*w) :: Integer) ++ "%" + return $ "<caption>" <> c <> "</caption>\n" + let percent w = tshow (truncate (100*w) :: Integer) <> "%" let coltags = if all (== 0.0) widths then "" - else unlines $ map - (\w -> "<col width=\"" ++ percent w ++ "\" />") widths + else T.unlines $ map + (\w -> "<col width=\"" <> percent w <> "\" />") widths head' <- if all null headers then return "" else do hs <- tableRowToTextile opts alignStrings 0 headers - return $ "<thead>\n" ++ hs ++ "\n</thead>\n" + return $ "<thead>\n" <> hs <> "\n</thead>\n" body' <- zipWithM (tableRowToTextile opts alignStrings) [1..] rows' - return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++ - "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n" + return $ "<table>\n" <> captionDoc <> coltags <> head' <> + "<tbody>\n" <> T.unlines body' <> "</tbody>\n</table>\n" blockToTextile opts x@(BulletList items) = do oldUseTags <- gets stUseTags @@ -213,13 +215,13 @@ blockToTextile opts x@(BulletList items) = do if useTags then do contents <- withUseTags $ mapM (listItemToTextile opts) items - return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n" + return $ "<ul>\n" <> vcat contents <> "\n</ul>\n" else do - modify $ \s -> s { stListLevel = stListLevel s ++ "*" } + modify $ \s -> s { stListLevel = stListLevel s <> "*" } level <- gets $ length . stListLevel contents <- mapM (listItemToTextile opts) items modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents ++ (if level > 1 then "" else "\n") + return $ vcat contents <> (if level > 1 then "" else "\n") blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do oldUseTags <- gets stUseTags @@ -227,10 +229,10 @@ blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do if useTags then do contents <- withUseTags $ mapM (listItemToTextile opts) items - return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ + return $ "<ol" <> listAttribsToString attribs <> ">\n" <> vcat contents <> "\n</ol>\n" else do - modify $ \s -> s { stListLevel = stListLevel s ++ "#" + modify $ \s -> s { stListLevel = stListLevel s <> "#" , stStartNum = if start > 1 then Just start else Nothing } @@ -238,52 +240,52 @@ blockToTextile opts x@(OrderedList attribs@(start, _, _) items) = do contents <- mapM (listItemToTextile opts) items modify $ \s -> s { stListLevel = init (stListLevel s), stStartNum = Nothing } - return $ vcat contents ++ (if level > 1 then "" else "\n") + return $ vcat contents <> (if level > 1 then "" else "\n") blockToTextile opts (DefinitionList items) = do contents <- withUseTags $ mapM (definitionListItemToTextile opts) items - return $ "<dl>\n" ++ vcat contents ++ "\n</dl>\n" + return $ "<dl>\n" <> vcat contents <> "\n</dl>\n" -- Auxiliary functions for lists: -- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String +listAttribsToString :: ListAttributes -> Text listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle + let numstyle' = camelCaseToHyphenated $ tshow numstyle in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ + then " start=\"" <> tshow startnum <> "\"" + else "") <> (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + then " style=\"list-style-type: " <> numstyle' <> ";\"" else "") -- | Convert bullet or ordered list item (list of blocks) to Textile. listItemToTextile :: PandocMonad m - => WriterOptions -> [Block] -> TW m String + => WriterOptions -> [Block] -> TW m Text listItemToTextile opts items = do contents <- blockListToTextile opts items useTags <- gets stUseTags if useTags - then return $ "<li>" ++ contents ++ "</li>" + then return $ "<li>" <> contents <> "</li>" else do marker <- gets stListLevel mbstart <- gets stStartNum case mbstart of Just n -> do modify $ \s -> s{ stStartNum = Nothing } - return $ marker ++ show n ++ " " ++ contents - Nothing -> return $ marker ++ " " ++ contents + return $ T.pack marker <> tshow n <> " " <> contents + Nothing -> return $ T.pack marker <> " " <> contents -- | Convert definition list item (label, list of blocks) to Textile. definitionListItemToTextile :: PandocMonad m => WriterOptions -> ([Inline],[[Block]]) - -> TW m String + -> TW m Text definitionListItemToTextile opts (label, items) = do labelText <- inlineListToTextile opts label contents <- mapM (blockListToTextile opts) items - return $ "<dt>" ++ labelText ++ "</dt>\n" ++ - intercalate "\n" (map (\d -> "<dd>" ++ d ++ "</dd>") contents) + return $ "<dt>" <> labelText <> "</dt>\n" <> + T.intercalate "\n" (map (\d -> "<dd>" <> d <> "</dd>") contents) -- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. isSimpleList :: Block -> Bool @@ -318,18 +320,18 @@ isPlainOrPara (Para _) = True isPlainOrPara _ = False -- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat = intercalate "\n" +vcat :: [Text] -> Text +vcat = T.intercalate "\n" -- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki, -- and Textile writers, and should be abstracted out.) tableRowToTextile :: PandocMonad m => WriterOptions - -> [String] + -> [Text] -> Int -> [[Block]] - -> TW m String + -> TW m Text tableRowToTextile opts alignStrings rownum cols' = do let celltype = if rownum == 0 then "th" else "td" let rowclass = case rownum of @@ -339,10 +341,10 @@ tableRowToTextile opts alignStrings rownum cols' = do cols'' <- zipWithM (\alignment item -> tableItemToTextile opts celltype alignment item) alignStrings cols' - return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" + return $ "<tr class=\"" <> rowclass <> "\">\n" <> T.unlines cols'' <> "</tr>" -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of +alignmentToText :: Alignment -> Text +alignmentToText alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" @@ -350,13 +352,13 @@ alignmentToString alignment = case alignment of tableItemToTextile :: PandocMonad m => WriterOptions - -> String - -> String + -> Text + -> Text -> [Block] - -> TW m String + -> TW m Text tableItemToTextile opts celltype align' item = do - let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ - x ++ "</" ++ celltype ++ ">" + let mkcell x = "<" <> celltype <> " align=\"" <> align' <> "\">" <> + x <> "</" <> celltype <> ">" contents <- blockListToTextile opts item return $ mkcell contents @@ -364,73 +366,73 @@ tableItemToTextile opts celltype align' item = do blockListToTextile :: PandocMonad m => WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> TW m String + -> TW m Text blockListToTextile opts blocks = vcat <$> mapM (blockToTextile opts) blocks -- | Convert list of Pandoc inline elements to Textile. inlineListToTextile :: PandocMonad m - => WriterOptions -> [Inline] -> TW m String + => WriterOptions -> [Inline] -> TW m Text inlineListToTextile opts lst = - concat <$> mapM (inlineToTextile opts) lst + T.concat <$> mapM (inlineToTextile opts) lst -- | Convert Pandoc inline element to Textile. -inlineToTextile :: PandocMonad m => WriterOptions -> Inline -> TW m String +inlineToTextile :: PandocMonad m => WriterOptions -> Inline -> TW m Text inlineToTextile opts (Span _ lst) = inlineListToTextile opts lst inlineToTextile opts (Emph lst) = do contents <- inlineListToTextile opts lst - return $ if '_' `elem` contents - then "<em>" ++ contents ++ "</em>" - else "_" ++ contents ++ "_" + return $ if '_' `elemText` contents + then "<em>" <> contents <> "</em>" + else "_" <> contents <> "_" inlineToTextile opts (Strong lst) = do contents <- inlineListToTextile opts lst - return $ if '*' `elem` contents - then "<strong>" ++ contents ++ "</strong>" - else "*" ++ contents ++ "*" + return $ if '*' `elemText` contents + then "<strong>" <> contents <> "</strong>" + else "*" <> contents <> "*" inlineToTextile opts (Strikeout lst) = do contents <- inlineListToTextile opts lst - return $ if '-' `elem` contents - then "<del>" ++ contents ++ "</del>" - else "-" ++ contents ++ "-" + return $ if '-' `elemText` contents + then "<del>" <> contents <> "</del>" + else "-" <> contents <> "-" inlineToTextile opts (Superscript lst) = do contents <- inlineListToTextile opts lst - return $ if '^' `elem` contents - then "<sup>" ++ contents ++ "</sup>" - else "[^" ++ contents ++ "^]" + return $ if '^' `elemText` contents + then "<sup>" <> contents <> "</sup>" + else "[^" <> contents <> "^]" inlineToTextile opts (Subscript lst) = do contents <- inlineListToTextile opts lst - return $ if '~' `elem` contents - then "<sub>" ++ contents ++ "</sub>" - else "[~" ++ contents ++ "~]" + return $ if '~' `elemText` contents + then "<sub>" <> contents <> "</sub>" + else "[~" <> contents <> "~]" inlineToTextile opts (SmallCaps lst) = inlineListToTextile opts lst inlineToTextile opts (Quoted SingleQuote lst) = do contents <- inlineListToTextile opts lst - return $ "'" ++ contents ++ "'" + return $ "'" <> contents <> "'" inlineToTextile opts (Quoted DoubleQuote lst) = do contents <- inlineListToTextile opts lst - return $ "\"" ++ contents ++ "\"" + return $ "\"" <> contents <> "\"" inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst inlineToTextile _ (Code _ str) = - return $ if '@' `elem` str - then "<tt>" ++ escapeStringForXML str ++ "</tt>" - else "@" ++ str ++ "@" + return $ if '@' `elemText` str + then "<tt>" <> escapeStringForXML str <> "</tt>" + else "@" <> str <> "@" -inlineToTextile _ (Str str) = return $ escapeStringForTextile str +inlineToTextile _ (Str str) = return $ escapeTextForTextile str inlineToTextile _ (Math _ str) = - return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</span>" + return $ "<span class=\"math\">" <> escapeStringForXML str <> "</span>" inlineToTextile opts il@(RawInline f str) | f == Format "html" || f == Format "textile" = return str @@ -455,36 +457,36 @@ inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do _ -> inlineListToTextile opts txt let classes = if null cls || cls == ["uri"] && label == "$" then "" - else "(" ++ unwords cls ++ ")" - return $ "\"" ++ classes ++ label ++ "\":" ++ src + else "(" <> T.unwords cls <> ")" + return $ "\"" <> classes <> label <> "\":" <> src inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do alt' <- inlineListToTextile opts alt - let txt = if null tit - then if null alt' + let txt = if T.null tit + then if T.null alt' then "" - else "(" ++ alt' ++ ")" - else "(" ++ tit ++ ")" + else "(" <> alt' <> ")" + else "(" <> tit <> ")" classes = if null cls then "" - else "(" ++ unwords cls ++ ")" - showDim dir = let toCss str = Just $ show dir ++ ":" ++ str ++ ";" + else "(" <> T.unwords cls <> ")" + showDim dir = let toCss str = Just $ tshow dir <> ":" <> str <> ";" in case dimension dir attr of - Just (Percent a) -> toCss $ show (Percent a) - Just dim -> toCss $ showInPixel opts dim ++ "px" + Just (Percent a) -> toCss $ tshow (Percent a) + Just dim -> toCss $ showInPixel opts dim <> "px" Nothing -> Nothing styles = case (showDim Width, showDim Height) of - (Just w, Just h) -> "{" ++ w ++ h ++ "}" - (Just w, Nothing) -> "{" ++ w ++ "height:auto;}" - (Nothing, Just h) -> "{" ++ "width:auto;" ++ h ++ "}" + (Just w, Just h) -> "{" <> w <> h <> "}" + (Just w, Nothing) -> "{" <> w <> "height:auto;}" + (Nothing, Just h) -> "{" <> "width:auto;" <> h <> "}" (Nothing, Nothing) -> "" - return $ "!" ++ classes ++ styles ++ source ++ txt ++ "!" + return $ "!" <> classes <> styles <> source <> txt <> "!" inlineToTextile opts (Note contents) = do curNotes <- gets stNotes let newnum = length curNotes + 1 contents' <- blockListToTextile opts contents - let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n" + let thisnote = "fn" <> tshow newnum <> ". " <> contents' <> "\n" modify $ \s -> s { stNotes = thisnote : curNotes } - return $ "[" ++ show newnum ++ "]" + return $ "[" <> tshow newnum <> "]" -- note - may not work for notes with multiple blocks diff --git a/src/Text/Pandoc/Writers/XWiki.hs b/src/Text/Pandoc/Writers/XWiki.hs index e6cd0b086..7afe845c7 100644 --- a/src/Text/Pandoc/Writers/XWiki.hs +++ b/src/Text/Pandoc/Writers/XWiki.hs @@ -38,12 +38,12 @@ import Prelude import Control.Monad.Reader (ReaderT, asks, local, runReaderT) import qualified Data.Set as Set import qualified Data.Text as Text -import Data.Text (Text, intercalate, pack, replace, split) +import Data.Text (Text, intercalate, replace, split) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared (escapeURI, isURI, linesToPara) +import Text.Pandoc.Shared import Text.Pandoc.Writers.MediaWiki (highlightingLangs) data WriterState = WriterState { @@ -65,10 +65,10 @@ vcat = intercalate "\n" -- If an id is provided, we can generate an anchor using the id macro -- https://extensions.xwiki.org/xwiki/bin/view/Extension/Id%20Macro -genAnchor :: String -> Text -genAnchor id' = if null id' +genAnchor :: Text -> Text +genAnchor id' = if Text.null id' then "" - else pack $ "{{id name=\"" ++ id' ++ "\" /}}" + else "{{id name=\"" <> id' <> "\" /}}" blockListToXWiki :: PandocMonad m => [Block] -> XWikiReader m Text blockListToXWiki blocks = @@ -93,7 +93,7 @@ blockToXWiki (LineBlock lns) = blockToXWiki $ linesToPara lns blockToXWiki b@(RawBlock f str) - | f == Format "xwiki" = return $ pack str + | f == Format "xwiki" = return str | otherwise = "" <$ report (BlockNotRendered b) blockToXWiki HorizontalRule = return "\n----\n" @@ -140,7 +140,7 @@ tableCellXWiki :: PandocMonad m => Bool -> [Block] -> XWikiReader m Text tableCellXWiki isHeader cell = do contents <- blockListToXWiki cell let isMultiline = (length . split (== '\n')) contents > 1 - let contents' = intercalate contents $ if isMultiline then [pack "(((", pack ")))"] else [mempty, mempty] + let contents' = intercalate contents $ if isMultiline then ["(((", ")))"] else [mempty, mempty] let cellBorder = if isHeader then "|=" else "|" return $ cellBorder <> contents' @@ -151,7 +151,7 @@ inlineListToXWiki lst = inlineToXWiki :: PandocMonad m => Inline -> XWikiReader m Text -inlineToXWiki (Str str) = return $ escapeXWikiString $ pack str +inlineToXWiki (Str str) = return $ escapeXWikiString str inlineToXWiki Space = return " " @@ -193,39 +193,37 @@ inlineToXWiki (Quoted DoubleQuote lst) = do contents <- inlineListToXWiki lst return $ "“" <> contents <> "”" -inlineToXWiki (Code (_,classes,_) contents') = do +inlineToXWiki (Code (_,classes,_) contents) = do let at = Set.fromList classes `Set.intersection` highlightingLangs - let contents = pack contents' return $ case Set.toList at of [] -> "{{code}}" <> contents <> "{{/code}}" - (l:_) -> "{{code language=\"" <> (pack l) <> "\"}}" <> contents <> "{{/code}}" + (l:_) -> "{{code language=\"" <> l <> "\"}}" <> contents <> "{{/code}}" inlineToXWiki (Cite _ lst) = inlineListToXWiki lst -- FIXME: optionally support this (plugin?) -inlineToXWiki (Math _ str) = return $ "{{formula}}" <> (pack str) <> "{{/formula}}" +inlineToXWiki (Math _ str) = return $ "{{formula}}" <> str <> "{{/formula}}" inlineToXWiki il@(RawInline frmt str) - | frmt == Format "xwiki" = return $ pack str + | frmt == Format "xwiki" = return str | otherwise = "" <$ report (InlineNotRendered il) -- TODO: Handle anchors inlineToXWiki (Link (id', _, _) txt (src, _)) = do label <- inlineListToXWiki txt case txt of - [Str s] | isURI src && escapeURI s == src -> return $ (pack src) <> (genAnchor id') - _ -> return $ "[[" <> label <> ">>" <> (pack src) <> "]]" <> (genAnchor id') + [Str s] | isURI src && escapeURI s == src -> return $ src <> (genAnchor id') + _ -> return $ "[[" <> label <> ">>" <> src <> "]]" <> (genAnchor id') inlineToXWiki (Image _ alt (source, tit)) = do alt' <- inlineListToXWiki alt let - titText = pack tit params = intercalate " " $ filter (not . Text.null) [ if Text.null alt' then "" else "alt=\"" <> alt' <> "\"", - if Text.null titText then "" else "title=\"" <> titText <> "\"" + if Text.null tit then "" else "title=\"" <> tit <> "\"" ] - return $ "[[image:" <> (pack source) <> (if Text.null params then "" else "||" <> params) <> "]]" + return $ "[[image:" <> source <> (if Text.null params then "" else "||" <> params) <> "]]" inlineToXWiki (Note contents) = do contents' <- blockListToXWiki contents diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index e1bc40351..7f7821fe2 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ZimWiki Copyright : Copyright (C) 2008-2019 John MacFarlane, 2017-2019 Alex Ivkin @@ -18,11 +20,12 @@ import Prelude import Control.Monad (zipWithM) import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) -import Data.List (intercalate, isInfixOf, isPrefixOf, transpose) +import Data.List (transpose) import qualified Data.Map as Map import Text.DocLayout (render, literal) import Data.Maybe (fromMaybe) -import Data.Text (Text, breakOnAll, pack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -30,13 +33,12 @@ import Text.Pandoc.Logging import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) -import Text.Pandoc.Shared (escapeURI, isURI, linesToPara, removeFormatting, - substitute, trimr) +import Text.Pandoc.Shared (escapeURI, isURI, linesToPara, removeFormatting, trimr) import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared (defField, metaToContext) data WriterState = WriterState { - stIndent :: String, -- Indent after the marker at the beginning of list items + stIndent :: Text, -- Indent after the marker at the beginning of list items stInTable :: Bool, -- Inside a table stInLink :: Bool -- Inside a link description } @@ -54,10 +56,10 @@ writeZimWiki opts document = evalStateT (pandocToZimWiki opts document) def pandocToZimWiki :: PandocMonad m => WriterOptions -> Pandoc -> ZW m Text pandocToZimWiki opts (Pandoc meta blocks) = do metadata <- metaToContext opts - (fmap (literal . pack . trimr) . blockListToZimWiki opts) - (fmap (literal . pack . trimr) . inlineListToZimWiki opts) + (fmap (literal . trimr) . blockListToZimWiki opts) + (fmap (literal . trimr) . inlineListToZimWiki opts) meta - main <- pack <$> blockListToZimWiki opts blocks + main <- blockListToZimWiki opts blocks --let header = "Content-Type: text/x-zim-wiki\nWiki-Format: zim 0.4\n" let context = defField "body" main $ defField "toc" (writerTableOfContents opts) metadata @@ -67,39 +69,39 @@ pandocToZimWiki opts (Pandoc meta blocks) = do Nothing -> main -- | Escape special characters for ZimWiki. -escapeString :: String -> String -escapeString = substitute "__" "''__''" . - substitute "**" "''**''" . - substitute "~~" "''~~''" . - substitute "//" "''//''" +escapeText :: Text -> Text +escapeText = T.replace "__" "''__''" . + T.replace "**" "''**''" . + T.replace "~~" "''~~''" . + T.replace "//" "''//''" -- | Convert Pandoc block element to ZimWiki. -blockToZimWiki :: PandocMonad m => WriterOptions -> Block -> ZW m String +blockToZimWiki :: PandocMonad m => WriterOptions -> Block -> ZW m Text blockToZimWiki _ Null = return "" blockToZimWiki opts (Div _attrs bs) = do contents <- blockListToZimWiki opts bs - return $ contents ++ "\n" + return $ contents <> "\n" blockToZimWiki opts (Plain inlines) = inlineListToZimWiki opts inlines -- title beginning with fig: indicates that the image is a figure -- ZimWiki doesn't support captions - so combine together alt and caption into alt -blockToZimWiki opts (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do +blockToZimWiki opts (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do capt <- if null txt then return "" - else (" " ++) `fmap` inlineListToZimWiki opts txt + else (" " <>) `fmap` inlineListToZimWiki opts txt let opt = if null txt then "" - else "|" ++ if null tit then capt else tit ++ capt - return $ "{{" ++ src ++ imageDims opts attr ++ opt ++ "}}\n" + else "|" <> if T.null tit then capt else tit <> capt + return $ "{{" <> src <> imageDims opts attr <> opt <> "}}\n" blockToZimWiki opts (Para inlines) = do indent <- gets stIndent -- useTags <- gets stUseTags contents <- inlineListToZimWiki opts inlines - return $ contents ++ if null indent then "\n" else "" + return $ contents <> if T.null indent then "\n" else "" blockToZimWiki opts (LineBlock lns) = blockToZimWiki opts $ linesToPara lns @@ -115,63 +117,63 @@ blockToZimWiki _ HorizontalRule = return "\n----\n" blockToZimWiki opts (Header level _ inlines) = do contents <- inlineListToZimWiki opts $ removeFormatting inlines -- emphasis, links etc. not allowed in headers - let eqs = replicate ( 7 - level ) '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" + let eqs = T.replicate ( 7 - level ) "=" + return $ eqs <> " " <> contents <> " " <> eqs <> "\n" blockToZimWiki _ (CodeBlock (_,classes,_) str) = do -- Remap languages into the gtksourceview2 convention that ZimWiki source code plugin is using let langal = [("javascript", "js"), ("bash", "sh"), ("winbatch", "dosbatch")] let langmap = Map.fromList langal return $ case classes of - [] -> "'''\n" ++ cleanupCode str ++ "\n'''\n" -- turn no lang block into a quote block - (x:_) -> "{{{code: lang=\"" ++ - fromMaybe x (Map.lookup x langmap) ++ "\" linenumbers=\"True\"\n" ++ str ++ "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec + [] -> "'''\n" <> cleanupCode str <> "\n'''\n" -- turn no lang block into a quote block + (x:_) -> "{{{code: lang=\"" <> + fromMaybe x (Map.lookup x langmap) <> "\" linenumbers=\"True\"\n" <> str <> "\n}}}\n" -- for zim's code plugin, go verbatim on the lang spec blockToZimWiki opts (BlockQuote blocks) = do contents <- blockListToZimWiki opts blocks - return $ unlines $ map ("> " ++) $ lines contents + return $ T.unlines $ map ("> " <>) $ T.lines contents blockToZimWiki opts (Table capt aligns _ headers rows) = do captionDoc <- if null capt then return "" else do c <- inlineListToZimWiki opts capt - return $ "" ++ c ++ "\n" + return $ "" <> c <> "\n" headers' <- if all null headers then zipWithM (tableItemToZimWiki opts) aligns (head rows) else mapM (inlineListToZimWiki opts . removeFormatting)headers -- emphasis, links etc. are not allowed in table headers rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows - let widths = map (maximum . map length) $ transpose (headers':rows') + let widths = map (maximum . map T.length) $ transpose (headers':rows') let padTo (width, al) s = - case width - length s of + case width - T.length s of x | x > 0 -> if al == AlignLeft || al == AlignDefault - then s ++ replicate x ' ' + then s <> T.replicate x " " else if al == AlignRight - then replicate x ' ' ++ s - else replicate (x `div` 2) ' ' ++ - s ++ replicate (x - x `div` 2) ' ' + then T.replicate x " " <> s + else T.replicate (x `div` 2) " " <> + s <> T.replicate (x - x `div` 2) " " | otherwise -> s let borderCell (width, al) _ - | al == AlignLeft = ":"++ replicate (width-1) '-' - | al == AlignDefault = replicate width '-' - | al == AlignRight = replicate (width-1) '-' ++ ":" - | otherwise = ":" ++ replicate (width-2) '-' ++ ":" - let underheader = "|" ++ intercalate "|" (zipWith borderCell (zip widths aligns) headers') ++ "|" - let renderRow cells = "|" ++ intercalate "|" (zipWith padTo (zip widths aligns) cells) ++ "|" - return $ captionDoc ++ - (if null headers' then "" else renderRow headers' ++ "\n") ++ underheader ++ "\n" ++ - unlines (map renderRow rows') + | al == AlignLeft = ":"<> T.replicate (width-1) "-" + | al == AlignDefault = T.replicate width "-" + | al == AlignRight = T.replicate (width-1) "-" <> ":" + | otherwise = ":" <> T.replicate (width-2) "-" <> ":" + let underheader = "|" <> T.intercalate "|" (zipWith borderCell (zip widths aligns) headers') <> "|" + let renderRow cells = "|" <> T.intercalate "|" (zipWith padTo (zip widths aligns) cells) <> "|" + return $ captionDoc <> + (if null headers' then "" else renderRow headers' <> "\n") <> underheader <> "\n" <> + T.unlines (map renderRow rows') blockToZimWiki opts (BulletList items) = do contents <- mapM (listItemToZimWiki opts) items indent <- gets stIndent - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" blockToZimWiki opts (OrderedList _ items) = do contents <- zipWithM (orderedListItemToZimWiki opts) [1..] items indent <- gets stIndent - return $ vcat contents ++ if null indent then "\n" else "" + return $ vcat contents <> if T.null indent then "\n" else "" blockToZimWiki opts (DefinitionList items) = do contents <- mapM (definitionListItemToZimWiki opts) items @@ -180,71 +182,71 @@ blockToZimWiki opts (DefinitionList items) = do definitionListItemToZimWiki :: PandocMonad m => WriterOptions -> ([Inline],[[Block]]) - -> ZW m String + -> ZW m Text definitionListItemToZimWiki opts (label, items) = do labelText <- inlineListToZimWiki opts label contents <- mapM (blockListToZimWiki opts) items indent <- gets stIndent - return $ indent ++ "* **" ++ labelText ++ "** " ++ concat contents + return $ indent <> "* **" <> labelText <> "** " <> T.concat contents -- Auxiliary functions for lists: -indentFromHTML :: PandocMonad m => WriterOptions -> String -> ZW m String +indentFromHTML :: PandocMonad m => WriterOptions -> Text -> ZW m Text indentFromHTML _ str = do indent <- gets stIndent - if "<li>" `isInfixOf` str + if "<li>" `T.isInfixOf` str then return indent - else if "</li>" `isInfixOf` str + else if "</li>" `T.isInfixOf` str then return "\n" - else if "<li value=" `isInfixOf` str + else if "<li value=" `T.isInfixOf` str then return "" - else if "<ol>" `isInfixOf` str + else if "<ol>" `T.isInfixOf` str then do let olcount=countSubStrs "<ol>" str - modify $ \s -> s { stIndent = stIndent s ++ - replicate olcount '\t' } + modify $ \s -> s { stIndent = stIndent s <> + T.replicate olcount "\t" } return "" - else if "</ol>" `isInfixOf` str + else if "</ol>" `T.isInfixOf` str then do let olcount=countSubStrs "/<ol>" str - modify $ \s -> s{ stIndent = drop olcount (stIndent s) } + modify $ \s -> s{ stIndent = T.drop olcount (stIndent s) } return "" else return "" -countSubStrs :: String -> String -> Int -countSubStrs sub str = length $ breakOnAll (pack sub) (pack str) +countSubStrs :: Text -> Text -> Int +countSubStrs sub str = length $ T.breakOnAll sub str -cleanupCode :: String -> String -cleanupCode = substitute "<nowiki>" "" . substitute "</nowiki>" "" +cleanupCode :: Text -> Text +cleanupCode = T.replace "<nowiki>" "" . T.replace "</nowiki>" "" -vcat :: [String] -> String -vcat = intercalate "\n" +vcat :: [Text] -> Text +vcat = T.intercalate "\n" -- | Convert bullet list item (list of blocks) to ZimWiki. -listItemToZimWiki :: PandocMonad m => WriterOptions -> [Block] -> ZW m String +listItemToZimWiki :: PandocMonad m => WriterOptions -> [Block] -> ZW m Text listItemToZimWiki opts items = do indent <- gets stIndent - modify $ \s -> s { stIndent = indent ++ "\t" } + modify $ \s -> s { stIndent = indent <> "\t" } contents <- blockListToZimWiki opts items modify $ \s -> s{ stIndent = indent } - return $ indent ++ "* " ++ contents + return $ indent <> "* " <> contents -- | Convert ordered list item (list of blocks) to ZimWiki. orderedListItemToZimWiki :: PandocMonad m - => WriterOptions -> Int -> [Block] -> ZW m String + => WriterOptions -> Int -> [Block] -> ZW m Text orderedListItemToZimWiki opts itemnum items = do indent <- gets stIndent - modify $ \s -> s { stIndent = indent ++ "\t" } + modify $ \s -> s { stIndent = indent <> "\t" } contents <- blockListToZimWiki opts items modify $ \s -> s{ stIndent = indent } - return $ indent ++ show itemnum ++ ". " ++ contents + return $ indent <> T.pack (show itemnum) <> ". " <> contents -- Auxiliary functions for tables: tableItemToZimWiki :: PandocMonad m - => WriterOptions -> Alignment -> [Block] -> ZW m String + => WriterOptions -> Alignment -> [Block] -> ZW m Text tableItemToZimWiki opts align' item = do let mkcell x = (if align' == AlignRight || align' == AlignCenter then " " - else "") ++ x ++ + else "") <> x <> (if align' == AlignLeft || align' == AlignCenter then " " else "") @@ -255,45 +257,45 @@ tableItemToZimWiki opts align' item = do -- | Convert list of Pandoc block elements to ZimWiki. blockListToZimWiki :: PandocMonad m - => WriterOptions -> [Block] -> ZW m String + => WriterOptions -> [Block] -> ZW m Text blockListToZimWiki opts blocks = vcat <$> mapM (blockToZimWiki opts) blocks -- | Convert list of Pandoc inline elements to ZimWiki. inlineListToZimWiki :: PandocMonad m - => WriterOptions -> [Inline] -> ZW m String -inlineListToZimWiki opts lst = concat <$> mapM (inlineToZimWiki opts) lst + => WriterOptions -> [Inline] -> ZW m Text +inlineListToZimWiki opts lst = T.concat <$> mapM (inlineToZimWiki opts) lst -- | Convert Pandoc inline element to ZimWiki. inlineToZimWiki :: PandocMonad m - => WriterOptions -> Inline -> ZW m String + => WriterOptions -> Inline -> ZW m Text inlineToZimWiki opts (Emph lst) = do contents <- inlineListToZimWiki opts lst - return $ "//" ++ contents ++ "//" + return $ "//" <> contents <> "//" inlineToZimWiki opts (Strong lst) = do contents <- inlineListToZimWiki opts lst - return $ "**" ++ contents ++ "**" + return $ "**" <> contents <> "**" inlineToZimWiki opts (Strikeout lst) = do contents <- inlineListToZimWiki opts lst - return $ "~~" ++ contents ++ "~~" + return $ "~~" <> contents <> "~~" inlineToZimWiki opts (Superscript lst) = do contents <- inlineListToZimWiki opts lst - return $ "^{" ++ contents ++ "}" + return $ "^{" <> contents <> "}" inlineToZimWiki opts (Subscript lst) = do contents <- inlineListToZimWiki opts lst - return $ "_{" ++ contents ++ "}" + return $ "_{" <> contents <> "}" inlineToZimWiki opts (Quoted SingleQuote lst) = do contents <- inlineListToZimWiki opts lst - return $ "\8216" ++ contents ++ "\8217" + return $ "\8216" <> contents <> "\8217" inlineToZimWiki opts (Quoted DoubleQuote lst) = do contents <- inlineListToZimWiki opts lst - return $ "\8220" ++ contents ++ "\8221" + return $ "\8220" <> contents <> "\8221" inlineToZimWiki opts (Span _attrs ils) = inlineListToZimWiki opts ils @@ -301,24 +303,24 @@ inlineToZimWiki opts (SmallCaps lst) = inlineListToZimWiki opts lst inlineToZimWiki opts (Cite _ lst) = inlineListToZimWiki opts lst -inlineToZimWiki _ (Code _ str) = return $ "''" ++ str ++ "''" +inlineToZimWiki _ (Code _ str) = return $ "''" <> str <> "''" inlineToZimWiki _ (Str str) = do inTable <- gets stInTable inLink <- gets stInLink if inTable - then return $ substitute "|" "\\|" . escapeString $ str + then return $ T.replace "|" "\\|" . escapeText $ str else if inLink then return str - else return $ escapeString str + else return $ escapeText str -inlineToZimWiki _ (Math mathType str) = return $ delim ++ str ++ delim -- note: str should NOT be escaped +inlineToZimWiki _ (Math mathType str) = return $ delim <> str <> delim -- note: str should NOT be escaped where delim = case mathType of DisplayMath -> "$$" InlineMath -> "$" --- | f == Format "html" = return $ "<html>" ++ str ++ "</html>" +-- | f == Format "html" = return $ "<html>" <> str <> "</html>" inlineToZimWiki opts il@(RawInline f str) | f == Format "zimwiki" = return str | f == Format "html" = indentFromHTML opts str @@ -347,38 +349,39 @@ inlineToZimWiki opts (Link _ txt (src, _)) = do modify $ \s -> s { stInLink = False } let label'= if inTable then "" -- no label is allowed in a table - else "|"++label + else "|"<>label case txt of - [Str s] | "mailto:" `isPrefixOf` src -> return $ "<" ++ s ++ ">" + [Str s] | "mailto:" `T.isPrefixOf` src -> return $ "<" <> s <> ">" | escapeURI s == src -> return src _ -> if isURI src - then return $ "[[" ++ src ++ label' ++ "]]" - else return $ "[[" ++ src' ++ label' ++ "]]" - where src' = case src of - '/':xs -> xs -- with leading / it's a - _ -> src -- link to a help page + then return $ "[[" <> src <> label' <> "]]" + else return $ "[[" <> src' <> label' <> "]]" + where + -- with leading / it's a link to a help page + src' = fromMaybe src $ T.stripPrefix "/" src + inlineToZimWiki opts (Image attr alt (source, tit)) = do alt' <- inlineListToZimWiki opts alt inTable <- gets stInTable let txt = case (tit, alt, inTable) of ("",[], _) -> "" - ("", _, False ) -> "|" ++ alt' - (_ , _, False ) -> "|" ++ tit + ("", _, False ) -> "|" <> alt' + (_ , _, False ) -> "|" <> tit (_ , _, True ) -> "" - return $ "{{" ++ source ++ imageDims opts attr ++ txt ++ "}}" + return $ "{{" <> source <> imageDims opts attr <> txt <> "}}" inlineToZimWiki opts (Note contents) = do -- no concept of notes in zim wiki, use a text block contents' <- blockListToZimWiki opts contents - return $ " **{Note:** " ++ trimr contents' ++ "**}**" + return $ " **{Note:** " <> trimr contents' <> "**}**" -imageDims :: WriterOptions -> Attr -> String +imageDims :: WriterOptions -> Attr -> Text imageDims opts attr = go (toPx $ dimension Width attr) (toPx $ dimension Height attr) where toPx = fmap (showInPixel opts) . checkPct checkPct (Just (Percent _)) = Nothing checkPct maybeDim = maybeDim - go (Just w) Nothing = "?" ++ w - go (Just w) (Just h) = "?" ++ w ++ "x" ++ h - go Nothing (Just h) = "?0x" ++ h + go (Just w) Nothing = "?" <> w + go (Just w) (Just h) = "?" <> w <> "x" <> h + go Nothing (Just h) = "?0x" <> h go Nothing Nothing = "" diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index f0cdf8302..21f6d4d46 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.XML Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -31,17 +33,17 @@ import qualified Data.Map as M import Data.String -- | Escape one character as needed for XML. -escapeCharForXML :: Char -> String +escapeCharForXML :: Char -> Text escapeCharForXML x = case x of '&' -> "&" '<' -> "<" '>' -> ">" '"' -> """ - c -> [c] + c -> T.singleton c -- | Escape string as needed for XML. Entity references are not preserved. -escapeStringForXML :: String -> String -escapeStringForXML = concatMap escapeCharForXML . filter isLegalXMLChar +escapeStringForXML :: Text -> Text +escapeStringForXML = T.concatMap escapeCharForXML . T.filter isLegalXMLChar where isLegalXMLChar c = c == '\t' || c == '\n' || c == '\r' || (c >= '\x20' && c <= '\xD7FF') || (c >= '\xE000' && c <= '\xFFFD') || @@ -49,44 +51,43 @@ escapeStringForXML = concatMap escapeCharForXML . filter isLegalXMLChar -- see https://www.w3.org/TR/xml/#charsets -- | Escape newline characters as -escapeNls :: String -> String -escapeNls (x:xs) - | x == '\n' = " " ++ escapeNls xs - | otherwise = x : escapeNls xs -escapeNls [] = [] +escapeNls :: Text -> Text +escapeNls = T.concatMap $ \x -> case x of + '\n' -> " " + c -> T.singleton c -- | Return a text object with a string of formatted XML attributes. -attributeList :: (HasChars a, IsString a) => [(String, String)] -> Doc a +attributeList :: (HasChars a, IsString a) => [(Text, Text)] -> Doc a attributeList = hcat . map - (\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++ - escapeNls (escapeStringForXML b) ++ "\"")) + (\(a, b) -> text (T.unpack $ " " <> escapeStringForXML a <> "=\"" <> + escapeNls (escapeStringForXML b) <> "\"")) -- | Put the supplied contents between start and end tags of tagType, -- with specified attributes and (if specified) indentation. -inTags:: (HasChars a, IsString a) - => Bool -> String -> [(String, String)] -> Doc a -> Doc a +inTags :: (HasChars a, IsString a) + => Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a inTags isIndented tagType attribs contents = - let openTag = char '<' <> text tagType <> attributeList attribs <> + let openTag = char '<' <> text (T.unpack tagType) <> attributeList attribs <> char '>' - closeTag = text "</" <> text tagType <> char '>' + closeTag = text "</" <> text (T.unpack tagType) <> char '>' in if isIndented then openTag $$ nest 2 contents $$ closeTag else openTag <> contents <> closeTag -- | Return a self-closing tag of tagType with specified attributes selfClosingTag :: (HasChars a, IsString a) - => String -> [(String, String)] -> Doc a + => Text -> [(Text, Text)] -> Doc a selfClosingTag tagType attribs = - char '<' <> text tagType <> attributeList attribs <> text " />" + char '<' <> text (T.unpack tagType) <> attributeList attribs <> text " />" -- | Put the supplied contents between start and end tags of tagType. inTagsSimple :: (HasChars a, IsString a) - => String -> Doc a -> Doc a + => Text -> Doc a -> Doc a inTagsSimple tagType = inTags False tagType [] -- | Put the supplied contents in indented block btw start and end tags. inTagsIndented :: (HasChars a, IsString a) - => String -> Doc a -> Doc a + => Text -> Doc a -> Doc a inTagsIndented tagType = inTags True tagType [] -- | Escape all non-ascii characters using numerical entities. @@ -118,18 +119,21 @@ html5EntityMap = foldr go mempty htmlEntities -- Unescapes XML entities -fromEntities :: String -> String -fromEntities ('&':xs) = - case lookupEntity ent' of - Just c -> c ++ fromEntities rest - Nothing -> '&' : fromEntities xs - where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of - (zs,';':ys) -> (zs,ys) - (zs, ys) -> (zs,ys) - ent' = case ent of - '#':'X':ys -> '#':'x':ys -- workaround tagsoup bug - '#':_ -> ent - _ -> ent ++ ";" - -fromEntities (x:xs) = x : fromEntities xs -fromEntities [] = [] +fromEntities :: Text -> Text +fromEntities = T.pack . fromEntities' + +fromEntities' :: Text -> String +fromEntities' (T.uncons -> Just ('&', xs)) = + case lookupEntity $ T.unpack ent' of + Just c -> c <> fromEntities' rest + Nothing -> "&" <> fromEntities' xs + where (ent, rest) = case T.break (\c -> isSpace c || c == ';') xs of + (zs,T.uncons -> Just (';',ys)) -> (zs,ys) + (zs, ys) -> (zs,ys) + ent' + | Just ys <- T.stripPrefix "#X" ent = "#x" <> ys -- workaround tagsoup bug + | Just ('#', _) <- T.uncons ent = ent + | otherwise = ent <> ";" +fromEntities' t = case T.uncons t of + Just (x, xs) -> x : fromEntities' xs + Nothing -> "" diff --git a/stack.yaml b/stack.yaml index 0989fbed1..4a751175b 100644 --- a/stack.yaml +++ b/stack.yaml @@ -12,9 +12,8 @@ flags: packages: - '.' extra-deps: -- pandoc-citeproc-0.16.3.1 -#- pandoc-types-1.17.6.1 -- texmath-0.11.3 +- pandoc-types-1.20 +- texmath-0.12 - haddock-library-1.8.0 - skylighting-0.8.2.3 - skylighting-core-0.8.2.3 @@ -23,8 +22,9 @@ extra-deps: - HsYAML-0.2.0.0 - HsYAML-aeson-0.2.0.0 - doctemplates-0.7 -- git: https://github.com/jgm/pandoc-types - commit: 00f7bb79e79d7cfd3523880dbc64ba3ea46c3da2 +# - pandoc-citeproc-0.16.3.1 +- git: https://github.com/jgm/pandoc-citeproc + commit: dc09b028d6876df81cd76b731e58886f77f269b1 ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules resolver: lts-14.6 diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs index 7c47870aa..d76cca71a 100644 --- a/test/Tests/Command.hs +++ b/test/Tests/Command.hs @@ -16,6 +16,7 @@ where import Prelude import Data.Algorithm.Diff import qualified Data.ByteString as BS +import qualified Data.Text as T import Data.List (isSuffixOf) import Prelude hiding (readFile) import System.Directory @@ -77,7 +78,7 @@ isCodeBlock (CodeBlock _ _) = True isCodeBlock _ = False extractCode :: Block -> String -extractCode (CodeBlock _ code) = code +extractCode (CodeBlock _ code) = T.unpack code extractCode _ = "" dropPercent :: String -> String diff --git a/test/Tests/Helpers.hs b/test/Tests/Helpers.hs index 5ad867065..85bd518b3 100644 --- a/test/Tests/Helpers.hs +++ b/test/Tests/Helpers.hs @@ -142,7 +142,7 @@ instance ToString Blocks where toString = unpack . purely (writeNative def) . toPandoc instance ToString Inlines where - toString = trimr . unpack . purely (writeNative def) . toPandoc + toString = unpack . trimr . purely (writeNative def) . toPandoc instance ToString String where toString = id diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs index 49d54c9c8..7683df09f 100644 --- a/test/Tests/Lua.hs +++ b/test/Tests/Lua.hs @@ -33,7 +33,8 @@ import Text.Pandoc.Options (def) import Text.Pandoc.Shared (pandocVersion) import qualified Foreign.Lua as Lua -import qualified Data.ByteString.Char8 as BS +import qualified Data.Text as T +import qualified Data.Text.Encoding as TE tests :: [TestTree] tests = map (localOption (QuickCheckTests 20)) @@ -132,12 +133,12 @@ tests = map (localOption (QuickCheckTests 20)) assertFilterConversion "unexpected script name" "script-name.lua" (doc $ para "ignored") - (doc $ para (str $ "lua" </> "script-name.lua")) + (doc $ para (str $ T.pack $ "lua" </> "script-name.lua")) , testCase "Pandoc version is set" . runLuaTest $ do Lua.getglobal "PANDOC_VERSION" Lua.liftIO . - assertEqual "pandoc version is wrong" (BS.pack pandocVersion) + assertEqual "pandoc version is wrong" (TE.encodeUtf8 pandocVersion) =<< Lua.tostring' Lua.stackTop , testCase "Pandoc types version is set" . runLuaTest $ do diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index e107ff9ee..bc036e0cc 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Docx Copyright : © 2017-2019 Jesse Rosenthal, John MacFarlane @@ -79,7 +80,7 @@ testForWarningsWithOptsIO opts name docxFile expected = do df <- B.readFile docxFile logs <- runIOorExplode $ setVerbosity ERROR >> readDocx opts df >> P.getLog let warns = [m | DocxParserWarning m <- logs] - return $ test id name (unlines warns, unlines expected) + return $ test id name (T.unlines warns, unlines expected) testForWarningsWithOpts :: ReaderOptions -> String -> FilePath -> [String] -> TestTree testForWarningsWithOpts opts name docxFile expected = diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs index f917668ef..3aca6c88c 100644 --- a/test/Tests/Readers/EPUB.hs +++ b/test/Tests/Readers/EPUB.hs @@ -14,6 +14,7 @@ module Tests.Readers.EPUB (tests) where import Prelude import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T import Test.Tasty import Test.Tasty.HUnit import qualified Text.Pandoc.Class as P @@ -35,7 +36,9 @@ testMediaBag fp bag = do ++ show bag ++ "\nActual: " ++ show actBag) - (actBag == bag) + (actBag == packBag bag) + where + packBag = map $ \(x, y, z) -> (x, T.pack y, z) featuresBag :: [(String, String, Int)] featuresBag = [("img/check.gif","image/gif",1340) diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index d12eb22c9..8842bfee5 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -169,10 +169,10 @@ tests = [ testGroup "tokenization" testGroup "Character Escapes" [ "Two-character escapes" =: mconcat ["^^" <> T.pack [i,j] | i <- hex, j <- hex] =?> - para (str ['\0'..'\255']) + para (str $ T.pack ['\0'..'\255']) , "One-character escapes" =: mconcat ["^^" <> T.pack [i] | i <- hex] =?> - para (str $ ['p'..'y']++['!'..'&']) + para (str $ T.pack $ ['p'..'y']++['!'..'&']) ] , testGroup "memoir scene breaks" [ "plainbreak" =: @@ -255,7 +255,7 @@ baseCitation = Citation{ citationId = "item1" } rt :: String -> Inlines -rt = rawInline "latex" +rt = rawInline "latex" . T.pack natbibCitations :: TestTree natbibCitations = testGroup "natbib" diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index 566a42485..17b5cf800 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -53,7 +53,8 @@ autolink :: String -> Inlines autolink = autolinkWith ("",["uri"],[]) autolinkWith :: Attr -> String -> Inlines -autolinkWith attr s = linkWith attr s "" (str s) +autolinkWith attr s = linkWith attr s' "" (str s') + where s' = T.pack s bareLinkTests :: [(Text, Inlines)] bareLinkTests = diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs index 9dc93c92e..cecb9a353 100644 --- a/test/Tests/Readers/Odt.hs +++ b/test/Tests/Readers/Odt.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Odt Copyright : © 2015-2019 John MacFarlane diff --git a/test/Tests/Readers/Org/Block.hs b/test/Tests/Readers/Org/Block.hs index 35fd4c1fa..8cf9a0e56 100644 --- a/test/Tests/Readers/Org/Block.hs +++ b/test/Tests/Readers/Org/Block.hs @@ -179,7 +179,7 @@ tests = , "\\end{equation}" ] =?> rawBlock "latex" - (unlines [ "\\begin{equation}" + (T.unlines [ "\\begin{equation}" , "X_i = \\begin{cases}" , " G_{\\alpha(i)} & \\text{if }\\alpha(i-1) =" <> " \\alpha(i)\\\\" diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs index 7f50a1c81..01c89642e 100644 --- a/test/Tests/Readers/Org/Block/CodeBlock.hs +++ b/test/Tests/Readers/Org/Block/CodeBlock.hs @@ -80,7 +80,7 @@ tests = params = [ ("org-language", "emacs-lisp") , ("exports", "both") ] - code' = unlines [ "(progn (message \"Hello, World!\")" + code' = T.unlines [ "(progn (message \"Hello, World!\")" , " (+ 23 42))" ] in codeBlockWith ("", classes, params) code' @@ -96,8 +96,8 @@ tests = params = [ ("org-language", "emacs-lisp") , ("exports", "both") ] - code' = unlines [ "(progn (message \"Hello, World!\")" - , " (+ 23 42))" ] + code' = T.unlines [ "(progn (message \"Hello, World!\")" + , " (+ 23 42))" ] results' = "65\n" in codeBlockWith ("", classes, params) code' <> @@ -115,8 +115,8 @@ tests = params = [ ("org-language", "emacs-lisp") , ("exports", "code") ] - code' = unlines [ "(progn (message \"Hello, World!\")" - , " (+ 23 42))" ] + code' = T.unlines [ "(progn (message \"Hello, World!\")" + , " (+ 23 42))" ] in codeBlockWith ("", classes, params) code' , "Source block with results and :exports results" =: @@ -190,9 +190,9 @@ tests = (plain $ spanWith ("", ["label"], []) (spcSep [ "Functor", "laws", "in", "Haskell" ])) (codeBlockWith ("functor-laws", ["haskell"], []) - (unlines [ "fmap id = id" - , "fmap (p . q) = (fmap p) . (fmap q)" - ]))) + (T.unlines [ "fmap id = id" + , "fmap (p . q) = (fmap p) . (fmap q)" + ]))) , "Non-letter chars in source block parameters" =: T.unlines [ "#+BEGIN_SRC C :tangle xxxx.c :city Zürich" diff --git a/test/Tests/Readers/Org/Shared.hs b/test/Tests/Readers/Org/Shared.hs index f26442621..aa253aa36 100644 --- a/test/Tests/Readers/Org/Shared.hs +++ b/test/Tests/Readers/Org/Shared.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Shared Copyright : © 2014-2019 Albert Krewinkel @@ -38,5 +39,5 @@ spcSep :: [Inlines] -> Inlines spcSep = mconcat . intersperse space -- | Create a span for the given tag. -tagSpan :: String -> Inlines +tagSpan :: Text -> Inlines tagSpan t = spanWith ("", ["tag"], [("tag-name", t)]) . smallcaps $ str t diff --git a/test/Tests/Shared.hs b/test/Tests/Shared.hs index 2a699623c..788dab257 100644 --- a/test/Tests/Shared.hs +++ b/test/Tests/Shared.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Shared Copyright : © 2006-2019 John MacFarlane diff --git a/test/Tests/Writers/AsciiDoc.hs b/test/Tests/Writers/AsciiDoc.hs index ea61ed044..75f6e5e97 100644 --- a/test/Tests/Writers/AsciiDoc.hs +++ b/test/Tests/Writers/AsciiDoc.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} module Tests.Writers.AsciiDoc (tests) where import Prelude @@ -12,29 +13,35 @@ import Text.Pandoc.Builder asciidoc :: (ToPandoc a) => a -> String asciidoc = unpack . purely (writeAsciiDoc def{ writerWrapText = WrapNone }) . toPandoc +testAsciidoc :: (ToString a, ToPandoc a) + => String + -> (a, String) + -> TestTree +testAsciidoc = test asciidoc + tests :: [TestTree] tests = [ testGroup "emphasis" - [ test asciidoc "emph word before" $ + [ testAsciidoc "emph word before" $ para (text "foo" <> emph (text "bar")) =?> "foo__bar__" - , test asciidoc "emph word after" $ + , testAsciidoc "emph word after" $ para (emph (text "foo") <> text "bar") =?> "__foo__bar" - , test asciidoc "emph quoted" $ + , testAsciidoc "emph quoted" $ para (doubleQuoted (emph (text "foo"))) =?> "``__foo__''" - , test asciidoc "strong word before" $ + , testAsciidoc "strong word before" $ para (text "foo" <> strong (text "bar")) =?> "foo**bar**" - , test asciidoc "strong word after" $ + , testAsciidoc "strong word after" $ para (strong (text "foo") <> text "bar") =?> "**foo**bar" - , test asciidoc "strong quoted" $ + , testAsciidoc "strong quoted" $ para (singleQuoted (strong (text "foo"))) =?> "`**foo**'" ] , testGroup "tables" - [ test asciidoc "empty cells" $ + [ testAsciidoc "empty cells" $ simpleTable [] [[mempty],[mempty]] =?> unlines [ "[cols=\"\",]" , "|===" diff --git a/test/Tests/Writers/ConTeXt.hs b/test/Tests/Writers/ConTeXt.hs index c11e409f8..082ff12fe 100644 --- a/test/Tests/Writers/ConTeXt.hs +++ b/test/Tests/Writers/ConTeXt.hs @@ -3,7 +3,7 @@ module Tests.Writers.ConTeXt (tests) where import Prelude -import Data.Text (unpack) +import Data.Text (unpack, pack) import Test.Tasty import Test.Tasty.QuickCheck import Tests.Helpers @@ -46,9 +46,9 @@ tests = [ testGroup "inline code" , "without '}'" =: code "]" =?> "\\type{]}" , testProperty "code property" $ \s -> null s || '\n' `elem` s || if '{' `elem` s || '}' `elem` s - then context' (code s) == "\\mono{" ++ - context' (str s) ++ "}" - else context' (code s) == "\\type{" ++ s ++ "}" + then context' (code $ pack s) == "\\mono{" ++ + context' (str $ pack s) ++ "}" + else context' (code $ pack s) == "\\type{" ++ s ++ "}" ] , testGroup "headers" [ "level 1" =: |